1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
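/* Illustrative only -- not part of the original file.  Typical source-level
   uses of the attributes declared in the table above, assuming the GNU C
   attribute syntax on an ARM target (all function names are hypothetical):

     void far_away (void) __attribute__ ((long_call));
     void near_by (void) __attribute__ ((short_call));
     double vfp_variant (double) __attribute__ ((pcs ("aapcs-vfp")));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     void __attribute__ ((naked)) reset_entry (void);

   and, when compiling with -mcmse for the ARMv8-M Security Extensions:

     int gateway (int) __attribute__ ((cmse_nonsecure_entry));
     int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);  */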
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
 650 /* The minimum is set such that the total size of the block
 651 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
 652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
 914 /* Nonzero if tuning for XScale.  */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
 944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod = 0;
976
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse = 0;
979
 980 /* Nonzero if the core has a very small, high-latency multiply unit. */
981 int arm_m_profile_small_mul = 0;
982
983 /* The condition codes of the ARM, and the inverse function. */
984 static const char * const arm_condition_codes[] =
985 {
986 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
987 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
988 };
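/* Illustrative note -- not part of the original file.  The table is indexed
   by enum arm_cond_code, and adjacent entries are logical inverses
   ("eq"/"ne", "cs"/"cc", "mi"/"pl", ...), so the "inverse function" referred
   to above amounts to flipping the low bit of the code.  Assuming the
   ARM_INVERSE_CONDITION_CODE macro and the ARM_EQ/ARM_NE enumerators from
   arm.h:

     ARM_INVERSE_CONDITION_CODE (ARM_EQ) == ARM_NE    (index 0 ^ 1 == 1)  */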
989
990 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
991 int arm_regs_in_sequence[] =
992 {
993 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
994 };
995
996 #define ARM_LSL_NAME "lsl"
997 #define streq(string1, string2) (strcmp (string1, string2) == 0)
998
999 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1000 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1001 | (1 << PIC_OFFSET_TABLE_REGNUM)))
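/* Worked example -- not part of the original file, and assuming the usual
   Thumb register numbering from arm.h (THUMB_HARD_FRAME_POINTER_REGNUM == 7,
   SP_REGNUM == 13, PC_REGNUM == 15, PIC register typically r9): 0xff selects
   r0-r7; clearing the frame-pointer bit leaves 0x7f, i.e. r0-r6, while the
   SP, PC and PIC bits lie above bit 7 and are already outside the 0xff
   mask.  */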
1002 \f
1003 /* Initialization code. */
1004
1005 struct cpu_tune
1006 {
1007 enum processor_type scheduler;
1008 unsigned int tune_flags;
1009 const struct tune_params *tune;
1010 };
1011
1012 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1013 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1014 { \
1015 num_slots, \
1016 l1_size, \
1017 l1_line_size \
1018 }
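/* Illustrative expansion -- not part of the original file.  The macros above
   simply populate the three prefetch fields (num_slots, l1_size,
   l1_line_size) of a tuning structure, e.g.:

     ARM_PREFETCH_NOT_BENEFICIAL          expands to  { 0, -1, -1 }
     ARM_PREFETCH_BENEFICIAL (4, 32, 32)  expands to  { 4, 32, 32 }  */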
1019
1020 /* arm generic vectorizer costs. */
1021 static const
1022 struct cpu_vec_costs arm_default_vec_cost = {
1023 1, /* scalar_stmt_cost. */
 1024 1, /* scalar_load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 1, /* vec_unalign_load_cost. */
1031 1, /* vec_unalign_store_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1035 };
1036
1037 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1038 #include "aarch-cost-tables.h"
1039
1040
1041
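/* Note -- not part of the original file.  The entries in the extra-cost
   tables below are expressed via COSTS_N_INSNS, which (assuming the standard
   definition in rtl.h, ((N) * 4)) scales its argument to single-instruction
   units: COSTS_N_INSNS (2) marks an operation as roughly twice the cost of a
   simple ALU instruction, while a plain 0 means no cost beyond the
   baseline.  */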
1042 const struct cpu_cost_table cortexa9_extra_costs =
1043 {
1044 /* ALU */
1045 {
1046 0, /* arith. */
1047 0, /* logical. */
1048 0, /* shift. */
1049 COSTS_N_INSNS (1), /* shift_reg. */
1050 COSTS_N_INSNS (1), /* arith_shift. */
1051 COSTS_N_INSNS (2), /* arith_shift_reg. */
1052 0, /* log_shift. */
1053 COSTS_N_INSNS (1), /* log_shift_reg. */
1054 COSTS_N_INSNS (1), /* extend. */
1055 COSTS_N_INSNS (2), /* extend_arith. */
1056 COSTS_N_INSNS (1), /* bfi. */
1057 COSTS_N_INSNS (1), /* bfx. */
1058 0, /* clz. */
1059 0, /* rev. */
1060 0, /* non_exec. */
1061 true /* non_exec_costs_exec. */
1062 },
1063 {
1064 /* MULT SImode */
1065 {
1066 COSTS_N_INSNS (3), /* simple. */
1067 COSTS_N_INSNS (3), /* flag_setting. */
1068 COSTS_N_INSNS (2), /* extend. */
1069 COSTS_N_INSNS (3), /* add. */
1070 COSTS_N_INSNS (2), /* extend_add. */
1071 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1072 },
1073 /* MULT DImode */
1074 {
1075 0, /* simple (N/A). */
1076 0, /* flag_setting (N/A). */
1077 COSTS_N_INSNS (4), /* extend. */
1078 0, /* add (N/A). */
1079 COSTS_N_INSNS (4), /* extend_add. */
1080 0 /* idiv (N/A). */
1081 }
1082 },
1083 /* LD/ST */
1084 {
1085 COSTS_N_INSNS (2), /* load. */
1086 COSTS_N_INSNS (2), /* load_sign_extend. */
1087 COSTS_N_INSNS (2), /* ldrd. */
1088 COSTS_N_INSNS (2), /* ldm_1st. */
1089 1, /* ldm_regs_per_insn_1st. */
1090 2, /* ldm_regs_per_insn_subsequent. */
1091 COSTS_N_INSNS (5), /* loadf. */
1092 COSTS_N_INSNS (5), /* loadd. */
1093 COSTS_N_INSNS (1), /* load_unaligned. */
1094 COSTS_N_INSNS (2), /* store. */
1095 COSTS_N_INSNS (2), /* strd. */
1096 COSTS_N_INSNS (2), /* stm_1st. */
1097 1, /* stm_regs_per_insn_1st. */
1098 2, /* stm_regs_per_insn_subsequent. */
1099 COSTS_N_INSNS (1), /* storef. */
1100 COSTS_N_INSNS (1), /* stored. */
1101 COSTS_N_INSNS (1), /* store_unaligned. */
1102 COSTS_N_INSNS (1), /* loadv. */
1103 COSTS_N_INSNS (1) /* storev. */
1104 },
1105 {
1106 /* FP SFmode */
1107 {
1108 COSTS_N_INSNS (14), /* div. */
1109 COSTS_N_INSNS (4), /* mult. */
1110 COSTS_N_INSNS (7), /* mult_addsub. */
1111 COSTS_N_INSNS (30), /* fma. */
1112 COSTS_N_INSNS (3), /* addsub. */
1113 COSTS_N_INSNS (1), /* fpconst. */
1114 COSTS_N_INSNS (1), /* neg. */
1115 COSTS_N_INSNS (3), /* compare. */
1116 COSTS_N_INSNS (3), /* widen. */
1117 COSTS_N_INSNS (3), /* narrow. */
1118 COSTS_N_INSNS (3), /* toint. */
1119 COSTS_N_INSNS (3), /* fromint. */
1120 COSTS_N_INSNS (3) /* roundint. */
1121 },
1122 /* FP DFmode */
1123 {
1124 COSTS_N_INSNS (24), /* div. */
1125 COSTS_N_INSNS (5), /* mult. */
1126 COSTS_N_INSNS (8), /* mult_addsub. */
1127 COSTS_N_INSNS (30), /* fma. */
1128 COSTS_N_INSNS (3), /* addsub. */
1129 COSTS_N_INSNS (1), /* fpconst. */
1130 COSTS_N_INSNS (1), /* neg. */
1131 COSTS_N_INSNS (3), /* compare. */
1132 COSTS_N_INSNS (3), /* widen. */
1133 COSTS_N_INSNS (3), /* narrow. */
1134 COSTS_N_INSNS (3), /* toint. */
1135 COSTS_N_INSNS (3), /* fromint. */
1136 COSTS_N_INSNS (3) /* roundint. */
1137 }
1138 },
1139 /* Vector */
1140 {
1141 COSTS_N_INSNS (1) /* alu. */
1142 }
1143 };
1144
1145 const struct cpu_cost_table cortexa8_extra_costs =
1146 {
1147 /* ALU */
1148 {
1149 0, /* arith. */
1150 0, /* logical. */
1151 COSTS_N_INSNS (1), /* shift. */
1152 0, /* shift_reg. */
1153 COSTS_N_INSNS (1), /* arith_shift. */
1154 0, /* arith_shift_reg. */
1155 COSTS_N_INSNS (1), /* log_shift. */
1156 0, /* log_shift_reg. */
1157 0, /* extend. */
1158 0, /* extend_arith. */
1159 0, /* bfi. */
1160 0, /* bfx. */
1161 0, /* clz. */
1162 0, /* rev. */
1163 0, /* non_exec. */
1164 true /* non_exec_costs_exec. */
1165 },
1166 {
1167 /* MULT SImode */
1168 {
1169 COSTS_N_INSNS (1), /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1175 },
1176 /* MULT DImode */
1177 {
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (2), /* extend. */
1181 0, /* add (N/A). */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1184 }
1185 },
1186 /* LD/ST */
1187 {
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (1), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (1), /* loadf. */
1195 COSTS_N_INSNS (1), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (1), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (1), /* storef. */
1203 COSTS_N_INSNS (1), /* stored. */
1204 COSTS_N_INSNS (1), /* store_unaligned. */
1205 COSTS_N_INSNS (1), /* loadv. */
1206 COSTS_N_INSNS (1) /* storev. */
1207 },
1208 {
1209 /* FP SFmode */
1210 {
1211 COSTS_N_INSNS (36), /* div. */
1212 COSTS_N_INSNS (11), /* mult. */
1213 COSTS_N_INSNS (20), /* mult_addsub. */
1214 COSTS_N_INSNS (30), /* fma. */
1215 COSTS_N_INSNS (9), /* addsub. */
1216 COSTS_N_INSNS (3), /* fpconst. */
1217 COSTS_N_INSNS (3), /* neg. */
1218 COSTS_N_INSNS (6), /* compare. */
1219 COSTS_N_INSNS (4), /* widen. */
1220 COSTS_N_INSNS (4), /* narrow. */
1221 COSTS_N_INSNS (8), /* toint. */
1222 COSTS_N_INSNS (8), /* fromint. */
1223 COSTS_N_INSNS (8) /* roundint. */
1224 },
1225 /* FP DFmode */
1226 {
1227 COSTS_N_INSNS (64), /* div. */
1228 COSTS_N_INSNS (16), /* mult. */
1229 COSTS_N_INSNS (25), /* mult_addsub. */
1230 COSTS_N_INSNS (30), /* fma. */
1231 COSTS_N_INSNS (9), /* addsub. */
1232 COSTS_N_INSNS (3), /* fpconst. */
1233 COSTS_N_INSNS (3), /* neg. */
1234 COSTS_N_INSNS (6), /* compare. */
1235 COSTS_N_INSNS (6), /* widen. */
1236 COSTS_N_INSNS (6), /* narrow. */
1237 COSTS_N_INSNS (8), /* toint. */
1238 COSTS_N_INSNS (8), /* fromint. */
1239 COSTS_N_INSNS (8) /* roundint. */
1240 }
1241 },
1242 /* Vector */
1243 {
1244 COSTS_N_INSNS (1) /* alu. */
1245 }
1246 };
1247
1248 const struct cpu_cost_table cortexa5_extra_costs =
1249 {
1250 /* ALU */
1251 {
1252 0, /* arith. */
1253 0, /* logical. */
1254 COSTS_N_INSNS (1), /* shift. */
1255 COSTS_N_INSNS (1), /* shift_reg. */
1256 COSTS_N_INSNS (1), /* arith_shift. */
1257 COSTS_N_INSNS (1), /* arith_shift_reg. */
1258 COSTS_N_INSNS (1), /* log_shift. */
1259 COSTS_N_INSNS (1), /* log_shift_reg. */
1260 COSTS_N_INSNS (1), /* extend. */
1261 COSTS_N_INSNS (1), /* extend_arith. */
1262 COSTS_N_INSNS (1), /* bfi. */
1263 COSTS_N_INSNS (1), /* bfx. */
1264 COSTS_N_INSNS (1), /* clz. */
1265 COSTS_N_INSNS (1), /* rev. */
1266 0, /* non_exec. */
1267 true /* non_exec_costs_exec. */
1268 },
1269
1270 {
1271 /* MULT SImode */
1272 {
1273 0, /* simple. */
1274 COSTS_N_INSNS (1), /* flag_setting. */
1275 COSTS_N_INSNS (1), /* extend. */
1276 COSTS_N_INSNS (1), /* add. */
1277 COSTS_N_INSNS (1), /* extend_add. */
1278 COSTS_N_INSNS (7) /* idiv. */
1279 },
1280 /* MULT DImode */
1281 {
1282 0, /* simple (N/A). */
1283 0, /* flag_setting (N/A). */
1284 COSTS_N_INSNS (1), /* extend. */
1285 0, /* add. */
1286 COSTS_N_INSNS (2), /* extend_add. */
1287 0 /* idiv (N/A). */
1288 }
1289 },
1290 /* LD/ST */
1291 {
1292 COSTS_N_INSNS (1), /* load. */
1293 COSTS_N_INSNS (1), /* load_sign_extend. */
1294 COSTS_N_INSNS (6), /* ldrd. */
1295 COSTS_N_INSNS (1), /* ldm_1st. */
1296 1, /* ldm_regs_per_insn_1st. */
1297 2, /* ldm_regs_per_insn_subsequent. */
1298 COSTS_N_INSNS (2), /* loadf. */
1299 COSTS_N_INSNS (4), /* loadd. */
1300 COSTS_N_INSNS (1), /* load_unaligned. */
1301 COSTS_N_INSNS (1), /* store. */
1302 COSTS_N_INSNS (3), /* strd. */
1303 COSTS_N_INSNS (1), /* stm_1st. */
1304 1, /* stm_regs_per_insn_1st. */
1305 2, /* stm_regs_per_insn_subsequent. */
1306 COSTS_N_INSNS (2), /* storef. */
1307 COSTS_N_INSNS (2), /* stored. */
1308 COSTS_N_INSNS (1), /* store_unaligned. */
1309 COSTS_N_INSNS (1), /* loadv. */
1310 COSTS_N_INSNS (1) /* storev. */
1311 },
1312 {
1313 /* FP SFmode */
1314 {
1315 COSTS_N_INSNS (15), /* div. */
1316 COSTS_N_INSNS (3), /* mult. */
1317 COSTS_N_INSNS (7), /* mult_addsub. */
1318 COSTS_N_INSNS (7), /* fma. */
1319 COSTS_N_INSNS (3), /* addsub. */
1320 COSTS_N_INSNS (3), /* fpconst. */
1321 COSTS_N_INSNS (3), /* neg. */
1322 COSTS_N_INSNS (3), /* compare. */
1323 COSTS_N_INSNS (3), /* widen. */
1324 COSTS_N_INSNS (3), /* narrow. */
1325 COSTS_N_INSNS (3), /* toint. */
1326 COSTS_N_INSNS (3), /* fromint. */
1327 COSTS_N_INSNS (3) /* roundint. */
1328 },
1329 /* FP DFmode */
1330 {
1331 COSTS_N_INSNS (30), /* div. */
1332 COSTS_N_INSNS (6), /* mult. */
1333 COSTS_N_INSNS (10), /* mult_addsub. */
1334 COSTS_N_INSNS (7), /* fma. */
1335 COSTS_N_INSNS (3), /* addsub. */
1336 COSTS_N_INSNS (3), /* fpconst. */
1337 COSTS_N_INSNS (3), /* neg. */
1338 COSTS_N_INSNS (3), /* compare. */
1339 COSTS_N_INSNS (3), /* widen. */
1340 COSTS_N_INSNS (3), /* narrow. */
1341 COSTS_N_INSNS (3), /* toint. */
1342 COSTS_N_INSNS (3), /* fromint. */
1343 COSTS_N_INSNS (3) /* roundint. */
1344 }
1345 },
1346 /* Vector */
1347 {
1348 COSTS_N_INSNS (1) /* alu. */
1349 }
1350 };
1351
1352
1353 const struct cpu_cost_table cortexa7_extra_costs =
1354 {
1355 /* ALU */
1356 {
1357 0, /* arith. */
1358 0, /* logical. */
1359 COSTS_N_INSNS (1), /* shift. */
1360 COSTS_N_INSNS (1), /* shift_reg. */
1361 COSTS_N_INSNS (1), /* arith_shift. */
1362 COSTS_N_INSNS (1), /* arith_shift_reg. */
1363 COSTS_N_INSNS (1), /* log_shift. */
1364 COSTS_N_INSNS (1), /* log_shift_reg. */
1365 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (1), /* extend_arith. */
1367 COSTS_N_INSNS (1), /* bfi. */
1368 COSTS_N_INSNS (1), /* bfx. */
1369 COSTS_N_INSNS (1), /* clz. */
1370 COSTS_N_INSNS (1), /* rev. */
1371 0, /* non_exec. */
1372 true /* non_exec_costs_exec. */
1373 },
1374
1375 {
1376 /* MULT SImode */
1377 {
1378 0, /* simple. */
1379 COSTS_N_INSNS (1), /* flag_setting. */
1380 COSTS_N_INSNS (1), /* extend. */
1381 COSTS_N_INSNS (1), /* add. */
1382 COSTS_N_INSNS (1), /* extend_add. */
1383 COSTS_N_INSNS (7) /* idiv. */
1384 },
1385 /* MULT DImode */
1386 {
1387 0, /* simple (N/A). */
1388 0, /* flag_setting (N/A). */
1389 COSTS_N_INSNS (1), /* extend. */
1390 0, /* add. */
1391 COSTS_N_INSNS (2), /* extend_add. */
1392 0 /* idiv (N/A). */
1393 }
1394 },
1395 /* LD/ST */
1396 {
1397 COSTS_N_INSNS (1), /* load. */
1398 COSTS_N_INSNS (1), /* load_sign_extend. */
1399 COSTS_N_INSNS (3), /* ldrd. */
1400 COSTS_N_INSNS (1), /* ldm_1st. */
1401 1, /* ldm_regs_per_insn_1st. */
1402 2, /* ldm_regs_per_insn_subsequent. */
1403 COSTS_N_INSNS (2), /* loadf. */
1404 COSTS_N_INSNS (2), /* loadd. */
1405 COSTS_N_INSNS (1), /* load_unaligned. */
1406 COSTS_N_INSNS (1), /* store. */
1407 COSTS_N_INSNS (3), /* strd. */
1408 COSTS_N_INSNS (1), /* stm_1st. */
1409 1, /* stm_regs_per_insn_1st. */
1410 2, /* stm_regs_per_insn_subsequent. */
1411 COSTS_N_INSNS (2), /* storef. */
1412 COSTS_N_INSNS (2), /* stored. */
1413 COSTS_N_INSNS (1), /* store_unaligned. */
1414 COSTS_N_INSNS (1), /* loadv. */
1415 COSTS_N_INSNS (1) /* storev. */
1416 },
1417 {
1418 /* FP SFmode */
1419 {
1420 COSTS_N_INSNS (15), /* div. */
1421 COSTS_N_INSNS (3), /* mult. */
1422 COSTS_N_INSNS (7), /* mult_addsub. */
1423 COSTS_N_INSNS (7), /* fma. */
1424 COSTS_N_INSNS (3), /* addsub. */
1425 COSTS_N_INSNS (3), /* fpconst. */
1426 COSTS_N_INSNS (3), /* neg. */
1427 COSTS_N_INSNS (3), /* compare. */
1428 COSTS_N_INSNS (3), /* widen. */
1429 COSTS_N_INSNS (3), /* narrow. */
1430 COSTS_N_INSNS (3), /* toint. */
1431 COSTS_N_INSNS (3), /* fromint. */
1432 COSTS_N_INSNS (3) /* roundint. */
1433 },
1434 /* FP DFmode */
1435 {
1436 COSTS_N_INSNS (30), /* div. */
1437 COSTS_N_INSNS (6), /* mult. */
1438 COSTS_N_INSNS (10), /* mult_addsub. */
1439 COSTS_N_INSNS (7), /* fma. */
1440 COSTS_N_INSNS (3), /* addsub. */
1441 COSTS_N_INSNS (3), /* fpconst. */
1442 COSTS_N_INSNS (3), /* neg. */
1443 COSTS_N_INSNS (3), /* compare. */
1444 COSTS_N_INSNS (3), /* widen. */
1445 COSTS_N_INSNS (3), /* narrow. */
1446 COSTS_N_INSNS (3), /* toint. */
1447 COSTS_N_INSNS (3), /* fromint. */
1448 COSTS_N_INSNS (3) /* roundint. */
1449 }
1450 },
1451 /* Vector */
1452 {
1453 COSTS_N_INSNS (1) /* alu. */
1454 }
1455 };
1456
1457 const struct cpu_cost_table cortexa12_extra_costs =
1458 {
1459 /* ALU */
1460 {
1461 0, /* arith. */
1462 0, /* logical. */
1463 0, /* shift. */
1464 COSTS_N_INSNS (1), /* shift_reg. */
1465 COSTS_N_INSNS (1), /* arith_shift. */
1466 COSTS_N_INSNS (1), /* arith_shift_reg. */
1467 COSTS_N_INSNS (1), /* log_shift. */
1468 COSTS_N_INSNS (1), /* log_shift_reg. */
1469 0, /* extend. */
1470 COSTS_N_INSNS (1), /* extend_arith. */
1471 0, /* bfi. */
1472 COSTS_N_INSNS (1), /* bfx. */
1473 COSTS_N_INSNS (1), /* clz. */
1474 COSTS_N_INSNS (1), /* rev. */
1475 0, /* non_exec. */
1476 true /* non_exec_costs_exec. */
1477 },
1478 /* MULT SImode */
1479 {
1480 {
1481 COSTS_N_INSNS (2), /* simple. */
1482 COSTS_N_INSNS (3), /* flag_setting. */
1483 COSTS_N_INSNS (2), /* extend. */
1484 COSTS_N_INSNS (3), /* add. */
1485 COSTS_N_INSNS (2), /* extend_add. */
1486 COSTS_N_INSNS (18) /* idiv. */
1487 },
1488 /* MULT DImode */
1489 {
1490 0, /* simple (N/A). */
1491 0, /* flag_setting (N/A). */
1492 COSTS_N_INSNS (3), /* extend. */
1493 0, /* add (N/A). */
1494 COSTS_N_INSNS (3), /* extend_add. */
1495 0 /* idiv (N/A). */
1496 }
1497 },
1498 /* LD/ST */
1499 {
1500 COSTS_N_INSNS (3), /* load. */
1501 COSTS_N_INSNS (3), /* load_sign_extend. */
1502 COSTS_N_INSNS (3), /* ldrd. */
1503 COSTS_N_INSNS (3), /* ldm_1st. */
1504 1, /* ldm_regs_per_insn_1st. */
1505 2, /* ldm_regs_per_insn_subsequent. */
1506 COSTS_N_INSNS (3), /* loadf. */
1507 COSTS_N_INSNS (3), /* loadd. */
1508 0, /* load_unaligned. */
1509 0, /* store. */
1510 0, /* strd. */
1511 0, /* stm_1st. */
1512 1, /* stm_regs_per_insn_1st. */
1513 2, /* stm_regs_per_insn_subsequent. */
1514 COSTS_N_INSNS (2), /* storef. */
1515 COSTS_N_INSNS (2), /* stored. */
1516 0, /* store_unaligned. */
1517 COSTS_N_INSNS (1), /* loadv. */
1518 COSTS_N_INSNS (1) /* storev. */
1519 },
1520 {
1521 /* FP SFmode */
1522 {
1523 COSTS_N_INSNS (17), /* div. */
1524 COSTS_N_INSNS (4), /* mult. */
1525 COSTS_N_INSNS (8), /* mult_addsub. */
1526 COSTS_N_INSNS (8), /* fma. */
1527 COSTS_N_INSNS (4), /* addsub. */
1528 COSTS_N_INSNS (2), /* fpconst. */
1529 COSTS_N_INSNS (2), /* neg. */
1530 COSTS_N_INSNS (2), /* compare. */
1531 COSTS_N_INSNS (4), /* widen. */
1532 COSTS_N_INSNS (4), /* narrow. */
1533 COSTS_N_INSNS (4), /* toint. */
1534 COSTS_N_INSNS (4), /* fromint. */
1535 COSTS_N_INSNS (4) /* roundint. */
1536 },
1537 /* FP DFmode */
1538 {
1539 COSTS_N_INSNS (31), /* div. */
1540 COSTS_N_INSNS (4), /* mult. */
1541 COSTS_N_INSNS (8), /* mult_addsub. */
1542 COSTS_N_INSNS (8), /* fma. */
1543 COSTS_N_INSNS (4), /* addsub. */
1544 COSTS_N_INSNS (2), /* fpconst. */
1545 COSTS_N_INSNS (2), /* neg. */
1546 COSTS_N_INSNS (2), /* compare. */
1547 COSTS_N_INSNS (4), /* widen. */
1548 COSTS_N_INSNS (4), /* narrow. */
1549 COSTS_N_INSNS (4), /* toint. */
1550 COSTS_N_INSNS (4), /* fromint. */
1551 COSTS_N_INSNS (4) /* roundint. */
1552 }
1553 },
1554 /* Vector */
1555 {
1556 COSTS_N_INSNS (1) /* alu. */
1557 }
1558 };
1559
1560 const struct cpu_cost_table cortexa15_extra_costs =
1561 {
1562 /* ALU */
1563 {
1564 0, /* arith. */
1565 0, /* logical. */
1566 0, /* shift. */
1567 0, /* shift_reg. */
1568 COSTS_N_INSNS (1), /* arith_shift. */
1569 COSTS_N_INSNS (1), /* arith_shift_reg. */
1570 COSTS_N_INSNS (1), /* log_shift. */
1571 COSTS_N_INSNS (1), /* log_shift_reg. */
1572 0, /* extend. */
1573 COSTS_N_INSNS (1), /* extend_arith. */
1574 COSTS_N_INSNS (1), /* bfi. */
1575 0, /* bfx. */
1576 0, /* clz. */
1577 0, /* rev. */
1578 0, /* non_exec. */
1579 true /* non_exec_costs_exec. */
1580 },
1581 /* MULT SImode */
1582 {
1583 {
1584 COSTS_N_INSNS (2), /* simple. */
1585 COSTS_N_INSNS (3), /* flag_setting. */
1586 COSTS_N_INSNS (2), /* extend. */
1587 COSTS_N_INSNS (2), /* add. */
1588 COSTS_N_INSNS (2), /* extend_add. */
1589 COSTS_N_INSNS (18) /* idiv. */
1590 },
1591 /* MULT DImode */
1592 {
1593 0, /* simple (N/A). */
1594 0, /* flag_setting (N/A). */
1595 COSTS_N_INSNS (3), /* extend. */
1596 0, /* add (N/A). */
1597 COSTS_N_INSNS (3), /* extend_add. */
1598 0 /* idiv (N/A). */
1599 }
1600 },
1601 /* LD/ST */
1602 {
1603 COSTS_N_INSNS (3), /* load. */
1604 COSTS_N_INSNS (3), /* load_sign_extend. */
1605 COSTS_N_INSNS (3), /* ldrd. */
1606 COSTS_N_INSNS (4), /* ldm_1st. */
1607 1, /* ldm_regs_per_insn_1st. */
1608 2, /* ldm_regs_per_insn_subsequent. */
1609 COSTS_N_INSNS (4), /* loadf. */
1610 COSTS_N_INSNS (4), /* loadd. */
1611 0, /* load_unaligned. */
1612 0, /* store. */
1613 0, /* strd. */
1614 COSTS_N_INSNS (1), /* stm_1st. */
1615 1, /* stm_regs_per_insn_1st. */
1616 2, /* stm_regs_per_insn_subsequent. */
1617 0, /* storef. */
1618 0, /* stored. */
1619 0, /* store_unaligned. */
1620 COSTS_N_INSNS (1), /* loadv. */
1621 COSTS_N_INSNS (1) /* storev. */
1622 },
1623 {
1624 /* FP SFmode */
1625 {
1626 COSTS_N_INSNS (17), /* div. */
1627 COSTS_N_INSNS (4), /* mult. */
1628 COSTS_N_INSNS (8), /* mult_addsub. */
1629 COSTS_N_INSNS (8), /* fma. */
1630 COSTS_N_INSNS (4), /* addsub. */
1631 COSTS_N_INSNS (2), /* fpconst. */
1632 COSTS_N_INSNS (2), /* neg. */
1633 COSTS_N_INSNS (5), /* compare. */
1634 COSTS_N_INSNS (4), /* widen. */
1635 COSTS_N_INSNS (4), /* narrow. */
1636 COSTS_N_INSNS (4), /* toint. */
1637 COSTS_N_INSNS (4), /* fromint. */
1638 COSTS_N_INSNS (4) /* roundint. */
1639 },
1640 /* FP DFmode */
1641 {
1642 COSTS_N_INSNS (31), /* div. */
1643 COSTS_N_INSNS (4), /* mult. */
1644 COSTS_N_INSNS (8), /* mult_addsub. */
1645 COSTS_N_INSNS (8), /* fma. */
1646 COSTS_N_INSNS (4), /* addsub. */
1647 COSTS_N_INSNS (2), /* fpconst. */
1648 COSTS_N_INSNS (2), /* neg. */
1649 COSTS_N_INSNS (2), /* compare. */
1650 COSTS_N_INSNS (4), /* widen. */
1651 COSTS_N_INSNS (4), /* narrow. */
1652 COSTS_N_INSNS (4), /* toint. */
1653 COSTS_N_INSNS (4), /* fromint. */
1654 COSTS_N_INSNS (4) /* roundint. */
1655 }
1656 },
1657 /* Vector */
1658 {
1659 COSTS_N_INSNS (1) /* alu. */
1660 }
1661 };
1662
1663 const struct cpu_cost_table v7m_extra_costs =
1664 {
1665 /* ALU */
1666 {
1667 0, /* arith. */
1668 0, /* logical. */
1669 0, /* shift. */
1670 0, /* shift_reg. */
1671 0, /* arith_shift. */
1672 COSTS_N_INSNS (1), /* arith_shift_reg. */
1673 0, /* log_shift. */
1674 COSTS_N_INSNS (1), /* log_shift_reg. */
1675 0, /* extend. */
1676 COSTS_N_INSNS (1), /* extend_arith. */
1677 0, /* bfi. */
1678 0, /* bfx. */
1679 0, /* clz. */
1680 0, /* rev. */
1681 COSTS_N_INSNS (1), /* non_exec. */
1682 false /* non_exec_costs_exec. */
1683 },
1684 {
1685 /* MULT SImode */
1686 {
1687 COSTS_N_INSNS (1), /* simple. */
1688 COSTS_N_INSNS (1), /* flag_setting. */
1689 COSTS_N_INSNS (2), /* extend. */
1690 COSTS_N_INSNS (1), /* add. */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 COSTS_N_INSNS (8) /* idiv. */
1693 },
1694 /* MULT DImode */
1695 {
1696 0, /* simple (N/A). */
1697 0, /* flag_setting (N/A). */
1698 COSTS_N_INSNS (2), /* extend. */
1699 0, /* add (N/A). */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 0 /* idiv (N/A). */
1702 }
1703 },
1704 /* LD/ST */
1705 {
1706 COSTS_N_INSNS (2), /* load. */
1707 0, /* load_sign_extend. */
1708 COSTS_N_INSNS (3), /* ldrd. */
1709 COSTS_N_INSNS (2), /* ldm_1st. */
1710 1, /* ldm_regs_per_insn_1st. */
1711 1, /* ldm_regs_per_insn_subsequent. */
1712 COSTS_N_INSNS (2), /* loadf. */
1713 COSTS_N_INSNS (3), /* loadd. */
1714 COSTS_N_INSNS (1), /* load_unaligned. */
1715 COSTS_N_INSNS (2), /* store. */
1716 COSTS_N_INSNS (3), /* strd. */
1717 COSTS_N_INSNS (2), /* stm_1st. */
1718 1, /* stm_regs_per_insn_1st. */
1719 1, /* stm_regs_per_insn_subsequent. */
1720 COSTS_N_INSNS (2), /* storef. */
1721 COSTS_N_INSNS (3), /* stored. */
1722 COSTS_N_INSNS (1), /* store_unaligned. */
1723 COSTS_N_INSNS (1), /* loadv. */
1724 COSTS_N_INSNS (1) /* storev. */
1725 },
1726 {
1727 /* FP SFmode */
1728 {
1729 COSTS_N_INSNS (7), /* div. */
1730 COSTS_N_INSNS (2), /* mult. */
1731 COSTS_N_INSNS (5), /* mult_addsub. */
1732 COSTS_N_INSNS (3), /* fma. */
1733 COSTS_N_INSNS (1), /* addsub. */
1734 0, /* fpconst. */
1735 0, /* neg. */
1736 0, /* compare. */
1737 0, /* widen. */
1738 0, /* narrow. */
1739 0, /* toint. */
1740 0, /* fromint. */
1741 0 /* roundint. */
1742 },
1743 /* FP DFmode */
1744 {
1745 COSTS_N_INSNS (15), /* div. */
1746 COSTS_N_INSNS (5), /* mult. */
1747 COSTS_N_INSNS (7), /* mult_addsub. */
1748 COSTS_N_INSNS (7), /* fma. */
1749 COSTS_N_INSNS (3), /* addsub. */
1750 0, /* fpconst. */
1751 0, /* neg. */
1752 0, /* compare. */
1753 0, /* widen. */
1754 0, /* narrow. */
1755 0, /* toint. */
1756 0, /* fromint. */
1757 0 /* roundint. */
1758 }
1759 },
1760 /* Vector */
1761 {
1762 COSTS_N_INSNS (1) /* alu. */
1763 }
1764 };
1765
1766 const struct tune_params arm_slowmul_tune =
1767 {
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1786 };
1787
1788 const struct tune_params arm_fastmul_tune =
1789 {
1790 &generic_extra_costs, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1808 };
1809
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1812
1813 const struct tune_params arm_strongarm_tune =
1814 {
1815 &generic_extra_costs, /* Insn extra costs. */
1816 NULL, /* Sched adj cost. */
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1833 };
1834
1835 const struct tune_params arm_xscale_tune =
1836 {
1837 &generic_extra_costs, /* Insn extra costs. */
1838 xscale_sched_adjust_cost,
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1855 };
1856
1857 const struct tune_params arm_9e_tune =
1858 {
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1877 };
1878
1879 const struct tune_params arm_marvell_pj4_tune =
1880 {
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_TRUE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1899 };
1900
1901 const struct tune_params arm_v6t2_tune =
1902 {
1903 &generic_extra_costs, /* Insn extra costs. */
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1921 };
1922
1923
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune =
1926 {
1927 &generic_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1945 };
1946
1947 const struct tune_params arm_cortex_a8_tune =
1948 {
1949 &cortexa8_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1967 };
1968
1969 const struct tune_params arm_cortex_a7_tune =
1970 {
1971 &cortexa7_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_FALSE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_OFF
1989 };
1990
1991 const struct tune_params arm_cortex_a15_tune =
1992 {
1993 &cortexa15_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_FULL
2011 };
2012
2013 const struct tune_params arm_cortex_a35_tune =
2014 {
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2032 tune_params::SCHED_AUTOPREF_OFF
2033 };
2034
2035 const struct tune_params arm_cortex_a53_tune =
2036 {
2037 &cortexa53_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_OFF
2055 };
2056
2057 const struct tune_params arm_cortex_a57_tune =
2058 {
2059 &cortexa57_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2076 tune_params::SCHED_AUTOPREF_FULL
2077 };
2078
2079 const struct tune_params arm_exynosm1_tune =
2080 {
2081 &exynosm1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2099 };
2100
2101 const struct tune_params arm_xgene1_tune =
2102 {
2103 &xgene1_extra_costs,
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_64_FALSE,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE,
2119 tune_params::FUSE_NOTHING,
2120 tune_params::SCHED_AUTOPREF_OFF
2121 };
2122
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2125
2126 const struct tune_params arm_cortex_a5_tune =
2127 {
2128 &cortexa5_extra_costs,
2129 NULL, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL,
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2146 };
2147
2148 const struct tune_params arm_cortex_a9_tune =
2149 {
2150 &cortexa9_extra_costs,
2151 cortex_a9_sched_adjust_cost,
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_FALSE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2168 };
2169
2170 const struct tune_params arm_cortex_a12_tune =
2171 {
2172 &cortexa12_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_OFF
2190 };
2191
2192 const struct tune_params arm_cortex_a73_tune =
2193 {
2194 &cortexa57_extra_costs,
2195 NULL, /* Sched adj cost. */
2196 arm_default_branch_cost,
2197 &arm_default_vec_cost, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL,
2203 tune_params::PREF_CONST_POOL_FALSE,
2204 tune_params::PREF_LDRD_TRUE,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL,
2208 tune_params::PREF_NEON_64_FALSE,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2211 tune_params::SCHED_AUTOPREF_FULL
2212 };
2213
2214 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
2215 cycle, so a MOVW/MOVT pair costs two cycles. An LDR from the constant pool also takes two cycles
2216 to execute, but mildly increases pipelining opportunity (consecutive
2217 loads/stores can be pipelined together, saving one cycle), and may also
2218 improve icache utilisation. Hence we prefer the constant pool for such
2219 processors. */
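/* For example, materialising the 32-bit constant 0x12345678 costs two cycles
   either way on such a core:
       movw  r0, #0x5678          @ 1 cycle
       movt  r0, #0x1234          @ 1 cycle
   versus
       ldr   r0, =0x12345678      @ 2 cycles, but may pipeline with an
                                  @ adjacent load/store.  */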
2220
2221 const struct tune_params arm_v7m_tune =
2222 {
2223 &v7m_extra_costs,
2224 NULL, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost,
2226 &arm_default_vec_cost,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL,
2232 tune_params::PREF_CONST_POOL_TRUE,
2233 tune_params::PREF_LDRD_FALSE,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER,
2237 tune_params::PREF_NEON_64_FALSE,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE,
2239 tune_params::FUSE_NOTHING,
2240 tune_params::SCHED_AUTOPREF_OFF
2241 };
2242
2243 /* Cortex-M7 tuning. */
2244
2245 const struct tune_params arm_cortex_m7_tune =
2246 {
2247 &v7m_extra_costs,
2248 NULL, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost,
2250 &arm_default_vec_cost,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL,
2256 tune_params::PREF_CONST_POOL_TRUE,
2257 tune_params::PREF_LDRD_FALSE,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER,
2261 tune_params::PREF_NEON_64_FALSE,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE,
2263 tune_params::FUSE_NOTHING,
2264 tune_params::SCHED_AUTOPREF_OFF
2265 };
2266
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2269 cortex-m23. */
2270 const struct tune_params arm_v6m_tune =
2271 {
2272 &generic_extra_costs, /* Insn extra costs. */
2273 NULL, /* Sched adj cost. */
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_FALSE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_fa726te_tune =
2293 {
2294 &generic_extra_costs, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost,
2296 arm_default_branch_cost,
2297 &arm_default_vec_cost,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2316
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2320
2321 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2322
2323 /* Supported TLS relocations. */
2324
2325 enum tls_reloc {
2326 TLS_GD32,
2327 TLS_LDM32,
2328 TLS_LDO32,
2329 TLS_IE32,
2330 TLS_LE32,
2331 TLS_DESCSEQ /* GNU scheme */
2332 };
2333
2334 /* The maximum number of insns to be used when loading a constant. */
2335 inline static int
2336 arm_constant_limit (bool size_p)
2337 {
2338 return size_p ? 1 : current_tune->constant_limit;
2339 }
2340
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2342 to be valid. */
2343 inline static rtx_insn *
2344 emit_set_insn (rtx x, rtx y)
2345 {
2346 return emit_insn (gen_rtx_SET (x, y));
2347 }
2348
2349 /* Return the number of bits set in VALUE. */
2350 static unsigned
2351 bit_count (unsigned long value)
2352 {
2353 unsigned long count = 0;
2354
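/* Kernighan's trick: each "value &= value - 1" below clears exactly one set
   bit, so the loop iterates once per set bit.  */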
2355 while (value)
2356 {
2357 count++;
2358 value &= value - 1; /* Clear the least-significant set bit. */
2359 }
2360
2361 return count;
2362 }
2363
2364 /* Return the number of bits set in BMAP. */
2365 static unsigned
2366 bitmap_popcount (const sbitmap bmap)
2367 {
2368 unsigned int count = 0;
2369 unsigned int n = 0;
2370 sbitmap_iterator sbi;
2371
2372 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2373 count++;
2374 return count;
2375 }
2376
2377 typedef struct
2378 {
2379 machine_mode mode;
2380 const char *name;
2381 } arm_fixed_mode_set;
2382
2383 /* A small helper for registering the fixed-point libfuncs. */
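/* For example, funcname "add", modename "qq" and num_suffix 3 yield the
   libcall name "__gnu_addqq3".  */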
2384
2385 static void
2386 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2387 const char *funcname, const char *modename,
2388 int num_suffix)
2389 {
2390 char buffer[50];
2391
2392 if (num_suffix == 0)
2393 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2394 else
2395 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2396
2397 set_optab_libfunc (optable, mode, buffer);
2398 }
2399
2400 static void
2401 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2402 machine_mode from, const char *funcname,
2403 const char *toname, const char *fromname)
2404 {
2405 char buffer[50];
2406 const char *maybe_suffix_2 = "";
2407
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2411 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2412 maybe_suffix_2 = "2";
2413
2414 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2415 maybe_suffix_2);
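/* For example, a fract conversion from QQmode to SFmode is registered as
   "__gnu_fractqqsf", while QQmode to HQmode (both signed fract modes) takes
   the "2" suffix and becomes "__gnu_fractqqhq2".  */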
2416
2417 set_conv_libfunc (optable, to, from, buffer);
2418 }
2419
2420 /* Set up library functions unique to ARM. */
2421
2422 static void
2423 arm_init_libfuncs (void)
2424 {
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2428
2429 /* There are no special library functions unless we are using the
2430 ARM BPABI. */
2431 if (!TARGET_BPABI)
2432 return;
2433
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2436
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2443
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab, DFmode, NULL);
2447 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2452
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2459
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab, SFmode, NULL);
2463 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2468
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2478
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2482
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2492
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2502
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2506
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513 call-clobbered registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2515 routines. */
2516 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2518
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
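/* Per the run-time ABI, __aeabi_idiv takes the numerator in r0 and the
   denominator in r1 and returns the quotient in r0, so a plain SImode
   division maps directly onto the call with no extra register moves.  */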
2523
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab, DImode, NULL);
2527 set_optab_libfunc (umod_optab, DImode, NULL);
2528 set_optab_libfunc (smod_optab, SImode, NULL);
2529 set_optab_libfunc (umod_optab, SImode, NULL);
2530
2531 /* Half-precision float operations. The compiler handles all operations
2532 with NULL libfuncs by converting to SFmode. */
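/* For example, an HFmode addition is widened to SFmode with the __gnu_h2f_*
   routine selected below, added in SFmode, then truncated back via
   __gnu_f2h_*.  */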
2533 switch (arm_fp16_format)
2534 {
2535 case ARM_FP16_FORMAT_IEEE:
2536 case ARM_FP16_FORMAT_ALTERNATIVE:
2537
2538 /* Conversions. */
2539 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_f2h_ieee"
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab, SFmode, HFmode,
2544 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2545 ? "__gnu_h2f_ieee"
2546 : "__gnu_h2f_alternative"));
2547
2548 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2549 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2550 ? "__gnu_d2h_ieee"
2551 : "__gnu_d2h_alternative"));
2552
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, HFmode, NULL);
2555 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2556 set_optab_libfunc (smul_optab, HFmode, NULL);
2557 set_optab_libfunc (neg_optab, HFmode, NULL);
2558 set_optab_libfunc (sub_optab, HFmode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, HFmode, NULL);
2562 set_optab_libfunc (ne_optab, HFmode, NULL);
2563 set_optab_libfunc (lt_optab, HFmode, NULL);
2564 set_optab_libfunc (le_optab, HFmode, NULL);
2565 set_optab_libfunc (ge_optab, HFmode, NULL);
2566 set_optab_libfunc (gt_optab, HFmode, NULL);
2567 set_optab_libfunc (unord_optab, HFmode, NULL);
2568 break;
2569
2570 default:
2571 break;
2572 }
2573
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2575 {
2576 const arm_fixed_mode_set fixed_arith_modes[] =
2577 {
2578 { E_QQmode, "qq" },
2579 { E_UQQmode, "uqq" },
2580 { E_HQmode, "hq" },
2581 { E_UHQmode, "uhq" },
2582 { E_SQmode, "sq" },
2583 { E_USQmode, "usq" },
2584 { E_DQmode, "dq" },
2585 { E_UDQmode, "udq" },
2586 { E_TQmode, "tq" },
2587 { E_UTQmode, "utq" },
2588 { E_HAmode, "ha" },
2589 { E_UHAmode, "uha" },
2590 { E_SAmode, "sa" },
2591 { E_USAmode, "usa" },
2592 { E_DAmode, "da" },
2593 { E_UDAmode, "uda" },
2594 { E_TAmode, "ta" },
2595 { E_UTAmode, "uta" }
2596 };
2597 const arm_fixed_mode_set fixed_conv_modes[] =
2598 {
2599 { E_QQmode, "qq" },
2600 { E_UQQmode, "uqq" },
2601 { E_HQmode, "hq" },
2602 { E_UHQmode, "uhq" },
2603 { E_SQmode, "sq" },
2604 { E_USQmode, "usq" },
2605 { E_DQmode, "dq" },
2606 { E_UDQmode, "udq" },
2607 { E_TQmode, "tq" },
2608 { E_UTQmode, "utq" },
2609 { E_HAmode, "ha" },
2610 { E_UHAmode, "uha" },
2611 { E_SAmode, "sa" },
2612 { E_USAmode, "usa" },
2613 { E_DAmode, "da" },
2614 { E_UDAmode, "uda" },
2615 { E_TAmode, "ta" },
2616 { E_UTAmode, "uta" },
2617 { E_QImode, "qi" },
2618 { E_HImode, "hi" },
2619 { E_SImode, "si" },
2620 { E_DImode, "di" },
2621 { E_TImode, "ti" },
2622 { E_SFmode, "sf" },
2623 { E_DFmode, "df" }
2624 };
2625 unsigned int i, j;
2626
2627 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2628 {
2629 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2630 "add", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2632 "ssadd", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2634 "usadd", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2636 "sub", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2638 "sssub", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2640 "ussub", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2642 "mul", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2644 "ssmul", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2646 "usmul", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2648 "div", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2650 "udiv", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2652 "ssdiv", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2654 "usdiv", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2656 "neg", fixed_arith_modes[i].name, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2658 "ssneg", fixed_arith_modes[i].name, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2660 "usneg", fixed_arith_modes[i].name, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2662 "ashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2664 "ashr", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2666 "lshr", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2668 "ssashl", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2670 "usashl", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2672 "cmp", fixed_arith_modes[i].name, 2);
2673 }
2674
2675 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2676 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2677 {
2678 if (i == j
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2681 continue;
2682
2683 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "fract",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2687 arm_set_fixed_conv_libfunc (satfract_optab,
2688 fixed_conv_modes[i].mode,
2689 fixed_conv_modes[j].mode, "satfract",
2690 fixed_conv_modes[i].name,
2691 fixed_conv_modes[j].name);
2692 arm_set_fixed_conv_libfunc (fractuns_optab,
2693 fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fractuns",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfractuns",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2702 }
2703 }
2704
2705 if (TARGET_AAPCS_BASED)
2706 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2707 }
2708
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type;
2711
2712 /* Return the type to use as __builtin_va_list. */
2713 static tree
2714 arm_build_builtin_va_list (void)
2715 {
2716 tree va_list_name;
2717 tree ap_field;
2718
2719 if (!TARGET_AAPCS_BASED)
2720 return std_build_builtin_va_list ();
2721
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 defined as:
2724
2725 struct __va_list
2726 {
2727 void *__ap;
2728 };
2729
2730 The C Library ABI further reinforces this definition in \S
2731 4.1.
2732
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
2737 /* Create the type. */
2738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2739 /* Give it the required name. */
2740 va_list_name = build_decl (BUILTINS_LOCATION,
2741 TYPE_DECL,
2742 get_identifier ("__va_list"),
2743 va_list_type);
2744 DECL_ARTIFICIAL (va_list_name) = 1;
2745 TYPE_NAME (va_list_type) = va_list_name;
2746 TYPE_STUB_DECL (va_list_type) = va_list_name;
2747 /* Create the __ap field. */
2748 ap_field = build_decl (BUILTINS_LOCATION,
2749 FIELD_DECL,
2750 get_identifier ("__ap"),
2751 ptr_type_node);
2752 DECL_ARTIFICIAL (ap_field) = 1;
2753 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2754 TYPE_FIELDS (va_list_type) = ap_field;
2755 /* Compute its layout. */
2756 layout_type (va_list_type);
2757
2758 return va_list_type;
2759 }
2760
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2764 static tree
2765 arm_extract_valist_ptr (tree valist)
2766 {
2767 if (TREE_TYPE (valist) == error_mark_node)
2768 return error_mark_node;
2769
2770 /* On an AAPCS target, the pointer is stored within "struct
2771 va_list". */
2772 if (TARGET_AAPCS_BASED)
2773 {
2774 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2775 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2776 valist, ap_field, NULL_TREE);
2777 }
2778
2779 return valist;
2780 }
2781
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2783 static void
2784 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2785 {
2786 valist = arm_extract_valist_ptr (valist);
2787 std_expand_builtin_va_start (valist, nextarg);
2788 }
2789
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2791 static tree
2792 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2793 gimple_seq *post_p)
2794 {
2795 valist = arm_extract_valist_ptr (valist);
2796 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2797 }
2798
2799 /* Check any incompatible options that the user has specified. */
2800 static void
2801 arm_option_check_internal (struct gcc_options *opts)
2802 {
2803 int flags = opts->x_target_flags;
2804
2805 /* iWMMXt and NEON are incompatible. */
2806 if (TARGET_IWMMXT
2807 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2808 error ("iWMMXt and NEON are incompatible");
2809
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags)
2813 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2814 error ("target CPU does not support ARM mode");
2815
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2819
2820 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2822
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags)
2826 && write_symbols != NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2830
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2833 error ("iWMMXt unsupported under Thumb mode");
2834
2835 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2837
2838 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2839 {
2840 error ("RTP PIC is incompatible with Thumb");
2841 flag_pic = 0;
2842 }
2843
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2845 with MOVT. */
2846 if ((target_pure_code || target_slow_flash_data)
2847 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2848 {
2849 const char *flag = (target_pure_code ? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag);
2853 }
2854
2855 }
2856
2857 /* Recompute the global settings depending on target attribute options. */
2858
2859 static void
2860 arm_option_params_internal (void)
2861 {
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2864 if (TARGET_THUMB1)
2865 {
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
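/* Concretely: the 5-bit unsigned immediate is scaled by the access size, so
   byte loads reach offsets 0..31, halfword loads 0..62 and word loads
   0..124 from the base register.  */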
2871 targetm.min_anchor_offset = 0;
2872 targetm.max_anchor_offset = 127;
2873 }
2874 else if (TARGET_THUMB2)
2875 {
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 bytes, which is
2878 divisible by eight, ensuring natural spacing of anchors. */
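/* That is, 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.  */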
2879 targetm.min_anchor_offset = -248;
2880 targetm.max_anchor_offset = 4095;
2881 }
2882 else
2883 {
2884 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2885 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2886 }
2887
2888 /* When optimizing for size, use a fixed limit of 4 conditional instructions instead of the tuning default. */
2889 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2890
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2892 if (TARGET_THUMB2)
2893 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2894 }
2895
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2899
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2902
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 {
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2909 }
2910
2911 /* Implement targetm.override_options_after_change. */
2912
2913 static void
2914 arm_override_options_after_change (void)
2915 {
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2919
2920 arm_override_options_after_change_1 (&global_options);
2921 }
2922
2923 /* Implement TARGET_OPTION_SAVE. */
2924 static void
2925 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2926 {
2927 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2928 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2929 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2930 }
2931
2932 /* Implement TARGET_OPTION_RESTORE. */
2933 static void
2934 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2935 {
2936 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2937 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2938 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2939 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2940 false);
2941 }
2942
2943 /* Reset options between modes that the user has specified. */
2944 static void
2945 arm_option_override_internal (struct gcc_options *opts,
2946 struct gcc_options *opts_set)
2947 {
2948 arm_override_options_after_change_1 (opts);
2949
2950 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2951 {
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, e.g., -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts->x_target_flags &= ~MASK_INTERWORK;
2956 }
2957
2958 if (TARGET_THUMB_P (opts->x_target_flags)
2959 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2960 {
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts->x_target_flags &= ~MASK_THUMB;
2963 }
2964
2965 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2966 {
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts->x_target_flags &= ~MASK_APCS_FRAME;
2969 }
2970
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2974 opts->x_target_flags |= MASK_INTERWORK;
2975
2976 /* Need to remember initial values so combinations of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2979
2980 if (! opts_set->x_arm_restrict_it)
2981 opts->x_arm_restrict_it = arm_arch8;
2982
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2985 opts->x_arm_restrict_it = 0;
2986
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991 - ARMv8 architecture-based processors.
2992
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2997
2998 if (! opts_set->x_unaligned_access)
2999 {
3000 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3001 && arm_arch6 && (arm_arch_notm || arm_arch7));
3002 }
3003 else if (opts->x_unaligned_access == 1
3004 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3005 {
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts->x_unaligned_access = 0;
3008 }
3009
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts->x_target_flags))
3012 opts->x_flag_schedule_insns = 0;
3013 else
3014 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3015
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun)
3019 && TARGET_THUMB2_P (opts->x_target_flags))
3020 opts->x_flag_shrink_wrap = false;
3021 else
3022 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3023
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3031 fipa-ra. */
3032 if (TARGET_THUMB1_P (opts->x_target_flags))
3033 opts->x_flag_ipa_ra = 0;
3034 else
3035 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3036
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3040
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3043 #endif
3044 }
3045
3046 static sbitmap isa_all_fpubits;
3047 static sbitmap isa_quirkbits;
3048
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
3052 void
3053 arm_configure_build_target (struct arm_build_target *target,
3054 struct cl_target_option *opts,
3055 struct gcc_options *opts_set,
3056 bool warn_compatible)
3057 {
3058 const cpu_option *arm_selected_tune = NULL;
3059 const arch_option *arm_selected_arch = NULL;
3060 const cpu_option *arm_selected_cpu = NULL;
3061 const arm_fpu_desc *arm_selected_fpu = NULL;
3062 const char *tune_opts = NULL;
3063 const char *arch_opts = NULL;
3064 const char *cpu_opts = NULL;
3065
3066 bitmap_clear (target->isa);
3067 target->core_name = NULL;
3068 target->arch_name = NULL;
3069
3070 if (opts_set->x_arm_arch_string)
3071 {
3072 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3073 "-march",
3074 opts->x_arm_arch_string);
3075 arch_opts = strchr (opts->x_arm_arch_string, '+');
3076 }
3077
3078 if (opts_set->x_arm_cpu_string)
3079 {
3080 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3081 opts->x_arm_cpu_string);
3082 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3083 arm_selected_tune = arm_selected_cpu;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3086 }
3087
3088 if (opts_set->x_arm_tune_string)
3089 {
3090 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3091 opts->x_arm_tune_string);
3092 tune_opts = strchr (opts->x_arm_tune_string, '+');
3093 }
3094
3095 if (arm_selected_arch)
3096 {
3097 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3098 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3099 arch_opts);
3100
3101 if (arm_selected_cpu)
3102 {
3103 auto_sbitmap cpu_isa (isa_num_bits);
3104 auto_sbitmap isa_delta (isa_num_bits);
3105
3106 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3107 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3108 cpu_opts);
3109 bitmap_xor (isa_delta, cpu_isa, target->isa);
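/* isa_delta now holds the feature bits on which the -mcpu and -march
   selections disagree.  */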
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3114
3115 if (!bitmap_empty_p (isa_delta))
3116 {
3117 if (warn_compatible)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu->common.name,
3120 arm_selected_arch->common.name);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
3123 if (!arm_selected_tune)
3124 arm_selected_tune = arm_selected_cpu;
3125
3126 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3127 target->arch_name = arm_selected_arch->common.name;
3128 }
3129 else
3130 {
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3134 target->core_name = arm_selected_cpu->common.name;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target->isa, cpu_isa);
3138 }
3139 }
3140 else
3141 {
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3144 target->arch_name = arm_selected_arch->common.name;
3145 /* Note: target->core_name is left unset in this path. */
3146 }
3147 }
3148 else if (arm_selected_cpu)
3149 {
3150 target->core_name = arm_selected_cpu->common.name;
3151 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3152 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3153 cpu_opts);
3154 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3155 }
3156 /* If the user did not specify a processor or architecture, choose
3157 one for them. */
3158 else
3159 {
3160 const cpu_option *sel;
3161 auto_sbitmap sought_isa (isa_num_bits);
3162 bitmap_clear (sought_isa);
3163 auto_sbitmap default_isa (isa_num_bits);
3164
3165 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3166 TARGET_CPU_DEFAULT);
3167 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3168 gcc_assert (arm_selected_cpu->common.name);
3169
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel = arm_selected_cpu;
3177 arm_initialize_isa (default_isa, sel->common.isa_bits);
3178 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3179 cpu_opts);
3180
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3183
3184 if (TARGET_INTERWORK || TARGET_THUMB)
3185 {
3186 bitmap_set_bit (sought_isa, isa_bit_thumb);
3187 bitmap_set_bit (sought_isa, isa_bit_mode32);
3188
3189 /* There are no ARM processors that support both APCS-26 and
3190 interworking. Therefore we forcibly remove MODE26 from
3191 the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3193 processor. */
3194 bitmap_clear_bit (default_isa, isa_bit_mode26);
3195 }
3196
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa)
3201 && !bitmap_subset_p (sought_isa, default_isa))
3202 {
3203 auto_sbitmap candidate_isa (isa_num_bits);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa, default_isa, sought_isa);
3208
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3211 the user. */
3212 for (sel = all_cores; sel->common.name != NULL; sel++)
3213 {
3214 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa, candidate_isa))
3217 break;
3218 }
3219
3220 if (sel->common.name == NULL)
3221 {
3222 unsigned current_bit_count = isa_num_bits;
3223 const cpu_option *best_fit = NULL;
3224
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3231
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel = all_cores; sel->common.name != NULL; sel++)
3238 {
3239 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3240
3241 if (bitmap_subset_p (default_isa, candidate_isa))
3242 {
3243 unsigned count;
3244
3245 bitmap_and_compl (candidate_isa, candidate_isa,
3246 default_isa);
3247 count = bitmap_popcount (candidate_isa);
3248
3249 if (count < current_bit_count)
3250 {
3251 best_fit = sel;
3252 current_bit_count = count;
3253 }
3254 }
3255
3256 gcc_assert (best_fit);
3257 sel = best_fit;
3258 }
3259 }
3260 arm_selected_cpu = sel;
3261 }
3262
3263 /* Now we know the CPU, we can finally initialize the target
3264 structure. */
3265 target->core_name = arm_selected_cpu->common.name;
3266 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3270 }
3271
3272 gcc_assert (arm_selected_cpu);
3273 gcc_assert (arm_selected_arch);
3274
3275 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3276 {
3277 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3278 auto_sbitmap fpu_bits (isa_num_bits);
3279
3280 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3281 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3282 bitmap_ior (target->isa, target->isa, fpu_bits);
3283 }
3284
3285 if (!arm_selected_tune)
3286 arm_selected_tune = arm_selected_cpu;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3289
3290 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3291
3292 /* Finish initializing the target structure. */
3293 target->arch_pp_name = arm_selected_arch->arch;
3294 target->base_arch = arm_selected_arch->base_arch;
3295 target->profile = arm_selected_arch->profile;
3296
3297 target->tune_flags = tune_data->tune_flags;
3298 target->tune = tune_data->tune;
3299 target->tune_core = tune_data->scheduler;
3300 }
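
/* [Editorial sketch -- not part of arm.c.]  A minimal standalone illustration
   of the -mcpu/-march reconciliation above, using a plain uint64_t in place
   of GCC's sbitmap.  The QUIRK_BITS/FPU_BITS masks and bit positions are
   invented for the example; only the shape of the check (XOR the two feature
   sets, mask out quirk and FPU bits, warn if anything remains) mirrors
   arm_configure_build_target.  */
#include <stdint.h>
#include <stdio.h>

#define QUIRK_BITS 0x0f00000000000000ull  /* hypothetical quirk features */
#define FPU_BITS   0x00000000ffff0000ull  /* hypothetical -mfpu features */

static void
check_cpu_arch_delta (uint64_t cpu_isa, uint64_t arch_isa)
{
  uint64_t delta = cpu_isa ^ arch_isa;    /* features present in only one */
  delta &= ~(QUIRK_BITS | FPU_BITS);      /* quirks and FPU don't count   */
  if (delta != 0)
    printf ("warning: -mcpu conflicts with -march (delta %#llx)\n",
            (unsigned long long) delta);
}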
3301
3302 /* Fix up any incompatible options that the user has specified. */
3303 static void
3304 arm_option_override (void)
3305 {
3306 static const enum isa_feature fpu_bitlist[]
3307 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3308 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3309 cl_target_option opts;
3310
3311 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3313
3314 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3315 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3316
3317 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3318
3319 if (!global_options_set.x_arm_fpu_index)
3320 {
3321 bool ok;
3322 int fpu_index;
3323
3324 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3325 CL_TARGET);
3326 gcc_assert (ok);
3327 arm_fpu_index = (enum fpu_type) fpu_index;
3328 }
3329
3330 cl_target_option_save (&opts, &global_options);
3331 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3332 true);
3333
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS;
3336 #endif
3337
3338 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3339 arm_base_arch = arm_active_target.base_arch;
3340
3341 arm_tune = arm_active_target.tune_core;
3342 tune_flags = arm_active_target.tune_flags;
3343 current_tune = arm_active_target.tune;
3344
3345 /* TBD: Dwarf info for apcs frame is not handled yet. */
3346 if (TARGET_APCS_FRAME)
3347 flag_shrink_wrap = false;
3348
3349 /* BPABI targets use linker tricks to allow interworking on cores
3350 without thumb support. */
3351 if (TARGET_INTERWORK
3352 && !TARGET_BPABI
3353 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3354 {
3355 warning (0, "target CPU does not support interworking");
3356 target_flags &= ~MASK_INTERWORK;
3357 }
3358
3359 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3360 {
3361 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3362 target_flags |= MASK_APCS_FRAME;
3363 }
3364
3365 if (TARGET_POKE_FUNCTION_NAME)
3366 target_flags |= MASK_APCS_FRAME;
3367
3368 if (TARGET_APCS_REENT && flag_pic)
3369 error ("-fpic and -mapcs-reent are incompatible");
3370
3371 if (TARGET_APCS_REENT)
3372 warning (0, "APCS reentrant code not supported. Ignored");
3373
3374 /* Initialize boolean versions of the architectural flags, for use
3375 in the arm.md file. */
3376 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3377 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3378 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3379 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3380 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3381 arm_arch5te = arm_arch5e
3382 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3383 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3384 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3385 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3386 arm_arch6m = arm_arch6 && !arm_arch_notm;
3387 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3388 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3389 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3390 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3391 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3392 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3393 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3394 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3395 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3396 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3397 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3398 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3399 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3400 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3401 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3402 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3403 if (arm_fp16_inst)
3404 {
3405 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3406 error ("selected fp16 options are incompatible");
3407 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3408 }
3409
3410
3411 /* Set up some tuning parameters. */
3412 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3413 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3414 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3415 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3416 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3417 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3418
3419 /* And finally, set up some quirks. */
3420 arm_arch_no_volatile_ce
3421 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3422 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3423 isa_bit_quirk_armv6kz);
3424
3425 /* V5 code we generate is completely interworking capable, so we turn off
3426 TARGET_INTERWORK here to avoid many tests later on. */
3427
3428 /* XXX However, we must pass the right pre-processor defines to CPP
3429 or GLD can get confused. This is a hack. */
3430 if (TARGET_INTERWORK)
3431 arm_cpp_interwork = 1;
3432
3433 if (arm_arch5)
3434 target_flags &= ~MASK_INTERWORK;
3435
3436 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3437 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3438
3439 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3440 error ("iwmmxt abi requires an iwmmxt capable cpu");
3441
3442 /* If soft-float is specified then don't use FPU. */
3443 if (TARGET_SOFT_FLOAT)
3444 arm_fpu_attr = FPU_NONE;
3445 else
3446 arm_fpu_attr = FPU_VFP;
3447
3448 if (TARGET_AAPCS_BASED)
3449 {
3450 if (TARGET_CALLER_INTERWORKING)
3451 error ("AAPCS does not support -mcaller-super-interworking");
3452 else
3453 if (TARGET_CALLEE_INTERWORKING)
3454 error ("AAPCS does not support -mcallee-super-interworking");
3455 }
3456
3457 /* __fp16 support currently assumes the core has ldrh. */
3458 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3459 sorry ("__fp16 and no ldrh");
3460
3461 if (TARGET_AAPCS_BASED)
3462 {
3463 if (arm_abi == ARM_ABI_IWMMXT)
3464 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3465 else if (TARGET_HARD_FLOAT_ABI)
3466 {
3467 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3468 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3469 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3470 }
3471 else
3472 arm_pcs_default = ARM_PCS_AAPCS;
3473 }
3474 else
3475 {
3476 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3477 sorry ("-mfloat-abi=hard and VFP");
3478
3479 if (arm_abi == ARM_ABI_APCS)
3480 arm_pcs_default = ARM_PCS_APCS;
3481 else
3482 arm_pcs_default = ARM_PCS_ATPCS;
3483 }
3484
3485 /* For arm2/3 there is no need to do any scheduling if we are doing
3486 software floating-point. */
3487 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3488 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3489
3490 /* Use the cp15 method if it is available. */
3491 if (target_thread_pointer == TP_AUTO)
3492 {
3493 if (arm_arch6k && !TARGET_THUMB1)
3494 target_thread_pointer = TP_CP15;
3495 else
3496 target_thread_pointer = TP_SOFT;
3497 }
3498
3499 /* Override the default structure alignment for AAPCS ABI. */
3500 if (!global_options_set.x_arm_structure_size_boundary)
3501 {
3502 if (TARGET_AAPCS_BASED)
3503 arm_structure_size_boundary = 8;
3504 }
3505 else
3506 {
3507 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3508
3509 if (arm_structure_size_boundary != 8
3510 && arm_structure_size_boundary != 32
3511 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3512 {
3513 if (ARM_DOUBLEWORD_ALIGN)
3514 warning (0,
3515 "structure size boundary can only be set to 8, 32 or 64");
3516 else
3517 warning (0, "structure size boundary can only be set to 8 or 32");
3518 arm_structure_size_boundary
3519 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3520 }
3521 }
3522
3523 if (TARGET_VXWORKS_RTP)
3524 {
3525 if (!global_options_set.x_arm_pic_data_is_text_relative)
3526 arm_pic_data_is_text_relative = 0;
3527 }
3528 else if (flag_pic
3529 && !arm_pic_data_is_text_relative
3530 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3531 /* When text & data segments don't have a fixed displacement, the
3532 intended use is with a single, read only, pic base register.
3533 Unless the user explicitly requested not to do that, set
3534 it. */
3535 target_flags |= MASK_SINGLE_PIC_BASE;
3536
3537 /* If stack checking is disabled, we can use r10 as the PIC register,
3538 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3539 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3540 {
3541 if (TARGET_VXWORKS_RTP)
3542 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3543 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3544 }
3545
3546 if (flag_pic && TARGET_VXWORKS_RTP)
3547 arm_pic_register = 9;
3548
3549 if (arm_pic_register_string != NULL)
3550 {
3551 int pic_register = decode_reg_name (arm_pic_register_string);
3552
3553 if (!flag_pic)
3554 warning (0, "-mpic-register= is useless without -fpic");
3555
3556 /* Prevent the user from choosing an obviously stupid PIC register. */
3557 else if (pic_register < 0 || call_used_regs[pic_register]
3558 || pic_register == HARD_FRAME_POINTER_REGNUM
3559 || pic_register == STACK_POINTER_REGNUM
3560 || pic_register >= PC_REGNUM
3561 || (TARGET_VXWORKS_RTP
3562 && (unsigned int) pic_register != arm_pic_register))
3563 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3564 else
3565 arm_pic_register = pic_register;
3566 }
3567
3568 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3569 if (fix_cm3_ldrd == 2)
3570 {
3571 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3572 fix_cm3_ldrd = 1;
3573 else
3574 fix_cm3_ldrd = 0;
3575 }
3576
3577 /* Hot/Cold partitioning is not currently supported, since we can't
3578 handle literal pool placement in that case. */
3579 if (flag_reorder_blocks_and_partition)
3580 {
3581 inform (input_location,
3582 "-freorder-blocks-and-partition not supported on this architecture");
3583 flag_reorder_blocks_and_partition = 0;
3584 flag_reorder_blocks = 1;
3585 }
3586
3587 if (flag_pic)
3588 /* Hoisting PIC address calculations more aggressively provides a small,
3589 but measurable, size reduction for PIC code. Therefore, we decrease
3590 the bar for unrestricted expression hoisting to the cost of PIC address
3591 calculation, which is 2 instructions. */
3592 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3595
3596 /* ARM EABI defaults to strict volatile bitfields. */
3597 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3598 && abi_version_at_least(2))
3599 flag_strict_volatile_bitfields = 1;
3600
3601 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3602 have deemed it beneficial (signified by setting
3603 prefetch.num_slots to 1 or more). */
3604 if (flag_prefetch_loop_arrays < 0
3605 && HAVE_prefetch
3606 && optimize >= 3
3607 && current_tune->prefetch.num_slots > 0)
3608 flag_prefetch_loop_arrays = 1;
3609
3610 /* Set up parameters to be used in the prefetching algorithm. Do not
3611 override the defaults unless we are tuning for a core we have
3612 researched values for. */
3613 if (current_tune->prefetch.num_slots > 0)
3614 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3615 current_tune->prefetch.num_slots,
3616 global_options.x_param_values,
3617 global_options_set.x_param_values);
3618 if (current_tune->prefetch.l1_cache_line_size >= 0)
3619 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3620 current_tune->prefetch.l1_cache_line_size,
3621 global_options.x_param_values,
3622 global_options_set.x_param_values);
3623 if (current_tune->prefetch.l1_cache_size >= 0)
3624 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3625 current_tune->prefetch.l1_cache_size,
3626 global_options.x_param_values,
3627 global_options_set.x_param_values);
3628
3629 /* Use Neon to perform 64-bit operations rather than core
3630 registers. */
3631 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3632 if (use_neon_for_64bits == 1)
3633 prefer_neon_for_64bits = true;
3634
3635 /* Use the alternative scheduling-pressure algorithm by default. */
3636 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3637 global_options.x_param_values,
3638 global_options_set.x_param_values);
3639
3640 /* Look through ready list and all of queue for instructions
3641 relevant for L2 auto-prefetcher. */
3642 int param_sched_autopref_queue_depth;
3643
3644 switch (current_tune->sched_autopref)
3645 {
3646 case tune_params::SCHED_AUTOPREF_OFF:
3647 param_sched_autopref_queue_depth = -1;
3648 break;
3649
3650 case tune_params::SCHED_AUTOPREF_RANK:
3651 param_sched_autopref_queue_depth = 0;
3652 break;
3653
3654 case tune_params::SCHED_AUTOPREF_FULL:
3655 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3656 break;
3657
3658 default:
3659 gcc_unreachable ();
3660 }
3661
3662 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3663 param_sched_autopref_queue_depth,
3664 global_options.x_param_values,
3665 global_options_set.x_param_values);
3666
3667 /* Currently, for slow flash data, we just disable literal pools. We also
3668 disable it for pure-code. */
3669 if (target_slow_flash_data || target_pure_code)
3670 arm_disable_literal_pool = true;
3671
3672 if (use_cmse && !arm_arch_cmse)
3673 error ("target CPU does not support ARMv8-M Security Extensions");
3674
3675 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3676 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3677 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3678 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3679
3680 /* Disable scheduling fusion by default if the processor is not ARMv7
3681 or does not prefer ldrd/strd. */
3682 if (flag_schedule_fusion == 2
3683 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3684 flag_schedule_fusion = 0;
3685
3686 /* Need to remember initial options before they are overridden. */
3687 init_optimize = build_optimization_node (&global_options);
3688
3689 arm_option_override_internal (&global_options, &global_options_set);
3690 arm_option_check_internal (&global_options);
3691 arm_option_params_internal ();
3692
3693 /* Create the default target_options structure. */
3694 target_option_default_node = target_option_current_node
3695 = build_target_option_node (&global_options);
3696
3697 /* Register global variables with the garbage collector. */
3698 arm_add_gc_roots ();
3699
3700 /* Init initial mode for testing. */
3701 thumb_flipper = TARGET_THUMB;
3702 }
3703
3704 static void
3705 arm_add_gc_roots (void)
3706 {
3707 gcc_obstack_init(&minipool_obstack);
3708 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3709 }
3710 \f
3711 /* A table of known ARM exception types.
3712 For use with the interrupt function attribute. */
3713
3714 typedef struct
3715 {
3716 const char *const arg;
3717 const unsigned long return_value;
3718 }
3719 isr_attribute_arg;
3720
3721 static const isr_attribute_arg isr_attribute_args [] =
3722 {
3723 { "IRQ", ARM_FT_ISR },
3724 { "irq", ARM_FT_ISR },
3725 { "FIQ", ARM_FT_FIQ },
3726 { "fiq", ARM_FT_FIQ },
3727 { "ABORT", ARM_FT_ISR },
3728 { "abort", ARM_FT_ISR },
3729 { "ABORT", ARM_FT_ISR },
3730 { "abort", ARM_FT_ISR },
3731 { "UNDEF", ARM_FT_EXCEPTION },
3732 { "undef", ARM_FT_EXCEPTION },
3733 { "SWI", ARM_FT_EXCEPTION },
3734 { "swi", ARM_FT_EXCEPTION },
3735 { NULL, ARM_FT_NORMAL }
3736 };
3737
3738 /* Returns the (interrupt) function type of the current
3739 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3740
3741 static unsigned long
3742 arm_isr_value (tree argument)
3743 {
3744 const isr_attribute_arg * ptr;
3745 const char * arg;
3746
3747 if (!arm_arch_notm)
3748 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3749
3750 /* No argument - default to IRQ. */
3751 if (argument == NULL_TREE)
3752 return ARM_FT_ISR;
3753
3754 /* Get the value of the argument. */
3755 if (TREE_VALUE (argument) == NULL_TREE
3756 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3757 return ARM_FT_UNKNOWN;
3758
3759 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3760
3761 /* Check it against the list of known arguments. */
3762 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3763 if (streq (arg, ptr->arg))
3764 return ptr->return_value;
3765
3766 /* An unrecognized interrupt type. */
3767 return ARM_FT_UNKNOWN;
3768 }
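
/* [Editorial sketch -- not part of arm.c.]  An example of the source-level
   attribute whose string argument ends up in arm_isr_value above: the string
   is matched against isr_attribute_args, so "FIQ" maps to ARM_FT_FIQ and the
   argument-less form falls back to ARM_FT_ISR (IRQ).  The handler names are
   arbitrary.  */
void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
void irq_handler (void) __attribute__ ((interrupt));   /* defaults to IRQ */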
3769
3770 /* Computes the type of the current function. */
3771
3772 static unsigned long
3773 arm_compute_func_type (void)
3774 {
3775 unsigned long type = ARM_FT_UNKNOWN;
3776 tree a;
3777 tree attr;
3778
3779 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3780
3781 /* Decide if the current function is volatile. Such functions
3782 never return, and many memory cycles can be saved by not storing
3783 register values that will never be needed again. This optimization
3784 was added to speed up context switching in a kernel application. */
3785 if (optimize > 0
3786 && (TREE_NOTHROW (current_function_decl)
3787 || !(flag_unwind_tables
3788 || (flag_exceptions
3789 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3790 && TREE_THIS_VOLATILE (current_function_decl))
3791 type |= ARM_FT_VOLATILE;
3792
3793 if (cfun->static_chain_decl != NULL)
3794 type |= ARM_FT_NESTED;
3795
3796 attr = DECL_ATTRIBUTES (current_function_decl);
3797
3798 a = lookup_attribute ("naked", attr);
3799 if (a != NULL_TREE)
3800 type |= ARM_FT_NAKED;
3801
3802 a = lookup_attribute ("isr", attr);
3803 if (a == NULL_TREE)
3804 a = lookup_attribute ("interrupt", attr);
3805
3806 if (a == NULL_TREE)
3807 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3808 else
3809 type |= arm_isr_value (TREE_VALUE (a));
3810
3811 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3812 type |= ARM_FT_CMSE_ENTRY;
3813
3814 return type;
3815 }
3816
3817 /* Returns the type of the current function. */
3818
3819 unsigned long
3820 arm_current_func_type (void)
3821 {
3822 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3823 cfun->machine->func_type = arm_compute_func_type ();
3824
3825 return cfun->machine->func_type;
3826 }
3827
3828 bool
3829 arm_allocate_stack_slots_for_args (void)
3830 {
3831 /* Naked functions should not allocate stack slots for arguments. */
3832 return !IS_NAKED (arm_current_func_type ());
3833 }
3834
3835 static bool
3836 arm_warn_func_return (tree decl)
3837 {
3838 /* Naked functions are implemented entirely in assembly, including the
3839 return sequence, so suppress warnings about this. */
3840 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3841 }
3842
3843 \f
3844 /* Output assembler code for a block containing the constant parts
3845 of a trampoline, leaving space for the variable parts.
3846
3847 On the ARM, (if r8 is the static chain regnum, and remembering that
3848 referencing pc adds an offset of 8) the trampoline looks like:
3849 ldr r8, [pc, #0]
3850 ldr pc, [pc]
3851 .word static chain value
3852 .word function's address
3853 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3854
3855 static void
3856 arm_asm_trampoline_template (FILE *f)
3857 {
3858 fprintf (f, "\t.syntax unified\n");
3859
3860 if (TARGET_ARM)
3861 {
3862 fprintf (f, "\t.arm\n");
3863 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3864 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3865 }
3866 else if (TARGET_THUMB2)
3867 {
3868 fprintf (f, "\t.thumb\n");
3869 /* The Thumb-2 trampoline is similar to the arm implementation.
3870 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3871 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3872 STATIC_CHAIN_REGNUM, PC_REGNUM);
3873 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3874 }
3875 else
3876 {
3877 ASM_OUTPUT_ALIGN (f, 2);
3878 fprintf (f, "\t.code\t16\n");
3879 fprintf (f, ".Ltrampoline_start:\n");
3880 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3881 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3882 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3883 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3884 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3885 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3886 }
3887 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3888 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3889 }
3890
3891 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3892
3893 static void
3894 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3895 {
3896 rtx fnaddr, mem, a_tramp;
3897
3898 emit_block_move (m_tramp, assemble_trampoline_template (),
3899 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3900
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3902 emit_move_insn (mem, chain_value);
3903
3904 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3905 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3906 emit_move_insn (mem, fnaddr);
3907
3908 a_tramp = XEXP (m_tramp, 0);
3909 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3910 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3911 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3912 }
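
/* [Editorial sketch -- not part of arm.c.]  What an initialized 32-bit
   (ARM/Thumb-2) trampoline looks like in memory: the two fixed instructions
   emitted by arm_asm_trampoline_template, followed by the two words that
   arm_trampoline_init patches at offsets 8 and 12.  The struct is purely
   illustrative -- GCC manipulates the m_tramp memory block directly, not a
   C structure.  */
#include <stdint.h>

struct arm_trampoline_32
{
  uint32_t insn[2];       /* ldr <static chain reg>, [pc, ...]; ldr pc, [pc] */
  uint32_t static_chain;  /* offset 8: written by arm_trampoline_init        */
  uint32_t target_fn;     /* offset 12: address of the nested function       */
};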
3913
3914 /* Thumb trampolines should be entered in thumb mode, so set
3915 the bottom bit of the address. */
3916
3917 static rtx
3918 arm_trampoline_adjust_address (rtx addr)
3919 {
3920 if (TARGET_THUMB)
3921 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3922 NULL, 0, OPTAB_LIB_WIDEN);
3923 return addr;
3924 }
3925 \f
3926 /* Return 1 if it is possible to return using a single instruction.
3927 If SIBLING is non-null, this is a test for a return before a sibling
3928 call. SIBLING is the call insn, so we can examine its register usage. */
3929
3930 int
3931 use_return_insn (int iscond, rtx sibling)
3932 {
3933 int regno;
3934 unsigned int func_type;
3935 unsigned long saved_int_regs;
3936 unsigned HOST_WIDE_INT stack_adjust;
3937 arm_stack_offsets *offsets;
3938
3939 /* Never use a return instruction before reload has run. */
3940 if (!reload_completed)
3941 return 0;
3942
3943 func_type = arm_current_func_type ();
3944
3945 /* Naked, volatile and stack alignment functions need special
3946 consideration. */
3947 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3948 return 0;
3949
3950 /* So do interrupt functions that use the frame pointer and Thumb
3951 interrupt functions. */
3952 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3953 return 0;
3954
3955 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3956 && !optimize_function_for_size_p (cfun))
3957 return 0;
3958
3959 offsets = arm_get_frame_offsets ();
3960 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3961
3962 /* As do variadic functions. */
3963 if (crtl->args.pretend_args_size
3964 || cfun->machine->uses_anonymous_args
3965 /* Or if the function calls __builtin_eh_return () */
3966 || crtl->calls_eh_return
3967 /* Or if the function calls alloca */
3968 || cfun->calls_alloca
3969 /* Or if there is a stack adjustment. However, if the stack pointer
3970 is saved on the stack, we can use a pre-incrementing stack load. */
3971 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3972 && stack_adjust == 4))
3973 /* Or if the static chain register was saved above the frame, under the
3974 assumption that the stack pointer isn't saved on the stack. */
3975 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3976 && arm_compute_static_chain_stack_bytes() != 0))
3977 return 0;
3978
3979 saved_int_regs = offsets->saved_regs_mask;
3980
3981 /* Unfortunately, the insn
3982
3983 ldmib sp, {..., sp, ...}
3984
3985 triggers a bug on most SA-110 based devices, such that the stack
3986 pointer won't be correctly restored if the instruction takes a
3987 page fault. We work around this problem by popping r3 along with
3988 the other registers, since that is never slower than executing
3989 another instruction.
3990
3991 We test for !arm_arch5 here, because code for any architecture
3992 less than this could potentially be run on one of the buggy
3993 chips. */
3994 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3995 {
3996 /* Validate that r3 is a call-clobbered register (always true in
3997 the default abi) ... */
3998 if (!call_used_regs[3])
3999 return 0;
4000
4001 /* ... that it isn't being used for a return value ... */
4002 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4003 return 0;
4004
4005 /* ... or for a tail-call argument ... */
4006 if (sibling)
4007 {
4008 gcc_assert (CALL_P (sibling));
4009
4010 if (find_regno_fusage (sibling, USE, 3))
4011 return 0;
4012 }
4013
4014 /* ... and that there are no call-saved registers in r0-r2
4015 (always true in the default ABI). */
4016 if (saved_int_regs & 0x7)
4017 return 0;
4018 }
4019
4020 /* Can't be done if interworking with Thumb, and any registers have been
4021 stacked. */
4022 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4023 return 0;
4024
4025 /* On StrongARM, conditional returns are expensive if they aren't
4026 taken and multiple registers have been stacked. */
4027 if (iscond && arm_tune_strongarm)
4028 {
4029 /* Conditional return when just the LR is stored is a simple
4030 conditional-load instruction, that's not expensive. */
4031 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4032 return 0;
4033
4034 if (flag_pic
4035 && arm_pic_register != INVALID_REGNUM
4036 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4037 return 0;
4038 }
4039
4040 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4041 several instructions if anything needs to be popped. */
4042 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4043 return 0;
4044
4045 /* If there are saved registers but the LR isn't saved, then we need
4046 two instructions for the return. */
4047 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4048 return 0;
4049
4050 /* Can't be done if any of the VFP regs are pushed,
4051 since this also requires an insn. */
4052 if (TARGET_HARD_FLOAT)
4053 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4054 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4055 return 0;
4056
4057 if (TARGET_REALLY_IWMMXT)
4058 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4059 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4060 return 0;
4061
4062 return 1;
4063 }
4064
4065 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4066 shrink-wrapping if possible. This is the case if we need to emit a
4067 prologue, which we can test by looking at the offsets. */
4068 bool
4069 use_simple_return_p (void)
4070 {
4071 arm_stack_offsets *offsets;
4072
4073 /* Note this function can be called before or after reload. */
4074 if (!reload_completed)
4075 arm_compute_frame_layout ();
4076
4077 offsets = arm_get_frame_offsets ();
4078 return offsets->outgoing_args != 0;
4079 }
4080
4081 /* Return TRUE if int I is a valid immediate ARM constant. */
4082
4083 int
4084 const_ok_for_arm (HOST_WIDE_INT i)
4085 {
4086 int lowbit;
4087
4088 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4089 be all zero, or all one. */
4090 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4091 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4092 != ((~(unsigned HOST_WIDE_INT) 0)
4093 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4094 return FALSE;
4095
4096 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4097
4098 /* Fast return for 0 and small values. We must do this for zero, since
4099 the code below can't handle that one case. */
4100 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4101 return TRUE;
4102
4103 /* Get the number of trailing zeros. */
4104 lowbit = ffs((int) i) - 1;
4105
4106 /* Only even shifts are allowed in ARM mode so round down to the
4107 nearest even number. */
4108 if (TARGET_ARM)
4109 lowbit &= ~1;
4110
4111 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4112 return TRUE;
4113
4114 if (TARGET_ARM)
4115 {
4116 /* Allow rotated constants in ARM mode. */
4117 if (lowbit <= 4
4118 && ((i & ~0xc000003f) == 0
4119 || (i & ~0xf000000f) == 0
4120 || (i & ~0xfc000003) == 0))
4121 return TRUE;
4122 }
4123 else if (TARGET_THUMB2)
4124 {
4125 HOST_WIDE_INT v;
4126
4127 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4128 v = i & 0xff;
4129 v |= v << 16;
4130 if (i == v || i == (v | (v << 8)))
4131 return TRUE;
4132
4133 /* Allow repeated pattern 0xXY00XY00. */
4134 v = i & 0xff00;
4135 v |= v << 16;
4136 if (i == v)
4137 return TRUE;
4138 }
4139 else if (TARGET_HAVE_MOVT)
4140 {
4141 /* Thumb-1 Targets with MOVT. */
4142 if (i > 0xffff)
4143 return FALSE;
4144 else
4145 return TRUE;
4146 }
4147
4148 return FALSE;
4149 }
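
/* [Editorial sketch -- not part of arm.c.]  A standalone, brute-force version
   of the ARM-mode test above: a 32-bit value is a valid data-processing
   immediate if it is an 8-bit value rotated right by an even amount.
   const_ok_for_arm itself uses the trailing-zero shortcut instead of a loop;
   this form is only meant to make the encoding rule concrete.  */
#include <stdint.h>

static int
is_arm_immediate (uint32_t x)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a rotate-right of the 8-bit field.  */
      uint32_t v = rot ? (x << rot) | (x >> (32 - rot)) : x;
      if ((v & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
/* For instance, 0xff000000 and 0x000003fc are accepted, 0x00000101 is not.  */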
4150
4151 /* Return true if I is a valid constant for the operation CODE. */
4152 int
4153 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4154 {
4155 if (const_ok_for_arm (i))
4156 return 1;
4157
4158 switch (code)
4159 {
4160 case SET:
4161 /* See if we can use movw. */
4162 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4163 return 1;
4164 else
4165 /* Otherwise, try mvn. */
4166 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4167
4168 case PLUS:
4169 /* See if we can use addw or subw. */
4170 if (TARGET_THUMB2
4171 && ((i & 0xfffff000) == 0
4172 || ((-i) & 0xfffff000) == 0))
4173 return 1;
4174 /* Fall through. */
4175 case COMPARE:
4176 case EQ:
4177 case NE:
4178 case GT:
4179 case LE:
4180 case LT:
4181 case GE:
4182 case GEU:
4183 case LTU:
4184 case GTU:
4185 case LEU:
4186 case UNORDERED:
4187 case ORDERED:
4188 case UNEQ:
4189 case UNGE:
4190 case UNLT:
4191 case UNGT:
4192 case UNLE:
4193 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4194
4195 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4196 case XOR:
4197 return 0;
4198
4199 case IOR:
4200 if (TARGET_THUMB2)
4201 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4202 return 0;
4203
4204 case AND:
4205 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4206
4207 default:
4208 gcc_unreachable ();
4209 }
4210 }
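
/* [Editorial sketch -- not part of arm.c.]  The broadened test above, reduced
   to its core idea in standalone form: an operation may still take the
   constant in one instruction if the constant itself, its bitwise complement
   (the mvn/bic forms) or its negation (the add<->sub / cmp<->cmn forms) is
   encodable.  is_arm_immediate is the illustrative helper sketched after
   const_ok_for_arm above; the Thumb-2 movw/addw special cases are omitted.  */
static int
const_usable_one_insn (uint32_t i)
{
  return is_arm_immediate (i)
         || is_arm_immediate (~i)
         || is_arm_immediate (-i);
}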
4211
4212 /* Return true if I is a valid di mode constant for the operation CODE. */
4213 int
4214 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4215 {
4216 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4217 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4218 rtx hi = GEN_INT (hi_val);
4219 rtx lo = GEN_INT (lo_val);
4220
4221 if (TARGET_THUMB1)
4222 return 0;
4223
4224 switch (code)
4225 {
4226 case AND:
4227 case IOR:
4228 case XOR:
4229 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4230 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4231 case PLUS:
4232 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4233
4234 default:
4235 return 0;
4236 }
4237 }
4238
4239 /* Emit a sequence of insns to handle a large constant.
4240 CODE is the code of the operation required, it can be any of SET, PLUS,
4241 IOR, AND, XOR, MINUS;
4242 MODE is the mode in which the operation is being performed;
4243 VAL is the integer to operate on;
4244 SOURCE is the other operand (a register, or a null-pointer for SET);
4245 SUBTARGETS means it is safe to create scratch registers if that will
4246 either produce a simpler sequence, or we will want to cse the values.
4247 Return value is the number of insns emitted. */
4248
4249 /* ??? Tweak this for thumb2. */
4250 int
4251 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4252 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4253 {
4254 rtx cond;
4255
4256 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4257 cond = COND_EXEC_TEST (PATTERN (insn));
4258 else
4259 cond = NULL_RTX;
4260
4261 if (subtargets || code == SET
4262 || (REG_P (target) && REG_P (source)
4263 && REGNO (target) != REGNO (source)))
4264 {
4265 /* After arm_reorg has been called, we can't fix up expensive
4266 constants by pushing them into memory so we must synthesize
4267 them in-line, regardless of the cost. This is only likely to
4268 be more costly on chips that have load delay slots and we are
4269 compiling without running the scheduler (so no splitting
4270 occurred before the final instruction emission).
4271
4272 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4273 */
4274 if (!cfun->machine->after_arm_reorg
4275 && !cond
4276 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4277 1, 0)
4278 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4279 + (code != SET))))
4280 {
4281 if (code == SET)
4282 {
4283 /* Currently SET is the only monadic value for CODE, all
4284 the rest are dyadic. */
4285 if (TARGET_USE_MOVT)
4286 arm_emit_movpair (target, GEN_INT (val));
4287 else
4288 emit_set_insn (target, GEN_INT (val));
4289
4290 return 1;
4291 }
4292 else
4293 {
4294 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4295
4296 if (TARGET_USE_MOVT)
4297 arm_emit_movpair (temp, GEN_INT (val));
4298 else
4299 emit_set_insn (temp, GEN_INT (val));
4300
4301 /* For MINUS, SOURCE is subtracted from the constant (val - source),
4302 since we never have subtraction of a constant. */
4303 if (code == MINUS)
4304 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4305 else
4306 emit_set_insn (target,
4307 gen_rtx_fmt_ee (code, mode, source, temp));
4308 return 2;
4309 }
4310 }
4311 }
4312
4313 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4314 1);
4315 }
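
/* [Editorial sketch -- not part of arm.c.]  For SET on a target with
   MOVW/MOVT the path above reduces to arm_emit_movpair, i.e. loading the two
   half-words separately ("movw rd, #lo16; movt rd, #hi16").  The helper below
   just computes those two fields; it is not how GCC emits the pair.  */
#include <stdint.h>

static void
movw_movt_fields (uint32_t val, uint16_t *lo16, uint16_t *hi16)
{
  *lo16 = (uint16_t) (val & 0xffff);  /* movw: writes bits 0-15, zeroes the rest */
  *hi16 = (uint16_t) (val >> 16);     /* movt: writes bits 16-31, keeps bits 0-15 */
}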
4316
4317 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4318 ARM/THUMB2 immediates and add up to VAL.
4319 The function return value gives the number of insns required. */
4320 static int
4321 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4322 struct four_ints *return_sequence)
4323 {
4324 int best_consecutive_zeros = 0;
4325 int i;
4326 int best_start = 0;
4327 int insns1, insns2;
4328 struct four_ints tmp_sequence;
4329
4330 /* If we aren't targeting ARM, the best place to start is always at
4331 the bottom, otherwise look more closely. */
4332 if (TARGET_ARM)
4333 {
4334 for (i = 0; i < 32; i += 2)
4335 {
4336 int consecutive_zeros = 0;
4337
4338 if (!(val & (3 << i)))
4339 {
4340 while ((i < 32) && !(val & (3 << i)))
4341 {
4342 consecutive_zeros += 2;
4343 i += 2;
4344 }
4345 if (consecutive_zeros > best_consecutive_zeros)
4346 {
4347 best_consecutive_zeros = consecutive_zeros;
4348 best_start = i - consecutive_zeros;
4349 }
4350 i -= 2;
4351 }
4352 }
4353 }
4354
4355 /* So long as it won't require any more insns to do so, it's
4356 desirable to emit a small constant (in bits 0...9) in the last
4357 insn. This way there is more chance that it can be combined with
4358 a later addressing insn to form a pre-indexed load or store
4359 operation. Consider:
4360
4361 *((volatile int *)0xe0000100) = 1;
4362 *((volatile int *)0xe0000110) = 2;
4363
4364 We want this to wind up as:
4365
4366 mov rA, #0xe0000000
4367 mov rB, #1
4368 str rB, [rA, #0x100]
4369 mov rB, #2
4370 str rB, [rA, #0x110]
4371
4372 rather than having to synthesize both large constants from scratch.
4373
4374 Therefore, we calculate how many insns would be required to emit
4375 the constant starting from `best_start', and also starting from
4376 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4377 yield a shorter sequence, we may as well use zero. */
4378 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4379 if (best_start != 0
4380 && ((HOST_WIDE_INT_1U << best_start) < val))
4381 {
4382 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4383 if (insns2 <= insns1)
4384 {
4385 *return_sequence = tmp_sequence;
4386 insns1 = insns2;
4387 }
4388 }
4389
4390 return insns1;
4391 }
4392
4393 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4394 static int
4395 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4396 struct four_ints *return_sequence, int i)
4397 {
4398 int remainder = val & 0xffffffff;
4399 int insns = 0;
4400
4401 /* Try and find a way of doing the job in either two or three
4402 instructions.
4403
4404 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4405 location. We start at position I. This may be the MSB, or
4406 optimal_immediate_sequence may have positioned it at the largest block
4407 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4408 wrapping around to the top of the word when we drop off the bottom.
4409 In the worst case this code should produce no more than four insns.
4410
4411 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4412 constants, shifted to any arbitrary location. We should always start
4413 at the MSB. */
4414 do
4415 {
4416 int end;
4417 unsigned int b1, b2, b3, b4;
4418 unsigned HOST_WIDE_INT result;
4419 int loc;
4420
4421 gcc_assert (insns < 4);
4422
4423 if (i <= 0)
4424 i += 32;
4425
4426 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4427 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4428 {
4429 loc = i;
4430 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4431 /* We can use addw/subw for the last 12 bits. */
4432 result = remainder;
4433 else
4434 {
4435 /* Use an 8-bit shifted/rotated immediate. */
4436 end = i - 8;
4437 if (end < 0)
4438 end += 32;
4439 result = remainder & ((0x0ff << end)
4440 | ((i < end) ? (0xff >> (32 - end))
4441 : 0));
4442 i -= 8;
4443 }
4444 }
4445 else
4446 {
4447 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4448 arbitrary shifts. */
4449 i -= TARGET_ARM ? 2 : 1;
4450 continue;
4451 }
4452
4453 /* Next, see if we can do a better job with a thumb2 replicated
4454 constant.
4455
4456 We do it this way around to catch the cases like 0x01F001E0 where
4457 two 8-bit immediates would work, but a replicated constant would
4458 make it worse.
4459
4460 TODO: 16-bit constants that don't clear all the bits, but still win.
4461 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4462 if (TARGET_THUMB2)
4463 {
4464 b1 = (remainder & 0xff000000) >> 24;
4465 b2 = (remainder & 0x00ff0000) >> 16;
4466 b3 = (remainder & 0x0000ff00) >> 8;
4467 b4 = remainder & 0xff;
4468
4469 if (loc > 24)
4470 {
4471 /* The 8-bit immediate already found clears b1 (and maybe b2),
4472 but must leave b3 and b4 alone. */
4473
4474 /* First try to find a 32-bit replicated constant that clears
4475 almost everything. We can assume that we can't do it in one,
4476 or else we wouldn't be here. */
4477 unsigned int tmp = b1 & b2 & b3 & b4;
4478 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4479 + (tmp << 24);
4480 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4481 + (tmp == b3) + (tmp == b4);
4482 if (tmp
4483 && (matching_bytes >= 3
4484 || (matching_bytes == 2
4485 && const_ok_for_op (remainder & ~tmp2, code))))
4486 {
4487 /* At least 3 of the bytes match, and the fourth has at
4488 least as many bits set, or two of the bytes match
4489 and it will only require one more insn to finish. */
4490 result = tmp2;
4491 i = tmp != b1 ? 32
4492 : tmp != b2 ? 24
4493 : tmp != b3 ? 16
4494 : 8;
4495 }
4496
4497 /* Second, try to find a 16-bit replicated constant that can
4498 leave three of the bytes clear. If b2 or b4 is already
4499 zero, then we can. If the 8-bit from above would not
4500 clear b2 anyway, then we still win. */
4501 else if (b1 == b3 && (!b2 || !b4
4502 || (remainder & 0x00ff0000 & ~result)))
4503 {
4504 result = remainder & 0xff00ff00;
4505 i = 24;
4506 }
4507 }
4508 else if (loc > 16)
4509 {
4510 /* The 8-bit immediate already found clears b2 (and maybe b3)
4511 and we don't get here unless b1 is already clear, but it will
4512 leave b4 unchanged. */
4513
4514 /* If we can clear b2 and b4 at once, then we win, since the
4515 8-bits couldn't possibly reach that far. */
4516 if (b2 == b4)
4517 {
4518 result = remainder & 0x00ff00ff;
4519 i = 16;
4520 }
4521 }
4522 }
4523
4524 return_sequence->i[insns++] = result;
4525 remainder &= ~result;
4526
4527 if (code == SET || code == MINUS)
4528 code = PLUS;
4529 }
4530 while (remainder);
4531
4532 return insns;
4533 }
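
/* [Editorial sketch -- not part of arm.c.]  A deliberately simplified,
   ARM-mode-only version of the decomposition above: peel off one 8-bit chunk
   (aligned on a 2-bit boundary) per instruction until nothing remains.  It
   ignores the choice of starting position, rotations that wrap around bit 31,
   and all the Thumb-2 replicated-constant and addw/subw refinements, so it
   can need more pieces than the real code; it only shows the basic
   "rotated 8-bit pieces" idea.  */
#include <stdint.h>

static int
naive_arm_split (uint32_t val, uint32_t pieces[4])
{
  int n = 0;
  while (val != 0 && n < 4)
    {
      /* Lowest set bit, rounded down to an even position.  */
      int low = __builtin_ctz (val) & ~1;
      /* Take the 8 bits starting there as one immediate.  */
      uint32_t piece = val & (0xffu << low);
      pieces[n++] = piece;
      val &= ~piece;
    }
  return n;  /* e.g. 0x0001f001 splits into 0x00000001 and 0x0001f000 */
}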
4534
4535 /* Emit an instruction with the indicated PATTERN. If COND is
4536 non-NULL, conditionalize the execution of the instruction on COND
4537 being true. */
4538
4539 static void
4540 emit_constant_insn (rtx cond, rtx pattern)
4541 {
4542 if (cond)
4543 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4544 emit_insn (pattern);
4545 }
4546
4547 /* As above, but extra parameter GENERATE which, if clear, suppresses
4548 RTL generation. */
4549
4550 static int
4551 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4552 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4553 int subtargets, int generate)
4554 {
4555 int can_invert = 0;
4556 int can_negate = 0;
4557 int final_invert = 0;
4558 int i;
4559 int set_sign_bit_copies = 0;
4560 int clear_sign_bit_copies = 0;
4561 int clear_zero_bit_copies = 0;
4562 int set_zero_bit_copies = 0;
4563 int insns = 0, neg_insns, inv_insns;
4564 unsigned HOST_WIDE_INT temp1, temp2;
4565 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4566 struct four_ints *immediates;
4567 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4568
4569 /* Find out which operations are safe for a given CODE. Also do a quick
4570 check for degenerate cases; these can occur when DImode operations
4571 are split. */
4572 switch (code)
4573 {
4574 case SET:
4575 can_invert = 1;
4576 break;
4577
4578 case PLUS:
4579 can_negate = 1;
4580 break;
4581
4582 case IOR:
4583 if (remainder == 0xffffffff)
4584 {
4585 if (generate)
4586 emit_constant_insn (cond,
4587 gen_rtx_SET (target,
4588 GEN_INT (ARM_SIGN_EXTEND (val))));
4589 return 1;
4590 }
4591
4592 if (remainder == 0)
4593 {
4594 if (reload_completed && rtx_equal_p (target, source))
4595 return 0;
4596
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4600 }
4601 break;
4602
4603 case AND:
4604 if (remainder == 0)
4605 {
4606 if (generate)
4607 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4608 return 1;
4609 }
4610 if (remainder == 0xffffffff)
4611 {
4612 if (reload_completed && rtx_equal_p (target, source))
4613 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4617 }
4618 can_invert = 1;
4619 break;
4620
4621 case XOR:
4622 if (remainder == 0)
4623 {
4624 if (reload_completed && rtx_equal_p (target, source))
4625 return 0;
4626 if (generate)
4627 emit_constant_insn (cond, gen_rtx_SET (target, source));
4628 return 1;
4629 }
4630
4631 if (remainder == 0xffffffff)
4632 {
4633 if (generate)
4634 emit_constant_insn (cond,
4635 gen_rtx_SET (target,
4636 gen_rtx_NOT (mode, source)));
4637 return 1;
4638 }
4639 final_invert = 1;
4640 break;
4641
4642 case MINUS:
4643 /* We treat MINUS as (val - source), since (source - val) is always
4644 passed as (source + (-val)). */
4645 if (remainder == 0)
4646 {
4647 if (generate)
4648 emit_constant_insn (cond,
4649 gen_rtx_SET (target,
4650 gen_rtx_NEG (mode, source)));
4651 return 1;
4652 }
4653 if (const_ok_for_arm (val))
4654 {
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 gen_rtx_MINUS (mode, GEN_INT (val),
4659 source)));
4660 return 1;
4661 }
4662
4663 break;
4664
4665 default:
4666 gcc_unreachable ();
4667 }
4668
4669 /* If we can do it in one insn get out quickly. */
4670 if (const_ok_for_op (val, code))
4671 {
4672 if (generate)
4673 emit_constant_insn (cond,
4674 gen_rtx_SET (target,
4675 (source
4676 ? gen_rtx_fmt_ee (code, mode, source,
4677 GEN_INT (val))
4678 : GEN_INT (val))));
4679 return 1;
4680 }
4681
4682 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4683 insn. */
4684 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4685 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4686 {
4687 if (generate)
4688 {
4689 if (mode == SImode && i == 16)
4690 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4691 smaller insn. */
4692 emit_constant_insn (cond,
4693 gen_zero_extendhisi2
4694 (target, gen_lowpart (HImode, source)));
4695 else
4696 /* Extz only supports SImode, but we can coerce the operands
4697 into that mode. */
4698 emit_constant_insn (cond,
4699 gen_extzv_t2 (gen_lowpart (SImode, target),
4700 gen_lowpart (SImode, source),
4701 GEN_INT (i), const0_rtx));
4702 }
4703
4704 return 1;
4705 }
4706
4707 /* Calculate a few attributes that may be useful for specific
4708 optimizations. */
4709 /* Count number of leading zeros. */
4710 for (i = 31; i >= 0; i--)
4711 {
4712 if ((remainder & (1 << i)) == 0)
4713 clear_sign_bit_copies++;
4714 else
4715 break;
4716 }
4717
4718 /* Count number of leading 1's. */
4719 for (i = 31; i >= 0; i--)
4720 {
4721 if ((remainder & (1 << i)) != 0)
4722 set_sign_bit_copies++;
4723 else
4724 break;
4725 }
4726
4727 /* Count number of trailing zero's. */
4728 for (i = 0; i <= 31; i++)
4729 {
4730 if ((remainder & (1 << i)) == 0)
4731 clear_zero_bit_copies++;
4732 else
4733 break;
4734 }
4735
4736 /* Count number of trailing 1's. */
4737 for (i = 0; i <= 31; i++)
4738 {
4739 if ((remainder & (1 << i)) != 0)
4740 set_zero_bit_copies++;
4741 else
4742 break;
4743 }
4744
4745 switch (code)
4746 {
4747 case SET:
4748 /* See if we can do this by sign_extending a constant that is known
4749 to be negative. This is a good way of doing it, since the shift
4750 may well merge into a subsequent insn. */
4751 if (set_sign_bit_copies > 1)
4752 {
4753 if (const_ok_for_arm
4754 (temp1 = ARM_SIGN_EXTEND (remainder
4755 << (set_sign_bit_copies - 1))))
4756 {
4757 if (generate)
4758 {
4759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4760 emit_constant_insn (cond,
4761 gen_rtx_SET (new_src, GEN_INT (temp1)));
4762 emit_constant_insn (cond,
4763 gen_ashrsi3 (target, new_src,
4764 GEN_INT (set_sign_bit_copies - 1)));
4765 }
4766 return 2;
4767 }
4768 /* For an inverted constant, we will need to set the low bits,
4769 these will be shifted out of harm's way. */
4770 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4771 if (const_ok_for_arm (~temp1))
4772 {
4773 if (generate)
4774 {
4775 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4776 emit_constant_insn (cond,
4777 gen_rtx_SET (new_src, GEN_INT (temp1)));
4778 emit_constant_insn (cond,
4779 gen_ashrsi3 (target, new_src,
4780 GEN_INT (set_sign_bit_copies - 1)));
4781 }
4782 return 2;
4783 }
4784 }
4785
4786 /* See if we can calculate the value as the difference between two
4787 valid immediates. */
4788 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4789 {
4790 int topshift = clear_sign_bit_copies & ~1;
4791
4792 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4793 & (0xff000000 >> topshift));
4794
4795 /* If temp1 is zero, then that means the 9 most significant
4796 bits of remainder were 1 and we've caused it to overflow.
4797 When topshift is 0 we don't need to do anything since we
4798 can borrow from 'bit 32'. */
4799 if (temp1 == 0 && topshift != 0)
4800 temp1 = 0x80000000 >> (topshift - 1);
4801
4802 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4803
4804 if (const_ok_for_arm (temp2))
4805 {
4806 if (generate)
4807 {
4808 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4809 emit_constant_insn (cond,
4810 gen_rtx_SET (new_src, GEN_INT (temp1)));
4811 emit_constant_insn (cond,
4812 gen_addsi3 (target, new_src,
4813 GEN_INT (-temp2)));
4814 }
4815
4816 return 2;
4817 }
4818 }
4819
4820 /* See if we can generate this by setting the bottom (or the top)
4821 16 bits, and then shifting these into the other half of the
4822 word. We only look for the simplest cases, to do more would cost
4823 too much. Be careful, however, not to generate this when the
4824 alternative would take fewer insns. */
4825 if (val & 0xffff0000)
4826 {
4827 temp1 = remainder & 0xffff0000;
4828 temp2 = remainder & 0x0000ffff;
4829
4830 /* Overlaps outside this range are best done using other methods. */
4831 for (i = 9; i < 24; i++)
4832 {
4833 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4834 && !const_ok_for_arm (temp2))
4835 {
4836 rtx new_src = (subtargets
4837 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4838 : target);
4839 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4840 source, subtargets, generate);
4841 source = new_src;
4842 if (generate)
4843 emit_constant_insn
4844 (cond,
4845 gen_rtx_SET
4846 (target,
4847 gen_rtx_IOR (mode,
4848 gen_rtx_ASHIFT (mode, source,
4849 GEN_INT (i)),
4850 source)));
4851 return insns + 1;
4852 }
4853 }
4854
4855 /* Don't duplicate cases already considered. */
4856 for (i = 17; i < 24; i++)
4857 {
4858 if (((temp1 | (temp1 >> i)) == remainder)
4859 && !const_ok_for_arm (temp1))
4860 {
4861 rtx new_src = (subtargets
4862 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4863 : target);
4864 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4865 source, subtargets, generate);
4866 source = new_src;
4867 if (generate)
4868 emit_constant_insn
4869 (cond,
4870 gen_rtx_SET (target,
4871 gen_rtx_IOR
4872 (mode,
4873 gen_rtx_LSHIFTRT (mode, source,
4874 GEN_INT (i)),
4875 source)));
4876 return insns + 1;
4877 }
4878 }
4879 }
4880 break;
4881
4882 case IOR:
4883 case XOR:
4884 /* If we have IOR or XOR, and the constant can be loaded in a
4885 single instruction, and we can find a temporary to put it in,
4886 then this can be done in two instructions instead of 3-4. */
4887 if (subtargets
4888 /* TARGET can't be NULL if SUBTARGETS is 0 */
4889 || (reload_completed && !reg_mentioned_p (target, source)))
4890 {
4891 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4892 {
4893 if (generate)
4894 {
4895 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4896
4897 emit_constant_insn (cond,
4898 gen_rtx_SET (sub, GEN_INT (val)));
4899 emit_constant_insn (cond,
4900 gen_rtx_SET (target,
4901 gen_rtx_fmt_ee (code, mode,
4902 source, sub)));
4903 }
4904 return 2;
4905 }
4906 }
4907
4908 if (code == XOR)
4909 break;
4910
4911 /* Convert.
4912 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4913 with the remaining bits 0, e.g. 0xfff00000)
4914 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4915
4916 This can be done in 2 instructions by using shifts with mov or mvn.
4917 e.g. for
4918 x = x | 0xfff00000;
4919 we generate:
4920 mvn r0, r0, asl #12
4921 mvn r0, r0, lsr #12 */
4922 if (set_sign_bit_copies > 8
4923 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4924 {
4925 if (generate)
4926 {
4927 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4928 rtx shift = GEN_INT (set_sign_bit_copies);
4929
4930 emit_constant_insn
4931 (cond,
4932 gen_rtx_SET (sub,
4933 gen_rtx_NOT (mode,
4934 gen_rtx_ASHIFT (mode,
4935 source,
4936 shift))));
4937 emit_constant_insn
4938 (cond,
4939 gen_rtx_SET (target,
4940 gen_rtx_NOT (mode,
4941 gen_rtx_LSHIFTRT (mode, sub,
4942 shift))));
4943 }
4944 return 2;
4945 }
4946
4947 /* Convert
4948 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4949 to
4950 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4951
4952 E.g. for r0 = r0 | 0xfff
4953 mvn r0, r0, lsr #12
4954 mvn r0, r0, asl #12
4955
4956 */
4957 if (set_zero_bit_copies > 8
4958 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4959 {
4960 if (generate)
4961 {
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963 rtx shift = GEN_INT (set_zero_bit_copies);
4964
4965 emit_constant_insn
4966 (cond,
4967 gen_rtx_SET (sub,
4968 gen_rtx_NOT (mode,
4969 gen_rtx_LSHIFTRT (mode,
4970 source,
4971 shift))));
4972 emit_constant_insn
4973 (cond,
4974 gen_rtx_SET (target,
4975 gen_rtx_NOT (mode,
4976 gen_rtx_ASHIFT (mode, sub,
4977 shift))));
4978 }
4979 return 2;
4980 }
4981
4982 /* This will never be reached for Thumb2 because orn is a valid
4983 instruction. This is for Thumb1 and the ARM 32 bit cases.
4984
4985 x = y | constant (such that ~constant is a valid constant)
4986 Transform this to
4987 x = ~(~y & ~constant).
4988 */
4989 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4990 {
4991 if (generate)
4992 {
4993 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4994 emit_constant_insn (cond,
4995 gen_rtx_SET (sub,
4996 gen_rtx_NOT (mode, source)));
4997 source = sub;
4998 if (subtargets)
4999 sub = gen_reg_rtx (mode);
5000 emit_constant_insn (cond,
5001 gen_rtx_SET (sub,
5002 gen_rtx_AND (mode, source,
5003 GEN_INT (temp1))));
5004 emit_constant_insn (cond,
5005 gen_rtx_SET (target,
5006 gen_rtx_NOT (mode, sub)));
5007 }
5008 return 3;
5009 }
5010 break;
5011
5012 case AND:
5013 /* See if two shifts will do 2 or more insns' worth of work. */
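/* For example, x & 0x0000ffff (a mask with 16 leading zero bits) needs
   no extra AND at all: a left shift by 16 followed by a logical right
   shift by 16 clears the top half-word directly. */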
5014 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5015 {
5016 HOST_WIDE_INT shift_mask = ((0xffffffff
5017 << (32 - clear_sign_bit_copies))
5018 & 0xffffffff);
5019
5020 if ((remainder | shift_mask) != 0xffffffff)
5021 {
5022 HOST_WIDE_INT new_val
5023 = ARM_SIGN_EXTEND (remainder | shift_mask);
5024
5025 if (generate)
5026 {
5027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5028 insns = arm_gen_constant (AND, SImode, cond, new_val,
5029 new_src, source, subtargets, 1);
5030 source = new_src;
5031 }
5032 else
5033 {
5034 rtx targ = subtargets ? NULL_RTX : target;
5035 insns = arm_gen_constant (AND, mode, cond, new_val,
5036 targ, source, subtargets, 0);
5037 }
5038 }
5039
5040 if (generate)
5041 {
5042 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5043 rtx shift = GEN_INT (clear_sign_bit_copies);
5044
5045 emit_insn (gen_ashlsi3 (new_src, source, shift));
5046 emit_insn (gen_lshrsi3 (target, new_src, shift));
5047 }
5048
5049 return insns + 2;
5050 }
5051
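/* Mirror image of the case above: e.g. x & 0xffff0000 (a mask with 16
   trailing zero bits) is a logical right shift by 16 followed by a
   left shift by 16. */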
5052 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5053 {
5054 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5055
5056 if ((remainder | shift_mask) != 0xffffffff)
5057 {
5058 HOST_WIDE_INT new_val
5059 = ARM_SIGN_EXTEND (remainder | shift_mask);
5060 if (generate)
5061 {
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5063
5064 insns = arm_gen_constant (AND, mode, cond, new_val,
5065 new_src, source, subtargets, 1);
5066 source = new_src;
5067 }
5068 else
5069 {
5070 rtx targ = subtargets ? NULL_RTX : target;
5071
5072 insns = arm_gen_constant (AND, mode, cond, new_val,
5073 targ, source, subtargets, 0);
5074 }
5075 }
5076
5077 if (generate)
5078 {
5079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5080 rtx shift = GEN_INT (clear_zero_bit_copies);
5081
5082 emit_insn (gen_lshrsi3 (new_src, source, shift));
5083 emit_insn (gen_ashlsi3 (target, new_src, shift));
5084 }
5085
5086 return insns + 2;
5087 }
5088
5089 break;
5090
5091 default:
5092 break;
5093 }
5094
5095 /* Calculate what the instruction sequences would be if we generated it
5096 normally, negated, or inverted. */
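/* For example, target = source + 0xffffff00 is better expressed as a
   subtraction of 0x100; when negation is permitted for the operation,
   the cheaper negated sequence is selected below. */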
5097 if (code == AND)
5098 /* AND cannot be split into multiple insns, so invert and use BIC. */
5099 insns = 99;
5100 else
5101 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5102
5103 if (can_negate)
5104 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5105 &neg_immediates);
5106 else
5107 neg_insns = 99;
5108
5109 if (can_invert || final_invert)
5110 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5111 &inv_immediates);
5112 else
5113 inv_insns = 99;
5114
5115 immediates = &pos_immediates;
5116
5117 /* Is the negated immediate sequence more efficient? */
5118 if (neg_insns < insns && neg_insns <= inv_insns)
5119 {
5120 insns = neg_insns;
5121 immediates = &neg_immediates;
5122 }
5123 else
5124 can_negate = 0;
5125
5126 /* Is the inverted immediate sequence more efficient?
5127 We must allow for an extra NOT instruction for XOR operations, although
5128 there is some chance that the final 'mvn' will get optimized later. */
5129 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5130 {
5131 insns = inv_insns;
5132 immediates = &inv_immediates;
5133 }
5134 else
5135 {
5136 can_invert = 0;
5137 final_invert = 0;
5138 }
5139
5140 /* Now output the chosen sequence as instructions. */
5141 if (generate)
5142 {
5143 for (i = 0; i < insns; i++)
5144 {
5145 rtx new_src, temp1_rtx;
5146
5147 temp1 = immediates->i[i];
5148
5149 if (code == SET || code == MINUS)
5150 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5151 else if ((final_invert || i < (insns - 1)) && subtargets)
5152 new_src = gen_reg_rtx (mode);
5153 else
5154 new_src = target;
5155
5156 if (can_invert)
5157 temp1 = ~temp1;
5158 else if (can_negate)
5159 temp1 = -temp1;
5160
5161 temp1 = trunc_int_for_mode (temp1, mode);
5162 temp1_rtx = GEN_INT (temp1);
5163
5164 if (code == SET)
5165 ;
5166 else if (code == MINUS)
5167 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5168 else
5169 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5170
5171 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5172 source = new_src;
5173
5174 if (code == SET)
5175 {
5176 can_negate = can_invert;
5177 can_invert = 0;
5178 code = PLUS;
5179 }
5180 else if (code == MINUS)
5181 code = PLUS;
5182 }
5183 }
5184
5185 if (final_invert)
5186 {
5187 if (generate)
5188 emit_constant_insn (cond, gen_rtx_SET (target,
5189 gen_rtx_NOT (mode, source)));
5190 insns++;
5191 }
5192
5193 return insns;
5194 }
5195
5196 /* Canonicalize a comparison so that we are more likely to recognize it.
5197 This can be done for a few constant compares, where we can make the
5198 immediate value easier to load. */
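/* For example, (x > 0x1fff) needs the constant 0x1fff, which is not a
   valid ARM immediate, but it is equivalent to (x >= 0x2000), whose
   constant is. */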
5199
5200 static void
5201 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5202 bool op0_preserve_value)
5203 {
5204 machine_mode mode;
5205 unsigned HOST_WIDE_INT i, maxval;
5206
5207 mode = GET_MODE (*op0);
5208 if (mode == VOIDmode)
5209 mode = GET_MODE (*op1);
5210
5211 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5212
5213 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5214 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5215 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5216 for GTU/LEU in Thumb mode. */
5217 if (mode == DImode)
5218 {
5219
5220 if (*code == GT || *code == LE
5221 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5222 {
5223 /* Missing comparison. First try to use an available
5224 comparison. */
5225 if (CONST_INT_P (*op1))
5226 {
5227 i = INTVAL (*op1);
5228 switch (*code)
5229 {
5230 case GT:
5231 case LE:
5232 if (i != maxval
5233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5234 {
5235 *op1 = GEN_INT (i + 1);
5236 *code = *code == GT ? GE : LT;
5237 return;
5238 }
5239 break;
5240 case GTU:
5241 case LEU:
5242 if (i != ~((unsigned HOST_WIDE_INT) 0)
5243 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5244 {
5245 *op1 = GEN_INT (i + 1);
5246 *code = *code == GTU ? GEU : LTU;
5247 return;
5248 }
5249 break;
5250 default:
5251 gcc_unreachable ();
5252 }
5253 }
5254
5255 /* If that did not work, reverse the condition. */
5256 if (!op0_preserve_value)
5257 {
5258 std::swap (*op0, *op1);
5259 *code = (int)swap_condition ((enum rtx_code)*code);
5260 }
5261 }
5262 return;
5263 }
5264
5265 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5266 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5267 to facilitate possible combining with a cmp into 'ands'. */
5268 if (mode == SImode
5269 && GET_CODE (*op0) == ZERO_EXTEND
5270 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5271 && GET_MODE (XEXP (*op0, 0)) == QImode
5272 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5273 && subreg_lowpart_p (XEXP (*op0, 0))
5274 && *op1 == const0_rtx)
5275 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5276 GEN_INT (255));
5277
5278 /* Comparisons smaller than DImode. Only adjust comparisons against
5279 an out-of-range constant. */
5280 if (!CONST_INT_P (*op1)
5281 || const_ok_for_arm (INTVAL (*op1))
5282 || const_ok_for_arm (- INTVAL (*op1)))
5283 return;
5284
5285 i = INTVAL (*op1);
5286
5287 switch (*code)
5288 {
5289 case EQ:
5290 case NE:
5291 return;
5292
5293 case GT:
5294 case LE:
5295 if (i != maxval
5296 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5297 {
5298 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5299 *code = *code == GT ? GE : LT;
5300 return;
5301 }
5302 break;
5303
5304 case GE:
5305 case LT:
5306 if (i != ~maxval
5307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5308 {
5309 *op1 = GEN_INT (i - 1);
5310 *code = *code == GE ? GT : LE;
5311 return;
5312 }
5313 break;
5314
5315 case GTU:
5316 case LEU:
5317 if (i != ~((unsigned HOST_WIDE_INT) 0)
5318 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5319 {
5320 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5321 *code = *code == GTU ? GEU : LTU;
5322 return;
5323 }
5324 break;
5325
5326 case GEU:
5327 case LTU:
5328 if (i != 0
5329 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5330 {
5331 *op1 = GEN_INT (i - 1);
5332 *code = *code == GEU ? GTU : LEU;
5333 return;
5334 }
5335 break;
5336
5337 default:
5338 gcc_unreachable ();
5339 }
5340 }
5341
5342
5343 /* Define how to find the value returned by a function. */
5344
5345 static rtx
5346 arm_function_value(const_tree type, const_tree func,
5347 bool outgoing ATTRIBUTE_UNUSED)
5348 {
5349 machine_mode mode;
5350 int unsignedp ATTRIBUTE_UNUSED;
5351 rtx r ATTRIBUTE_UNUSED;
5352
5353 mode = TYPE_MODE (type);
5354
5355 if (TARGET_AAPCS_BASED)
5356 return aapcs_allocate_return_reg (mode, type, func);
5357
5358 /* Promote integer types. */
5359 if (INTEGRAL_TYPE_P (type))
5360 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5361
5362 /* Promotes small structs returned in a register to full-word size
5363 for big-endian AAPCS. */
5364 if (arm_return_in_msb (type))
5365 {
5366 HOST_WIDE_INT size = int_size_in_bytes (type);
5367 if (size % UNITS_PER_WORD != 0)
5368 {
5369 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5370 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5371 }
5372 }
5373
5374 return arm_libcall_value_1 (mode);
5375 }
5376
5377 /* libcall hashtable helpers. */
5378
5379 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5380 {
5381 static inline hashval_t hash (const rtx_def *);
5382 static inline bool equal (const rtx_def *, const rtx_def *);
5383 static inline void remove (rtx_def *);
5384 };
5385
5386 inline bool
5387 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5388 {
5389 return rtx_equal_p (p1, p2);
5390 }
5391
5392 inline hashval_t
5393 libcall_hasher::hash (const rtx_def *p1)
5394 {
5395 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5396 }
5397
5398 typedef hash_table<libcall_hasher> libcall_table_type;
5399
5400 static void
5401 add_libcall (libcall_table_type *htab, rtx libcall)
5402 {
5403 *htab->find_slot (libcall, INSERT) = libcall;
5404 }
5405
5406 static bool
5407 arm_libcall_uses_aapcs_base (const_rtx libcall)
5408 {
5409 static bool init_done = false;
5410 static libcall_table_type *libcall_htab = NULL;
5411
5412 if (!init_done)
5413 {
5414 init_done = true;
5415
5416 libcall_htab = new libcall_table_type (31);
5417 add_libcall (libcall_htab,
5418 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5425
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5434
5435 add_libcall (libcall_htab,
5436 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5437 add_libcall (libcall_htab,
5438 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5439 add_libcall (libcall_htab,
5440 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5441 add_libcall (libcall_htab,
5442 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5443 add_libcall (libcall_htab,
5444 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5445 add_libcall (libcall_htab,
5446 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5449 add_libcall (libcall_htab,
5450 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5451
5452 /* Values from double-precision helper functions are returned in core
5453 registers if the selected core only supports single-precision
5454 arithmetic, even if we are using the hard-float ABI. The same is
5455 true for single-precision helpers, but we will never be using the
5456 hard-float ABI on a CPU which doesn't support single-precision
5457 operations in hardware. */
5458 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5466 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5467 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5468 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5469 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5470 SFmode));
5471 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5472 DFmode));
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5475 }
5476
5477 return libcall && libcall_htab->find (libcall) != NULL;
5478 }
5479
5480 static rtx
5481 arm_libcall_value_1 (machine_mode mode)
5482 {
5483 if (TARGET_AAPCS_BASED)
5484 return aapcs_libcall_value (mode);
5485 else if (TARGET_IWMMXT_ABI
5486 && arm_vector_mode_supported_p (mode))
5487 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5488 else
5489 return gen_rtx_REG (mode, ARG_REGISTER (1));
5490 }
5491
5492 /* Define how to find the value returned by a library function
5493 assuming the value has mode MODE. */
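/* For example, when the default PCS is the VFP (hard-float) variant, a
   float result from an ordinary libcall comes back in a VFP register,
   but the helpers listed in arm_libcall_uses_aapcs_base return theirs
   in the core register r0. */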
5494
5495 static rtx
5496 arm_libcall_value (machine_mode mode, const_rtx libcall)
5497 {
5498 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5499 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5500 {
5501 /* The following libcalls return their result in integer registers,
5502 even though they return a floating point value. */
5503 if (arm_libcall_uses_aapcs_base (libcall))
5504 return gen_rtx_REG (mode, ARG_REGISTER (1));
5505
5506 }
5507
5508 return arm_libcall_value_1 (mode);
5509 }
5510
5511 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5512
5513 static bool
5514 arm_function_value_regno_p (const unsigned int regno)
5515 {
5516 if (regno == ARG_REGISTER (1)
5517 || (TARGET_32BIT
5518 && TARGET_AAPCS_BASED
5519 && TARGET_HARD_FLOAT
5520 && regno == FIRST_VFP_REGNUM)
5521 || (TARGET_IWMMXT_ABI
5522 && regno == FIRST_IWMMXT_REGNUM))
5523 return true;
5524
5525 return false;
5526 }
5527
5528 /* Determine the amount of memory needed to store the possible return
5529 registers of an untyped call. */
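/* The base of 16 bytes covers the core registers r0-r3; the extra 32
   and 8 bytes below make room for values returned in VFP and iWMMXt
   registers under the corresponding ABIs. */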
5530 int
5531 arm_apply_result_size (void)
5532 {
5533 int size = 16;
5534
5535 if (TARGET_32BIT)
5536 {
5537 if (TARGET_HARD_FLOAT_ABI)
5538 size += 32;
5539 if (TARGET_IWMMXT_ABI)
5540 size += 8;
5541 }
5542
5543 return size;
5544 }
5545
5546 /* Decide whether TYPE should be returned in memory (true)
5547 or in a register (false). FNTYPE is the type of the function making
5548 the call. */
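/* For example, under AAPCS a 4-byte struct is returned in r0, whereas
   a 12-byte struct of ints is returned in memory unless a co-processor
   (e.g. VFP for a homogeneous floating-point aggregate) can return it
   in registers. */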
5549 static bool
5550 arm_return_in_memory (const_tree type, const_tree fntype)
5551 {
5552 HOST_WIDE_INT size;
5553
5554 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5555
5556 if (TARGET_AAPCS_BASED)
5557 {
5558 /* Simple, non-aggregate types (i.e. not including vectors and
5559 complex) are always returned in a register (or registers).
5560 We don't care about which register here, so we can short-cut
5561 some of the detail. */
5562 if (!AGGREGATE_TYPE_P (type)
5563 && TREE_CODE (type) != VECTOR_TYPE
5564 && TREE_CODE (type) != COMPLEX_TYPE)
5565 return false;
5566
5567 /* Any return value that is no larger than one word can be
5568 returned in r0. */
5569 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5570 return false;
5571
5572 /* Check any available co-processors to see if they accept the
5573 type as a register candidate (VFP, for example, can return
5574 some aggregates in consecutive registers). These aren't
5575 available if the call is variadic. */
5576 if (aapcs_select_return_coproc (type, fntype) >= 0)
5577 return false;
5578
5579 /* Vector values should be returned using ARM registers, not
5580 memory (unless they're over 16 bytes, which will break since
5581 we only have four call-clobbered registers to play with). */
5582 if (TREE_CODE (type) == VECTOR_TYPE)
5583 return (size < 0 || size > (4 * UNITS_PER_WORD));
5584
5585 /* The rest go in memory. */
5586 return true;
5587 }
5588
5589 if (TREE_CODE (type) == VECTOR_TYPE)
5590 return (size < 0 || size > (4 * UNITS_PER_WORD));
5591
5592 if (!AGGREGATE_TYPE_P (type)
5593 && (TREE_CODE (type) != VECTOR_TYPE))
5594 /* All simple types are returned in registers. */
5595 return false;
5596
5597 if (arm_abi != ARM_ABI_APCS)
5598 {
5599 /* ATPCS and later return aggregate types in memory only if they are
5600 larger than a word (or are variable size). */
5601 return (size < 0 || size > UNITS_PER_WORD);
5602 }
5603
5604 /* For the arm-wince targets we choose to be compatible with Microsoft's
5605 ARM and Thumb compilers, which always return aggregates in memory. */
5606 #ifndef ARM_WINCE
5607 /* All structures/unions bigger than one word are returned in memory.
5608 Also catch the case where int_size_in_bytes returns -1. In this case
5609 the aggregate is either huge or of variable size, and in either case
5610 we will want to return it via memory and not in a register. */
5611 if (size < 0 || size > UNITS_PER_WORD)
5612 return true;
5613
5614 if (TREE_CODE (type) == RECORD_TYPE)
5615 {
5616 tree field;
5617
5618 /* For a struct the APCS says that we only return in a register
5619 if the type is 'integer like' and every addressable element
5620 has an offset of zero. For practical purposes this means
5621 that the structure can have at most one non bit-field element
5622 and that this element must be the first one in the structure. */
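/* For example, struct { int x; } or a struct whose members after the
   first are all bit-fields can be returned in a register, whereas
   struct { char a; char b; } goes in memory because its second member
   is addressable. */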
5623
5624 /* Find the first field, ignoring non FIELD_DECL things which will
5625 have been created by C++. */
5626 for (field = TYPE_FIELDS (type);
5627 field && TREE_CODE (field) != FIELD_DECL;
5628 field = DECL_CHAIN (field))
5629 continue;
5630
5631 if (field == NULL)
5632 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5633
5634 /* Check that the first field is valid for returning in a register. */
5635
5636 /* ... Floats are not allowed */
5637 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5638 return true;
5639
5640 /* ... Aggregates that are not themselves valid for returning in
5641 a register are not allowed. */
5642 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5643 return true;
5644
5645 /* Now check the remaining fields, if any. Only bitfields are allowed,
5646 since they are not addressable. */
5647 for (field = DECL_CHAIN (field);
5648 field;
5649 field = DECL_CHAIN (field))
5650 {
5651 if (TREE_CODE (field) != FIELD_DECL)
5652 continue;
5653
5654 if (!DECL_BIT_FIELD_TYPE (field))
5655 return true;
5656 }
5657
5658 return false;
5659 }
5660
5661 if (TREE_CODE (type) == UNION_TYPE)
5662 {
5663 tree field;
5664
5665 /* Unions can be returned in registers if every element is
5666 integral, or can be returned in an integer register. */
5667 for (field = TYPE_FIELDS (type);
5668 field;
5669 field = DECL_CHAIN (field))
5670 {
5671 if (TREE_CODE (field) != FIELD_DECL)
5672 continue;
5673
5674 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5675 return true;
5676
5677 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5678 return true;
5679 }
5680
5681 return false;
5682 }
5683 #endif /* not ARM_WINCE */
5684
5685 /* Return all other types in memory. */
5686 return true;
5687 }
5688
5689 const struct pcs_attribute_arg
5690 {
5691 const char *arg;
5692 enum arm_pcs value;
5693 } pcs_attribute_args[] =
5694 {
5695 {"aapcs", ARM_PCS_AAPCS},
5696 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5697 #if 0
5698 /* We could recognize these, but changes would be needed elsewhere
5699 * to implement them. */
5700 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5701 {"atpcs", ARM_PCS_ATPCS},
5702 {"apcs", ARM_PCS_APCS},
5703 #endif
5704 {NULL, ARM_PCS_UNKNOWN}
5705 };
5706
5707 static enum arm_pcs
5708 arm_pcs_from_attribute (tree attr)
5709 {
5710 const struct pcs_attribute_arg *ptr;
5711 const char *arg;
5712
5713 /* Get the value of the argument. */
5714 if (TREE_VALUE (attr) == NULL_TREE
5715 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5716 return ARM_PCS_UNKNOWN;
5717
5718 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5719
5720 /* Check it against the list of known arguments. */
5721 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5722 if (streq (arg, ptr->arg))
5723 return ptr->value;
5724
5725 /* An unrecognized PCS variant name. */
5726 return ARM_PCS_UNKNOWN;
5727 }
5728
5729 /* Get the PCS variant to use for this call. TYPE is the function's type
5730 specification, DECL is the specific declaration. DECL may be null if
5731 the call could be indirect or if this is a library call. */
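/* For example, declaring
     double f (double) __attribute__ ((pcs ("aapcs")));
   makes calls to f use the base variant (arguments and result in core
   registers) even when the default is "aapcs-vfp". */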
5732 static enum arm_pcs
5733 arm_get_pcs_model (const_tree type, const_tree decl)
5734 {
5735 bool user_convention = false;
5736 enum arm_pcs user_pcs = arm_pcs_default;
5737 tree attr;
5738
5739 gcc_assert (type);
5740
5741 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5742 if (attr)
5743 {
5744 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5745 user_convention = true;
5746 }
5747
5748 if (TARGET_AAPCS_BASED)
5749 {
5750 /* Detect varargs functions. These always use the base rules
5751 (no argument is ever a candidate for a co-processor
5752 register). */
5753 bool base_rules = stdarg_p (type);
5754
5755 if (user_convention)
5756 {
5757 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5758 sorry ("non-AAPCS derived PCS variant");
5759 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5760 error ("variadic functions must use the base AAPCS variant");
5761 }
5762
5763 if (base_rules)
5764 return ARM_PCS_AAPCS;
5765 else if (user_convention)
5766 return user_pcs;
5767 else if (decl && flag_unit_at_a_time)
5768 {
5769 /* Local functions never leak outside this compilation unit,
5770 so we are free to use whatever conventions are
5771 appropriate. */
5772 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5773 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5774 if (i && i->local)
5775 return ARM_PCS_AAPCS_LOCAL;
5776 }
5777 }
5778 else if (user_convention && user_pcs != arm_pcs_default)
5779 sorry ("PCS variant");
5780
5781 /* For everything else we use the target's default. */
5782 return arm_pcs_default;
5783 }
5784
5785
5786 static void
5787 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5788 const_tree fntype ATTRIBUTE_UNUSED,
5789 rtx libcall ATTRIBUTE_UNUSED,
5790 const_tree fndecl ATTRIBUTE_UNUSED)
5791 {
5792 /* Record the unallocated VFP registers. */
5793 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5794 pcum->aapcs_vfp_reg_alloc = 0;
5795 }
5796
5797 /* Walk down the type tree of TYPE counting consecutive base elements.
5798 If *MODEP is VOIDmode, then set it to the first valid floating point
5799 type. If a non-floating point type is found, or if a floating point
5800 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5801 otherwise return the count in the sub-tree. */
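/* For example, struct { float x, y, z; } yields *MODEP == SFmode and a
   count of 3, whereas struct { float f; double d; } mixes element
   modes and is rejected with -1. */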
5802 static int
5803 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5804 {
5805 machine_mode mode;
5806 HOST_WIDE_INT size;
5807
5808 switch (TREE_CODE (type))
5809 {
5810 case REAL_TYPE:
5811 mode = TYPE_MODE (type);
5812 if (mode != DFmode && mode != SFmode && mode != HFmode)
5813 return -1;
5814
5815 if (*modep == VOIDmode)
5816 *modep = mode;
5817
5818 if (*modep == mode)
5819 return 1;
5820
5821 break;
5822
5823 case COMPLEX_TYPE:
5824 mode = TYPE_MODE (TREE_TYPE (type));
5825 if (mode != DFmode && mode != SFmode)
5826 return -1;
5827
5828 if (*modep == VOIDmode)
5829 *modep = mode;
5830
5831 if (*modep == mode)
5832 return 2;
5833
5834 break;
5835
5836 case VECTOR_TYPE:
5837 /* Use V2SImode and V4SImode as representatives of all 64-bit
5838 and 128-bit vector types, whether or not those modes are
5839 supported with the present options. */
5840 size = int_size_in_bytes (type);
5841 switch (size)
5842 {
5843 case 8:
5844 mode = V2SImode;
5845 break;
5846 case 16:
5847 mode = V4SImode;
5848 break;
5849 default:
5850 return -1;
5851 }
5852
5853 if (*modep == VOIDmode)
5854 *modep = mode;
5855
5856 /* Vector modes are considered to be opaque: two vectors are
5857 equivalent for the purposes of being homogeneous aggregates
5858 if they are the same size. */
5859 if (*modep == mode)
5860 return 1;
5861
5862 break;
5863
5864 case ARRAY_TYPE:
5865 {
5866 int count;
5867 tree index = TYPE_DOMAIN (type);
5868
5869 /* Can't handle incomplete types nor sizes that are not
5870 fixed. */
5871 if (!COMPLETE_TYPE_P (type)
5872 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5873 return -1;
5874
5875 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5876 if (count == -1
5877 || !index
5878 || !TYPE_MAX_VALUE (index)
5879 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5880 || !TYPE_MIN_VALUE (index)
5881 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5882 || count < 0)
5883 return -1;
5884
5885 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5886 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5887
5888 /* There must be no padding. */
5889 if (wi::to_wide (TYPE_SIZE (type))
5890 != count * GET_MODE_BITSIZE (*modep))
5891 return -1;
5892
5893 return count;
5894 }
5895
5896 case RECORD_TYPE:
5897 {
5898 int count = 0;
5899 int sub_count;
5900 tree field;
5901
5902 /* Can't handle incomplete types nor sizes that are not
5903 fixed. */
5904 if (!COMPLETE_TYPE_P (type)
5905 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5906 return -1;
5907
5908 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5909 {
5910 if (TREE_CODE (field) != FIELD_DECL)
5911 continue;
5912
5913 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5914 if (sub_count < 0)
5915 return -1;
5916 count += sub_count;
5917 }
5918
5919 /* There must be no padding. */
5920 if (wi::to_wide (TYPE_SIZE (type))
5921 != count * GET_MODE_BITSIZE (*modep))
5922 return -1;
5923
5924 return count;
5925 }
5926
5927 case UNION_TYPE:
5928 case QUAL_UNION_TYPE:
5929 {
5930 /* These aren't very interesting except in a degenerate case. */
5931 int count = 0;
5932 int sub_count;
5933 tree field;
5934
5935 /* Can't handle incomplete types nor sizes that are not
5936 fixed. */
5937 if (!COMPLETE_TYPE_P (type)
5938 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5939 return -1;
5940
5941 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5942 {
5943 if (TREE_CODE (field) != FIELD_DECL)
5944 continue;
5945
5946 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5947 if (sub_count < 0)
5948 return -1;
5949 count = count > sub_count ? count : sub_count;
5950 }
5951
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type))
5954 != count * GET_MODE_BITSIZE (*modep))
5955 return -1;
5956
5957 return count;
5958 }
5959
5960 default:
5961 break;
5962 }
5963
5964 return -1;
5965 }
5966
5967 /* Return true if PCS_VARIANT should use VFP registers. */
5968 static bool
5969 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5970 {
5971 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5972 {
5973 static bool seen_thumb1_vfp = false;
5974
5975 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5976 {
5977 sorry ("Thumb-1 hard-float VFP ABI");
5978 /* sorry() is not immediately fatal, so only display this once. */
5979 seen_thumb1_vfp = true;
5980 }
5981
5982 return true;
5983 }
5984
5985 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5986 return false;
5987
5988 return (TARGET_32BIT && TARGET_HARD_FLOAT
5989 && (TARGET_VFP_DOUBLE || !is_double));
5990 }
5991
5992 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5993 suitable for passing or returning in VFP registers for the PCS
5994 variant selected. If it is, then *BASE_MODE is updated to contain
5995 a machine mode describing each element of the argument's type and
5996 *COUNT to hold the number of such elements. */
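/* For example, a struct of four floats gives *BASE_MODE == SFmode and
   *COUNT == 4; a _Complex double gives DFmode and a count of 2. */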
5997 static bool
5998 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5999 machine_mode mode, const_tree type,
6000 machine_mode *base_mode, int *count)
6001 {
6002 machine_mode new_mode = VOIDmode;
6003
6004 /* If we have the type information, prefer that to working things
6005 out from the mode. */
6006 if (type)
6007 {
6008 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6009
6010 if (ag_count > 0 && ag_count <= 4)
6011 *count = ag_count;
6012 else
6013 return false;
6014 }
6015 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6016 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6017 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6018 {
6019 *count = 1;
6020 new_mode = mode;
6021 }
6022 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6023 {
6024 *count = 2;
6025 new_mode = (mode == DCmode ? DFmode : SFmode);
6026 }
6027 else
6028 return false;
6029
6030
6031 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6032 return false;
6033
6034 *base_mode = new_mode;
6035 return true;
6036 }
6037
6038 static bool
6039 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6040 machine_mode mode, const_tree type)
6041 {
6042 int count ATTRIBUTE_UNUSED;
6043 machine_mode ag_mode ATTRIBUTE_UNUSED;
6044
6045 if (!use_vfp_abi (pcs_variant, false))
6046 return false;
6047 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6048 &ag_mode, &count);
6049 }
6050
6051 static bool
6052 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6053 const_tree type)
6054 {
6055 if (!use_vfp_abi (pcum->pcs_variant, false))
6056 return false;
6057
6058 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6059 &pcum->aapcs_vfp_rmode,
6060 &pcum->aapcs_vfp_rcount);
6061 }
6062
6063 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6064 for the behaviour of this function. */
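/* For example, a homogeneous aggregate of three doubles has
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 3, so shift == 2
   (each D register covers two S-register slots) and mask == 0x3f: the
   loop below searches for six consecutive free single-precision slots
   starting at an even register number. */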
6065
6066 static bool
6067 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6068 const_tree type ATTRIBUTE_UNUSED)
6069 {
6070 int rmode_size
6071 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6072 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6073 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6074 int regno;
6075
6076 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6077 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6078 {
6079 pcum->aapcs_vfp_reg_alloc = mask << regno;
6080 if (mode == BLKmode
6081 || (mode == TImode && ! TARGET_NEON)
6082 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6083 {
6084 int i;
6085 int rcount = pcum->aapcs_vfp_rcount;
6086 int rshift = shift;
6087 machine_mode rmode = pcum->aapcs_vfp_rmode;
6088 rtx par;
6089 if (!TARGET_NEON)
6090 {
6091 /* Avoid using unsupported vector modes. */
6092 if (rmode == V2SImode)
6093 rmode = DImode;
6094 else if (rmode == V4SImode)
6095 {
6096 rmode = DImode;
6097 rcount *= 2;
6098 rshift /= 2;
6099 }
6100 }
6101 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6102 for (i = 0; i < rcount; i++)
6103 {
6104 rtx tmp = gen_rtx_REG (rmode,
6105 FIRST_VFP_REGNUM + regno + i * rshift);
6106 tmp = gen_rtx_EXPR_LIST
6107 (VOIDmode, tmp,
6108 GEN_INT (i * GET_MODE_SIZE (rmode)));
6109 XVECEXP (par, 0, i) = tmp;
6110 }
6111
6112 pcum->aapcs_reg = par;
6113 }
6114 else
6115 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6116 return true;
6117 }
6118 return false;
6119 }
6120
6121 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6122 comment there for the behaviour of this function. */
6123
6124 static rtx
6125 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6126 machine_mode mode,
6127 const_tree type ATTRIBUTE_UNUSED)
6128 {
6129 if (!use_vfp_abi (pcs_variant, false))
6130 return NULL;
6131
6132 if (mode == BLKmode
6133 || (GET_MODE_CLASS (mode) == MODE_INT
6134 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6135 && !TARGET_NEON))
6136 {
6137 int count;
6138 machine_mode ag_mode;
6139 int i;
6140 rtx par;
6141 int shift;
6142
6143 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6144 &ag_mode, &count);
6145
6146 if (!TARGET_NEON)
6147 {
6148 if (ag_mode == V2SImode)
6149 ag_mode = DImode;
6150 else if (ag_mode == V4SImode)
6151 {
6152 ag_mode = DImode;
6153 count *= 2;
6154 }
6155 }
6156 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6158 for (i = 0; i < count; i++)
6159 {
6160 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6161 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6162 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6163 XVECEXP (par, 0, i) = tmp;
6164 }
6165
6166 return par;
6167 }
6168
6169 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6170 }
6171
6172 static void
6173 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6174 machine_mode mode ATTRIBUTE_UNUSED,
6175 const_tree type ATTRIBUTE_UNUSED)
6176 {
6177 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6178 pcum->aapcs_vfp_reg_alloc = 0;
6179 return;
6180 }
6181
6182 #define AAPCS_CP(X) \
6183 { \
6184 aapcs_ ## X ## _cum_init, \
6185 aapcs_ ## X ## _is_call_candidate, \
6186 aapcs_ ## X ## _allocate, \
6187 aapcs_ ## X ## _is_return_candidate, \
6188 aapcs_ ## X ## _allocate_return_reg, \
6189 aapcs_ ## X ## _advance \
6190 }
6191
6192 /* Table of co-processors that can be used to pass arguments in
6193 registers. Ideally no argument should be a candidate for more than
6194 one co-processor table entry, but the table is processed in order
6195 and stops after the first match. If that entry then fails to put
6196 the argument into a co-processor register, the argument will go on
6197 the stack. */
6198 static struct
6199 {
6200 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6201 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6202
6203 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6204 BLKmode) is a candidate for this co-processor's registers; this
6205 function should ignore any position-dependent state in
6206 CUMULATIVE_ARGS and only use call-type dependent information. */
6207 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6208
6209 /* Return true if the argument does get a co-processor register; it
6210 should set aapcs_reg to an RTX of the register allocated as is
6211 required for a return from FUNCTION_ARG. */
6212 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6213
6214 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6215 be returned in this co-processor's registers. */
6216 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6217
6218 /* Allocate and return an RTX element to hold the return type of a call. This
6219 routine must not fail and will only be called if is_return_candidate
6220 returned true with the same parameters. */
6221 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6222
6223 /* Finish processing this argument and prepare to start processing
6224 the next one. */
6225 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6226 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6227 {
6228 AAPCS_CP(vfp)
6229 };
6230
6231 #undef AAPCS_CP
6232
6233 static int
6234 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6235 const_tree type)
6236 {
6237 int i;
6238
6239 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6240 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6241 return i;
6242
6243 return -1;
6244 }
6245
6246 static int
6247 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6248 {
6249 /* We aren't passed a decl, so we can't check that a call is local.
6250 However, it isn't clear that that would be a win anyway, since it
6251 might limit some tail-calling opportunities. */
6252 enum arm_pcs pcs_variant;
6253
6254 if (fntype)
6255 {
6256 const_tree fndecl = NULL_TREE;
6257
6258 if (TREE_CODE (fntype) == FUNCTION_DECL)
6259 {
6260 fndecl = fntype;
6261 fntype = TREE_TYPE (fntype);
6262 }
6263
6264 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6265 }
6266 else
6267 pcs_variant = arm_pcs_default;
6268
6269 if (pcs_variant != ARM_PCS_AAPCS)
6270 {
6271 int i;
6272
6273 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6274 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6275 TYPE_MODE (type),
6276 type))
6277 return i;
6278 }
6279 return -1;
6280 }
6281
6282 static rtx
6283 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6284 const_tree fntype)
6285 {
6286 /* We aren't passed a decl, so we can't check that a call is local.
6287 However, it isn't clear that that would be a win anyway, since it
6288 might limit some tail-calling opportunities. */
6289 enum arm_pcs pcs_variant;
6290 int unsignedp ATTRIBUTE_UNUSED;
6291
6292 if (fntype)
6293 {
6294 const_tree fndecl = NULL_TREE;
6295
6296 if (TREE_CODE (fntype) == FUNCTION_DECL)
6297 {
6298 fndecl = fntype;
6299 fntype = TREE_TYPE (fntype);
6300 }
6301
6302 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6303 }
6304 else
6305 pcs_variant = arm_pcs_default;
6306
6307 /* Promote integer types. */
6308 if (type && INTEGRAL_TYPE_P (type))
6309 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6310
6311 if (pcs_variant != ARM_PCS_AAPCS)
6312 {
6313 int i;
6314
6315 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6316 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6317 type))
6318 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6319 mode, type);
6320 }
6321
6322 /* Promotes small structs returned in a register to full-word size
6323 for big-endian AAPCS. */
6324 if (type && arm_return_in_msb (type))
6325 {
6326 HOST_WIDE_INT size = int_size_in_bytes (type);
6327 if (size % UNITS_PER_WORD != 0)
6328 {
6329 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6330 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6331 }
6332 }
6333
6334 return gen_rtx_REG (mode, R0_REGNUM);
6335 }
6336
6337 static rtx
6338 aapcs_libcall_value (machine_mode mode)
6339 {
6340 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6341 && GET_MODE_SIZE (mode) <= 4)
6342 mode = SImode;
6343
6344 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6345 }
6346
6347 /* Lay out a function argument using the AAPCS rules. The rule
6348 numbers referred to here are those in the AAPCS. */
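/* For example, an 8-byte struct of two ints arriving when NCRN == 3
   and nothing has yet been placed on the stack is split by rule C5:
   one word goes in r3 and the other goes on the stack
   (aapcs_partial == 4). */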
6349 static void
6350 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6351 const_tree type, bool named)
6352 {
6353 int nregs, nregs2;
6354 int ncrn;
6355
6356 /* We only need to do this once per argument. */
6357 if (pcum->aapcs_arg_processed)
6358 return;
6359
6360 pcum->aapcs_arg_processed = true;
6361
6362 /* Special case: if named is false then we are handling an incoming
6363 anonymous argument which is on the stack. */
6364 if (!named)
6365 return;
6366
6367 /* Is this a potential co-processor register candidate? */
6368 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6369 {
6370 int slot = aapcs_select_call_coproc (pcum, mode, type);
6371 pcum->aapcs_cprc_slot = slot;
6372
6373 /* We don't have to apply any of the rules from part B of the
6374 preparation phase, these are handled elsewhere in the
6375 compiler. */
6376
6377 if (slot >= 0)
6378 {
6379 /* A Co-processor register candidate goes either in its own
6380 class of registers or on the stack. */
6381 if (!pcum->aapcs_cprc_failed[slot])
6382 {
6383 /* C1.cp - Try to allocate the argument to co-processor
6384 registers. */
6385 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6386 return;
6387
6388 /* C2.cp - Put the argument on the stack and note that we
6389 can't assign any more candidates in this slot. We also
6390 need to note that we have allocated stack space, so that
6391 we won't later try to split a non-cprc candidate between
6392 core registers and the stack. */
6393 pcum->aapcs_cprc_failed[slot] = true;
6394 pcum->can_split = false;
6395 }
6396
6397 /* We didn't get a register, so this argument goes on the
6398 stack. */
6399 gcc_assert (pcum->can_split == false);
6400 return;
6401 }
6402 }
6403
6404 /* C3 - For double-word aligned arguments, round the NCRN up to the
6405 next even number. */
6406 ncrn = pcum->aapcs_ncrn;
6407 if (ncrn & 1)
6408 {
6409 int res = arm_needs_doubleword_align (mode, type);
6410 /* Only warn during RTL expansion of call stmts, otherwise we would
6411 warn e.g. during gimplification even on functions that will be
6412 always inlined, and we'd warn multiple times. Don't warn when
6413 called in expand_function_start either, as we warn instead in
6414 arm_function_arg_boundary in that case. */
6415 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6416 inform (input_location, "parameter passing for argument of type "
6417 "%qT changed in GCC 7.1", type);
6418 else if (res > 0)
6419 ncrn++;
6420 }
6421
6422 nregs = ARM_NUM_REGS2 (mode, type);
6423
6424 /* Sigh, this test should really assert that nregs > 0, but a GCC
6425 extension allows empty structs and then gives them empty size; it
6426 then allows such a structure to be passed by value. For some of
6427 the code below we have to pretend that such an argument has
6428 non-zero size so that we 'locate' it correctly either in
6429 registers or on the stack. */
6430 gcc_assert (nregs >= 0);
6431
6432 nregs2 = nregs ? nregs : 1;
6433
6434 /* C4 - Argument fits entirely in core registers. */
6435 if (ncrn + nregs2 <= NUM_ARG_REGS)
6436 {
6437 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6438 pcum->aapcs_next_ncrn = ncrn + nregs;
6439 return;
6440 }
6441
6442 /* C5 - Some core registers left and there are no arguments already
6443 on the stack: split this argument between the remaining core
6444 registers and the stack. */
6445 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6446 {
6447 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6448 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6449 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6450 return;
6451 }
6452
6453 /* C6 - NCRN is set to 4. */
6454 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6455
6456 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6457 return;
6458 }
6459
6460 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6461 for a call to a function whose data type is FNTYPE.
6462 For a library call, FNTYPE is NULL. */
6463 void
6464 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6465 rtx libname,
6466 tree fndecl ATTRIBUTE_UNUSED)
6467 {
6468 /* Determine the procedure-call standard (PCS) variant in use. */
6469 if (fntype)
6470 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6471 else
6472 pcum->pcs_variant = arm_pcs_default;
6473
6474 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6475 {
6476 if (arm_libcall_uses_aapcs_base (libname))
6477 pcum->pcs_variant = ARM_PCS_AAPCS;
6478
6479 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6480 pcum->aapcs_reg = NULL_RTX;
6481 pcum->aapcs_partial = 0;
6482 pcum->aapcs_arg_processed = false;
6483 pcum->aapcs_cprc_slot = -1;
6484 pcum->can_split = true;
6485
6486 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6487 {
6488 int i;
6489
6490 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6491 {
6492 pcum->aapcs_cprc_failed[i] = false;
6493 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6494 }
6495 }
6496 return;
6497 }
6498
6499 /* Legacy ABIs */
6500
6501 /* On the ARM, the offset starts at 0. */
6502 pcum->nregs = 0;
6503 pcum->iwmmxt_nregs = 0;
6504 pcum->can_split = true;
6505
6506 /* Varargs vectors are treated the same as long long.
6507 named_count avoids having to change the way arm handles 'named' */
6508 pcum->named_count = 0;
6509 pcum->nargs = 0;
6510
6511 if (TARGET_REALLY_IWMMXT && fntype)
6512 {
6513 tree fn_arg;
6514
6515 for (fn_arg = TYPE_ARG_TYPES (fntype);
6516 fn_arg;
6517 fn_arg = TREE_CHAIN (fn_arg))
6518 pcum->named_count += 1;
6519
6520 if (! pcum->named_count)
6521 pcum->named_count = INT_MAX;
6522 }
6523 }
6524
6525 /* Return 1 if double word alignment is required for argument passing.
6526 Return -1 if double word alignment used to be required for argument
6527 passing before PR77728 ABI fix, but is not required anymore.
6528 Return 0 if double word alignment is not required and wasn't required
6529 before either. */
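/* For example, a long long or a struct containing a double has 8-byte
   alignment and yields 1, whereas a plain int or a struct of ints
   yields 0. */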
6530 static int
6531 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6532 {
6533 if (!type)
6534 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6535
6536 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6537 if (!AGGREGATE_TYPE_P (type))
6538 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6539
6540 /* Array types: Use member alignment of element type. */
6541 if (TREE_CODE (type) == ARRAY_TYPE)
6542 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6543
6544 int ret = 0;
6545 /* Record/aggregate types: Use greatest member alignment of any member. */
6546 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6547 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6548 {
6549 if (TREE_CODE (field) == FIELD_DECL)
6550 return 1;
6551 else
6552 /* Before PR77728 fix, we were incorrectly considering also
6553 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6554 Make sure we can warn about that with -Wpsabi. */
6555 ret = -1;
6556 }
6557
6558 return ret;
6559 }
6560
6561
6562 /* Determine where to put an argument to a function.
6563 Value is zero to push the argument on the stack,
6564 or a hard register in which to store the argument.
6565
6566 MODE is the argument's machine mode.
6567 TYPE is the data type of the argument (as a tree).
6568 This is null for libcalls where that information may
6569 not be available.
6570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6571 the preceding args and about the function being called.
6572 NAMED is nonzero if this argument is a named parameter
6573 (otherwise it is an extra parameter matching an ellipsis).
6574
6575 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6576 other arguments are passed on the stack. If (NAMED == 0) (which happens
6577 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6578 defined), say it is passed in the stack (function_prologue will
6579 indeed make it pass in the stack if necessary). */
6580
6581 static rtx
6582 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6583 const_tree type, bool named)
6584 {
6585 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6586 int nregs;
6587
6588 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6589 a call insn (op3 of a call_value insn). */
6590 if (mode == VOIDmode)
6591 return const0_rtx;
6592
6593 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6594 {
6595 aapcs_layout_arg (pcum, mode, type, named);
6596 return pcum->aapcs_reg;
6597 }
6598
6599 /* Varargs vectors are treated the same as long long.
6600 named_count avoids having to change the way arm handles 'named'. */
6601 if (TARGET_IWMMXT_ABI
6602 && arm_vector_mode_supported_p (mode)
6603 && pcum->named_count > pcum->nargs + 1)
6604 {
6605 if (pcum->iwmmxt_nregs <= 9)
6606 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6607 else
6608 {
6609 pcum->can_split = false;
6610 return NULL_RTX;
6611 }
6612 }
6613
6614 /* Put doubleword aligned quantities in even register pairs. */
6615 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6616 {
6617 int res = arm_needs_doubleword_align (mode, type);
6618 if (res < 0 && warn_psabi)
6619 inform (input_location, "parameter passing for argument of type "
6620 "%qT changed in GCC 7.1", type);
6621 else if (res > 0)
6622 pcum->nregs++;
6623 }
6624
6625 /* Only allow splitting an arg between regs and memory if all preceding
6626 args were allocated to regs. For args passed by reference we only count
6627 the reference pointer. */
6628 if (pcum->can_split)
6629 nregs = 1;
6630 else
6631 nregs = ARM_NUM_REGS2 (mode, type);
6632
6633 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6634 return NULL_RTX;
6635
6636 return gen_rtx_REG (mode, pcum->nregs);
6637 }
6638
6639 static unsigned int
6640 arm_function_arg_boundary (machine_mode mode, const_tree type)
6641 {
6642 if (!ARM_DOUBLEWORD_ALIGN)
6643 return PARM_BOUNDARY;
6644
6645 int res = arm_needs_doubleword_align (mode, type);
6646 if (res < 0 && warn_psabi)
6647 inform (input_location, "parameter passing for argument of type %qT "
6648 "changed in GCC 7.1", type);
6649
6650 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6651 }
6652
6653 static int
6654 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6655 tree type, bool named)
6656 {
6657 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6658 int nregs = pcum->nregs;
6659
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6661 {
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_partial;
6664 }
6665
6666 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6667 return 0;
6668
6669 if (NUM_ARG_REGS > nregs
6670 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6671 && pcum->can_split)
6672 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6673
6674 return 0;
6675 }
6676
6677 /* Update the data in PCUM to advance over an argument
6678 of mode MODE and data type TYPE.
6679 (TYPE is null for libcalls where that information may not be available.) */
6680
6681 static void
6682 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6683 const_tree type, bool named)
6684 {
6685 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6686
6687 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6688 {
6689 aapcs_layout_arg (pcum, mode, type, named);
6690
6691 if (pcum->aapcs_cprc_slot >= 0)
6692 {
6693 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6694 type);
6695 pcum->aapcs_cprc_slot = -1;
6696 }
6697
6698 /* Generic stuff. */
6699 pcum->aapcs_arg_processed = false;
6700 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6701 pcum->aapcs_reg = NULL_RTX;
6702 pcum->aapcs_partial = 0;
6703 }
6704 else
6705 {
6706 pcum->nargs += 1;
6707 if (arm_vector_mode_supported_p (mode)
6708 && pcum->named_count > pcum->nargs
6709 && TARGET_IWMMXT_ABI)
6710 pcum->iwmmxt_nregs += 1;
6711 else
6712 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6713 }
6714 }
6715
6716 /* Variable sized types are passed by reference. This is a GCC
6717 extension to the ARM ABI. */
6718
6719 static bool
6720 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6721 machine_mode mode ATTRIBUTE_UNUSED,
6722 const_tree type, bool named ATTRIBUTE_UNUSED)
6723 {
6724 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6725 }
6726 \f
6727 /* Encode the current state of the #pragma [no_]long_calls. */
6728 typedef enum
6729 {
6730 OFF, /* No #pragma [no_]long_calls is in effect. */
6731 LONG, /* #pragma long_calls is in effect. */
6732 SHORT /* #pragma no_long_calls is in effect. */
6733 } arm_pragma_enum;
6734
6735 static arm_pragma_enum arm_pragma_long_calls = OFF;
6736
6737 void
6738 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6739 {
6740 arm_pragma_long_calls = LONG;
6741 }
6742
6743 void
6744 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6745 {
6746 arm_pragma_long_calls = SHORT;
6747 }
6748
6749 void
6750 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6751 {
6752 arm_pragma_long_calls = OFF;
6753 }
6754 \f
6755 /* Handle an attribute requiring a FUNCTION_DECL;
6756 arguments as in struct attribute_spec.handler. */
6757 static tree
6758 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6759 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6760 {
6761 if (TREE_CODE (*node) != FUNCTION_DECL)
6762 {
6763 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6764 name);
6765 *no_add_attrs = true;
6766 }
6767
6768 return NULL_TREE;
6769 }
6770
6771 /* Handle an "interrupt" or "isr" attribute;
6772 arguments as in struct attribute_spec.handler. */
6773 static tree
6774 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6775 bool *no_add_attrs)
6776 {
6777 if (DECL_P (*node))
6778 {
6779 if (TREE_CODE (*node) != FUNCTION_DECL)
6780 {
6781 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6782 name);
6783 *no_add_attrs = true;
6784 }
6785 /* FIXME: the argument if any is checked for type attributes;
6786 should it be checked for decl ones? */
6787 }
6788 else
6789 {
6790 if (TREE_CODE (*node) == FUNCTION_TYPE
6791 || TREE_CODE (*node) == METHOD_TYPE)
6792 {
6793 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6794 {
6795 warning (OPT_Wattributes, "%qE attribute ignored",
6796 name);
6797 *no_add_attrs = true;
6798 }
6799 }
6800 else if (TREE_CODE (*node) == POINTER_TYPE
6801 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6802 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6803 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6804 {
6805 *node = build_variant_type_copy (*node);
6806 TREE_TYPE (*node) = build_type_attribute_variant
6807 (TREE_TYPE (*node),
6808 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6809 *no_add_attrs = true;
6810 }
6811 else
6812 {
6813 /* Possibly pass this attribute on from the type to a decl. */
6814 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6815 | (int) ATTR_FLAG_FUNCTION_NEXT
6816 | (int) ATTR_FLAG_ARRAY_NEXT))
6817 {
6818 *no_add_attrs = true;
6819 return tree_cons (name, args, NULL_TREE);
6820 }
6821 else
6822 {
6823 warning (OPT_Wattributes, "%qE attribute ignored",
6824 name);
6825 }
6826 }
6827 }
6828
6829 return NULL_TREE;
6830 }
6831
6832 /* Handle a "pcs" attribute; arguments as in struct
6833 attribute_spec.handler. */
6834 static tree
6835 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6836 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6837 {
6838 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6839 {
6840 warning (OPT_Wattributes, "%qE attribute ignored", name);
6841 *no_add_attrs = true;
6842 }
6843 return NULL_TREE;
6844 }
6845
6846 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6847 /* Handle the "notshared" attribute. This attribute is another way of
6848 requesting hidden visibility. ARM's compiler supports
6849 "__declspec(notshared)"; we support the same thing via an
6850 attribute. */
6851
6852 static tree
6853 arm_handle_notshared_attribute (tree *node,
6854 tree name ATTRIBUTE_UNUSED,
6855 tree args ATTRIBUTE_UNUSED,
6856 int flags ATTRIBUTE_UNUSED,
6857 bool *no_add_attrs)
6858 {
6859 tree decl = TYPE_NAME (*node);
6860
6861 if (decl)
6862 {
6863 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6864 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6865 *no_add_attrs = false;
6866 }
6867 return NULL_TREE;
6868 }
6869 #endif
6870
6871 /* This function returns true if a function with declaration FNDECL and type
6872 FNTYPE uses the stack to pass arguments or return its value, and false
6873 otherwise. This is used for functions with the attributes
6874 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6875 diagnostic messages if the stack is used. NAME is the name of the attribute
6876 used. */
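/* For example, a cmse_nonsecure_entry function taking five int
   arguments is rejected here, because the fifth argument would have to
   be passed on the stack. */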
6877
6878 static bool
6879 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6880 {
6881 function_args_iterator args_iter;
6882 CUMULATIVE_ARGS args_so_far_v;
6883 cumulative_args_t args_so_far;
6884 bool first_param = true;
6885 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6886
6887 /* Error out if any argument is passed on the stack. */
6888 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6889 args_so_far = pack_cumulative_args (&args_so_far_v);
6890 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6891 {
6892 rtx arg_rtx;
6893 machine_mode arg_mode = TYPE_MODE (arg_type);
6894
6895 prev_arg_type = arg_type;
6896 if (VOID_TYPE_P (arg_type))
6897 continue;
6898
6899 if (!first_param)
6900 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6901 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6902 if (!arg_rtx
6903 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6904 {
6905 error ("%qE attribute not available to functions with arguments "
6906 "passed on the stack", name);
6907 return true;
6908 }
6909 first_param = false;
6910 }
6911
6912 /* Error out for variadic functions since we cannot control how many
6913 arguments will be passed and thus the stack could be used. stdarg_p () is
6914 not used for this check to avoid walking the argument list twice. */
6915 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6916 {
6917 error ("%qE attribute not available to functions with variable number "
6918 "of arguments", name);
6919 return true;
6920 }
6921
6922 /* Error out if return value is passed on the stack. */
6923 ret_type = TREE_TYPE (fntype);
6924 if (arm_return_in_memory (ret_type, fntype))
6925 {
6926 error ("%qE attribute not available to functions that return value on "
6927 "the stack", name);
6928 return true;
6929 }
6930 return false;
6931 }
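
/* Illustrative example (not part of this file): declarations that the
   check above accepts and rejects. The names are hypothetical and
   compilation with -mcmse on an ARMv8-M target is assumed.

     int __attribute__ ((cmse_nonsecure_entry))
     ok (int a, int b, int c, int d);

     int __attribute__ ((cmse_nonsecure_entry))
     bad (int a, int b, int c, int d, int e);

   All of ok's arguments fit in r0-r3, so it passes the check. bad's
   fifth argument would be passed on the stack under the AAPCS, so
   cmse_func_args_or_return_in_stack reports an error for it. */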
6932
6933 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function declaration tree or otherwise issue a warning. */
6936
6937 static tree
6938 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6939 tree /* args */,
6940 int /* flags */,
6941 bool *no_add_attrs)
6942 {
6943 tree fndecl;
6944
6945 if (!use_cmse)
6946 {
6947 *no_add_attrs = true;
6948 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6949 name);
6950 return NULL_TREE;
6951 }
6952
6953 /* Warn and ignore the attribute if it is not on a function declaration. */
6954 if (TREE_CODE (*node) != FUNCTION_DECL)
6955 {
6956 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6957 name);
6958 *no_add_attrs = true;
6959 return NULL_TREE;
6960 }
6961
6962 fndecl = *node;
6963
6964 /* Warn for static linkage functions. */
6965 if (!TREE_PUBLIC (fndecl))
6966 {
6967 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6968 "with static linkage", name);
6969 *no_add_attrs = true;
6970 return NULL_TREE;
6971 }
6972
6973 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6974 TREE_TYPE (fndecl));
6975 return NULL_TREE;
6976 }
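
/* Illustrative example (not part of this file): the handler above in
   action. The function names are hypothetical and -mcmse is assumed.

     int __attribute__ ((cmse_nonsecure_entry))
     get_status (void)
     {
       return 1;
     }

     static int __attribute__ ((cmse_nonsecure_entry))
     helper (void)
     {
       return 0;
     }

   get_status is accepted and gets the special entry-function epilogue
   (returning with BXNS); helper only draws a warning, since a function
   with static linkage cannot be an entry point for non-secure code. */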
6977
6978
6979 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6980 function will check whether the attribute is allowed here and will add the
6981 attribute to the function type tree or otherwise issue a diagnostic. The
6982 reason we check this at declaration time is to only allow the use of the
6983 attribute with declarations of function pointers and not function
6984 declarations. This function checks NODE is of the expected type and issues
6985 diagnostics otherwise using NAME. If it is not of the expected type
6986 *NO_ADD_ATTRS will be set to true. */
6987
6988 static tree
6989 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6990 tree /* args */,
6991 int /* flags */,
6992 bool *no_add_attrs)
6993 {
6994 tree decl = NULL_TREE, fntype = NULL_TREE;
6995 tree type;
6996
6997 if (!use_cmse)
6998 {
6999 *no_add_attrs = true;
7000 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7001 name);
7002 return NULL_TREE;
7003 }
7004
7005 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7006 {
7007 decl = *node;
7008 fntype = TREE_TYPE (decl);
7009 }
7010
7011 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7012 fntype = TREE_TYPE (fntype);
7013
7014 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7015 {
7016 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7017 "function pointer", name);
7018 *no_add_attrs = true;
7019 return NULL_TREE;
7020 }
7021
7022 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7023
7024 if (*no_add_attrs)
7025 return NULL_TREE;
7026
7027 /* Prevent trees from being shared among function types with and without
7028 the cmse_nonsecure_call attribute. */
7029 type = TREE_TYPE (decl);
7030
7031 type = build_distinct_type_copy (type);
7032 TREE_TYPE (decl) = type;
7033 fntype = type;
7034
7035 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7036 {
7037 type = fntype;
7038 fntype = TREE_TYPE (fntype);
7039 fntype = build_distinct_type_copy (fntype);
7040 TREE_TYPE (type) = fntype;
7041 }
7042
7043 /* Construct a type attribute and add it to the function type. */
7044 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7045 TYPE_ATTRIBUTES (fntype));
7046 TYPE_ATTRIBUTES (fntype) = attrs;
7047 return NULL_TREE;
7048 }
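
/* Illustrative example (not part of this file): declarations seen by
   the handler above. The names are hypothetical and -mcmse is assumed.

     int (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

     int __attribute__ ((cmse_nonsecure_call)) direct (int);

   ns_callback is accepted: the attribute ends up on the function type
   at the base of the pointer, and calls through it use the non-secure
   call sequence (BLXNS). direct is rejected with a warning because the
   attribute may only be applied to the base type of a function
   pointer. */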
7049
7050 /* Return 0 if the attributes for two types are incompatible, 1 if they
7051 are compatible, and 2 if they are nearly compatible (which causes a
7052 warning to be generated). */
7053 static int
7054 arm_comp_type_attributes (const_tree type1, const_tree type2)
7055 {
7056 int l1, l2, s1, s2;
7057
7058 /* Check for mismatch of non-default calling convention. */
7059 if (TREE_CODE (type1) != FUNCTION_TYPE)
7060 return 1;
7061
7062 /* Check for mismatched call attributes. */
7063 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7065 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7066 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7067
7068 /* Only bother to check if an attribute is defined. */
7069 if (l1 | l2 | s1 | s2)
7070 {
7071 /* If one type has an attribute, the other must have the same attribute. */
7072 if ((l1 != l2) || (s1 != s2))
7073 return 0;
7074
7075 /* Disallow mixed attributes. */
7076 if ((l1 & s2) || (l2 & s1))
7077 return 0;
7078 }
7079
7080 /* Check for mismatched ISR attribute. */
7081 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7082 if (! l1)
7083 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7084 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7085 if (! l2)
7086 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7087 if (l1 != l2)
7088 return 0;
7089
7090 l1 = lookup_attribute ("cmse_nonsecure_call",
7091 TYPE_ATTRIBUTES (type1)) != NULL;
7092 l2 = lookup_attribute ("cmse_nonsecure_call",
7093 TYPE_ATTRIBUTES (type2)) != NULL;
7094
7095 if (l1 != l2)
7096 return 0;
7097
7098 return 1;
7099 }
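
/* Illustrative sketch (not part of this file) of a pair of types the
   hook above considers incompatible. The declarations are hypothetical.

     void f (void) __attribute__ ((long_call));
     void f (void) __attribute__ ((short_call));

   For these two function types the long_call/short_call attributes
   differ (and are mixed), so arm_comp_type_attributes returns 0 and the
   front end treats the redeclaration as conflicting. */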
7100
7101 /* Assigns default attributes to a newly defined type. This is used to
7102 set short_call/long_call attributes for function types of
7103 functions defined inside corresponding #pragma scopes. */
7104 static void
7105 arm_set_default_type_attributes (tree type)
7106 {
7107 /* Add __attribute__ ((long_call)) to all functions when inside
7108 #pragma long_calls, or __attribute__ ((short_call)) when inside
7109 #pragma no_long_calls. */
7110 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7111 {
7112 tree type_attr_list, attr_name;
7113 type_attr_list = TYPE_ATTRIBUTES (type);
7114
7115 if (arm_pragma_long_calls == LONG)
7116 attr_name = get_identifier ("long_call");
7117 else if (arm_pragma_long_calls == SHORT)
7118 attr_name = get_identifier ("short_call");
7119 else
7120 return;
7121
7122 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7123 TYPE_ATTRIBUTES (type) = type_attr_list;
7124 }
7125 }
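
/* Illustrative example (not part of this file): the pragmas this hook
   responds to. The function names are hypothetical.

     #pragma long_calls
     extern void far_helper (void);
     #pragma long_calls_off

     extern void near_helper (void);

   far_helper is declared while arm_pragma_long_calls == LONG, so its
   function type receives a default long_call attribute; near_helper is
   declared after #pragma long_calls_off and is left alone. #pragma
   no_long_calls would instead add short_call. */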
7126 \f
7127 /* Return true if DECL is known to be linked into section SECTION. */
7128
7129 static bool
7130 arm_function_in_section_p (tree decl, section *section)
7131 {
7132 /* We can only be certain about the prevailing symbol definition. */
7133 if (!decl_binds_to_current_def_p (decl))
7134 return false;
7135
7136 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7137 if (!DECL_SECTION_NAME (decl))
7138 {
7139 /* Make sure that we will not create a unique section for DECL. */
7140 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7141 return false;
7142 }
7143
7144 return function_section (decl) == section;
7145 }
7146
7147 /* Return nonzero if a 32-bit "long_call" should be generated for
7148 a call from the current function to DECL. We generate a long_call
7149 if the function:
7150
7151 a. has an __attribute__ ((long_call))
7152 or b. is within the scope of a #pragma long_calls
7153 or c. the -mlong-calls command line switch has been specified
7154
7155 However we do not generate a long call if the function:
7156
7157 d. has an __attribute__ ((short_call))
7158 or e. is inside the scope of a #pragma no_long_calls
7159 or f. is defined in the same section as the current function. */
7160
7161 bool
7162 arm_is_long_call_p (tree decl)
7163 {
7164 tree attrs;
7165
7166 if (!decl)
7167 return TARGET_LONG_CALLS;
7168
7169 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7170 if (lookup_attribute ("short_call", attrs))
7171 return false;
7172
7173 /* For "f", be conservative, and only cater for cases in which the
7174 whole of the current function is placed in the same section. */
7175 if (!flag_reorder_blocks_and_partition
7176 && TREE_CODE (decl) == FUNCTION_DECL
7177 && arm_function_in_section_p (decl, current_function_section ()))
7178 return false;
7179
7180 if (lookup_attribute ("long_call", attrs))
7181 return true;
7182
7183 return TARGET_LONG_CALLS;
7184 }
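
/* Illustrative example (not part of this file): how the rules above
   interact with -mlong-calls. The declarations are hypothetical.

     extern void far_away (void);
     void fast_path (void) __attribute__ ((short_call));

     void
     dispatch (int x)
     {
       if (x)
         far_away ();
       else
         fast_path ();
     }

   With -mlong-calls, the call to far_away is emitted as a long call
   (the address is loaded into a register and branched to), while the
   call to fast_path stays a plain BL because of rule d above. */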
7185
7186 /* Return nonzero if it is ok to make a tail-call to DECL. */
7187 static bool
7188 arm_function_ok_for_sibcall (tree decl, tree exp)
7189 {
7190 unsigned long func_type;
7191
7192 if (cfun->machine->sibcall_blocked)
7193 return false;
7194
7195 /* Never tailcall something if we are generating code for Thumb-1. */
7196 if (TARGET_THUMB1)
7197 return false;
7198
7199 /* The PIC register is live on entry to VxWorks PLT entries, so we
7200 must make the call before restoring the PIC register. */
7201 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7202 return false;
7203
7204 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7205 may be used both as target of the call and base register for restoring
7206 the VFP registers. */
7207 if (TARGET_APCS_FRAME && TARGET_ARM
7208 && TARGET_HARD_FLOAT
7209 && decl && arm_is_long_call_p (decl))
7210 return false;
7211
7212 /* If we are interworking and the function is not declared static
7213 then we can't tail-call it unless we know that it exists in this
7214 compilation unit (since it might be a Thumb routine). */
7215 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7216 && !TREE_ASM_WRITTEN (decl))
7217 return false;
7218
7219 func_type = arm_current_func_type ();
7220 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7221 if (IS_INTERRUPT (func_type))
7222 return false;
7223
7224 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7225 generated for entry functions themselves. */
7226 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7227 return false;
7228
7229 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7230 this would complicate matters for later code generation. */
7231 if (TREE_CODE (exp) == CALL_EXPR)
7232 {
7233 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7234 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7235 return false;
7236 }
7237
7238 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7239 {
7240 /* Check that the return value locations are the same. For
7241 example that we aren't returning a value from the sibling in
7242 a VFP register but then need to transfer it to a core
7243 register. */
7244 rtx a, b;
7245 tree decl_or_type = decl;
7246
7247 /* If it is an indirect function pointer, get the function type. */
7248 if (!decl)
7249 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7250
7251 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7252 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7253 cfun->decl, false);
7254 if (!rtx_equal_p (a, b))
7255 return false;
7256 }
7257
7258 /* Never tailcall if function may be called with a misaligned SP. */
7259 if (IS_STACKALIGN (func_type))
7260 return false;
7261
7262 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7263 references should become a NOP. Don't convert such calls into
7264 sibling calls. */
7265 if (TARGET_AAPCS_BASED
7266 && arm_abi == ARM_ABI_AAPCS
7267 && decl
7268 && DECL_WEAK (decl))
7269 return false;
7270
7271 /* We cannot do a tailcall for an indirect call by descriptor if all the
7272 argument registers are used because the only register left to load the
7273 address is IP and it will already contain the static chain. */
7274 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7275 {
7276 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7277 CUMULATIVE_ARGS cum;
7278 cumulative_args_t cum_v;
7279
7280 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7281 cum_v = pack_cumulative_args (&cum);
7282
7283 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7284 {
7285 tree type = TREE_VALUE (t);
7286 if (!VOID_TYPE_P (type))
7287 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7288 }
7289
7290 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7291 return false;
7292 }
7293
7294 /* Everything else is ok. */
7295 return true;
7296 }
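
/* Illustrative example (not part of this file): a call the checks above
   usually allow to become a sibling call. The names are hypothetical.

     int callee (int);

     int
     caller (int x)
     {
       return callee (x + 1);
     }

   At -O2 on ARM or Thumb-2 this is normally emitted as a direct branch
   to callee with no new stack frame. The checks above would block it
   if, for example, the target is Thumb-1, caller is an interrupt
   handler, or callee's type carries cmse_nonsecure_call. */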
7297
7298 \f
7299 /* Addressing mode support functions. */
7300
7301 /* Return nonzero if X is a legitimate immediate operand when compiling
7302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7303 int
7304 legitimate_pic_operand_p (rtx x)
7305 {
7306 if (GET_CODE (x) == SYMBOL_REF
7307 || (GET_CODE (x) == CONST
7308 && GET_CODE (XEXP (x, 0)) == PLUS
7309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7310 return 0;
7311
7312 return 1;
7313 }
7314
7315 /* Record that the current function needs a PIC register. Initialize
7316 cfun->machine->pic_reg if we have not already done so. */
7317
7318 static void
7319 require_pic_register (void)
7320 {
7321 /* A lot of the logic here is made obscure by the fact that this
7322 routine gets called as part of the rtx cost estimation process.
7323 We don't want those calls to affect any assumptions about the real
7324 function; and further, we can't call entry_of_function() until we
7325 start the real expansion process. */
7326 if (!crtl->uses_pic_offset_table)
7327 {
7328 gcc_assert (can_create_pseudo_p ());
7329 if (arm_pic_register != INVALID_REGNUM
7330 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7331 {
7332 if (!cfun->machine->pic_reg)
7333 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7334
7335 /* Play games to avoid marking the function as needing pic
7336 if we are being called as part of the cost-estimation
7337 process. */
7338 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7339 crtl->uses_pic_offset_table = 1;
7340 }
7341 else
7342 {
7343 rtx_insn *seq, *insn;
7344
7345 if (!cfun->machine->pic_reg)
7346 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7347
7348 /* Play games to avoid marking the function as needing pic
7349 if we are being called as part of the cost-estimation
7350 process. */
7351 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7352 {
7353 crtl->uses_pic_offset_table = 1;
7354 start_sequence ();
7355
7356 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7357 && arm_pic_register > LAST_LO_REGNUM)
7358 emit_move_insn (cfun->machine->pic_reg,
7359 gen_rtx_REG (Pmode, arm_pic_register));
7360 else
7361 arm_load_pic_register (0UL);
7362
7363 seq = get_insns ();
7364 end_sequence ();
7365
7366 for (insn = seq; insn; insn = NEXT_INSN (insn))
7367 if (INSN_P (insn))
7368 INSN_LOCATION (insn) = prologue_location;
7369
7370 /* We can be called during expansion of PHI nodes, where
7371 we can't yet emit instructions directly in the final
7372 insn stream. Queue the insns on the entry edge, they will
7373 be committed after everything else is expanded. */
7374 insert_insn_on_edge (seq,
7375 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7376 }
7377 }
7378 }
7379 }
7380
7381 rtx
7382 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7383 {
7384 if (GET_CODE (orig) == SYMBOL_REF
7385 || GET_CODE (orig) == LABEL_REF)
7386 {
7387 if (reg == 0)
7388 {
7389 gcc_assert (can_create_pseudo_p ());
7390 reg = gen_reg_rtx (Pmode);
7391 }
7392
7393 /* VxWorks does not impose a fixed gap between segments; the run-time
7394 gap can be different from the object-file gap. We therefore can't
7395 use GOTOFF unless we are absolutely sure that the symbol is in the
7396 same segment as the GOT. Unfortunately, the flexibility of linker
7397 scripts means that we can't be sure of that in general, so assume
7398 that GOTOFF is never valid on VxWorks. */
7399 /* References to weak symbols cannot be resolved locally: they
7400 may be overridden by a non-weak definition at link time. */
7401 rtx_insn *insn;
7402 if ((GET_CODE (orig) == LABEL_REF
7403 || (GET_CODE (orig) == SYMBOL_REF
7404 && SYMBOL_REF_LOCAL_P (orig)
7405 && (SYMBOL_REF_DECL (orig)
7406 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7407 && NEED_GOT_RELOC
7408 && arm_pic_data_is_text_relative)
7409 insn = arm_pic_static_addr (orig, reg);
7410 else
7411 {
7412 rtx pat;
7413 rtx mem;
7414
7415 /* If this function doesn't have a pic register, create one now. */
7416 require_pic_register ();
7417
7418 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7419
7420 /* Make the MEM as close to a constant as possible. */
7421 mem = SET_SRC (pat);
7422 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7423 MEM_READONLY_P (mem) = 1;
7424 MEM_NOTRAP_P (mem) = 1;
7425
7426 insn = emit_insn (pat);
7427 }
7428
7429 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7430 by loop. */
7431 set_unique_reg_note (insn, REG_EQUAL, orig);
7432
7433 return reg;
7434 }
7435 else if (GET_CODE (orig) == CONST)
7436 {
7437 rtx base, offset;
7438
7439 if (GET_CODE (XEXP (orig, 0)) == PLUS
7440 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7441 return orig;
7442
7443 /* Handle the case where we have: const (UNSPEC_TLS). */
7444 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7445 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7446 return orig;
7447
7448 /* Handle the case where we have:
7449 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7450 CONST_INT. */
7451 if (GET_CODE (XEXP (orig, 0)) == PLUS
7452 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7453 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7454 {
7455 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7456 return orig;
7457 }
7458
7459 if (reg == 0)
7460 {
7461 gcc_assert (can_create_pseudo_p ());
7462 reg = gen_reg_rtx (Pmode);
7463 }
7464
7465 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7466
7467 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7468 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7469 base == reg ? 0 : reg);
7470
7471 if (CONST_INT_P (offset))
7472 {
7473 /* The base register doesn't really matter, we only want to
7474 test the index for the appropriate mode. */
7475 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7476 {
7477 gcc_assert (can_create_pseudo_p ());
7478 offset = force_reg (Pmode, offset);
7479 }
7480
7481 if (CONST_INT_P (offset))
7482 return plus_constant (Pmode, base, INTVAL (offset));
7483 }
7484
7485 if (GET_MODE_SIZE (mode) > 4
7486 && (GET_MODE_CLASS (mode) == MODE_INT
7487 || TARGET_SOFT_FLOAT))
7488 {
7489 emit_insn (gen_addsi3 (reg, base, offset));
7490 return reg;
7491 }
7492
7493 return gen_rtx_PLUS (Pmode, base, offset);
7494 }
7495
7496 return orig;
7497 }
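
/* Illustrative example (not part of this file): the kind of access the
   routine above rewrites. The names are hypothetical.

     extern int counter;

     int
     read_counter (void)
     {
       return counter;
     }

   With -fpic the address of counter is not a link-time constant for the
   shared object, so the SYMBOL_REF is replaced by a load from the GOT
   relative to cfun->machine->pic_reg, which require_pic_register above
   arranges to have initialized by arm_load_pic_register below. */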
7498
7499
7500 /* Find a spare register to use during the prolog of a function. */
7501
7502 static int
7503 thumb_find_work_register (unsigned long pushed_regs_mask)
7504 {
7505 int reg;
7506
7507 /* Check the argument registers first as these are call-used. The
7508 register allocation order means that sometimes r3 might be used
7509 but earlier argument registers might not, so check them all. */
7510 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7511 if (!df_regs_ever_live_p (reg))
7512 return reg;
7513
7514 /* Before going on to check the call-saved registers we can try a couple
7515 more ways of deducing that r3 is available. The first is when we are
7516 pushing anonymous arguments onto the stack and we have fewer than 4
7517 registers' worth of fixed arguments (*). In this case r3 will be part of
7518 the variable argument list and so we can be sure that it will be
7519 pushed right at the start of the function. Hence it will be available
7520 for the rest of the prologue.
7521 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7522 if (cfun->machine->uses_anonymous_args
7523 && crtl->args.pretend_args_size > 0)
7524 return LAST_ARG_REGNUM;
7525
7526 /* The other case is when we have fixed arguments but fewer than 4 registers'
7527 worth. In this case r3 might be used in the body of the function, but
7528 it is not being used to convey an argument into the function. In theory
7529 we could just check crtl->args.size to see how many bytes are
7530 being passed in argument registers, but it seems that it is unreliable.
7531 Sometimes it will have the value 0 when in fact arguments are being
7532 passed. (See testcase execute/20021111-1.c for an example). So we also
7533 check the args_info.nregs field as well. The problem with this field is
7534 that it makes no allowances for arguments that are passed to the
7535 function but which are not used. Hence we could miss an opportunity
7536 when a function has an unused argument in r3. But it is better to be
7537 safe than to be sorry. */
7538 if (! cfun->machine->uses_anonymous_args
7539 && crtl->args.size >= 0
7540 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7541 && (TARGET_AAPCS_BASED
7542 ? crtl->args.info.aapcs_ncrn < 4
7543 : crtl->args.info.nregs < 4))
7544 return LAST_ARG_REGNUM;
7545
7546 /* Otherwise look for a call-saved register that is going to be pushed. */
7547 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7548 if (pushed_regs_mask & (1 << reg))
7549 return reg;
7550
7551 if (TARGET_THUMB2)
7552 {
7553 /* Thumb-2 can use high regs. */
7554 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7555 if (pushed_regs_mask & (1 << reg))
7556 return reg;
7557 }
7558 /* Something went wrong - thumb_compute_save_reg_mask()
7559 should have arranged for a suitable register to be pushed. */
7560 gcc_unreachable ();
7561 }
7562
7563 static GTY(()) int pic_labelno;
7564
7565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7566 low register. */
7567
7568 void
7569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7570 {
7571 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7572
7573 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7574 return;
7575
7576 gcc_assert (flag_pic);
7577
7578 pic_reg = cfun->machine->pic_reg;
7579 if (TARGET_VXWORKS_RTP)
7580 {
7581 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7583 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7584
7585 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7586
7587 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7588 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7589 }
7590 else
7591 {
7592 /* We use an UNSPEC rather than a LABEL_REF because this label
7593 never appears in the code stream. */
7594
7595 labelno = GEN_INT (pic_labelno++);
7596 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7597 l1 = gen_rtx_CONST (VOIDmode, l1);
7598
7599 /* On the ARM the PC register contains 'dot + 8' at the time of the
7600 addition, on the Thumb it is 'dot + 4'. */
7601 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7602 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7603 UNSPEC_GOTSYM_OFF);
7604 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7605
7606 if (TARGET_32BIT)
7607 {
7608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7609 }
7610 else /* TARGET_THUMB1 */
7611 {
7612 if (arm_pic_register != INVALID_REGNUM
7613 && REGNO (pic_reg) > LAST_LO_REGNUM)
7614 {
7615 /* We will have pushed the pic register, so we should always be
7616 able to find a work register. */
7617 pic_tmp = gen_rtx_REG (SImode,
7618 thumb_find_work_register (saved_regs));
7619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7620 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7621 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7622 }
7623 else if (arm_pic_register != INVALID_REGNUM
7624 && arm_pic_register > LAST_LO_REGNUM
7625 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7626 {
7627 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7628 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7629 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7630 }
7631 else
7632 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7633 }
7634 }
7635
7636 /* Need to emit this whether or not we obey regdecls,
7637 since setjmp/longjmp can cause life info to screw up. */
7638 emit_use (pic_reg);
7639 }
7640
7641 /* Generate code to load the address of a static var when flag_pic is set. */
7642 static rtx_insn *
7643 arm_pic_static_addr (rtx orig, rtx reg)
7644 {
7645 rtx l1, labelno, offset_rtx;
7646
7647 gcc_assert (flag_pic);
7648
7649 /* We use an UNSPEC rather than a LABEL_REF because this label
7650 never appears in the code stream. */
7651 labelno = GEN_INT (pic_labelno++);
7652 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7653 l1 = gen_rtx_CONST (VOIDmode, l1);
7654
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition, on the Thumb it is 'dot + 4'. */
7657 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7658 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7659 UNSPEC_SYMBOL_OFFSET);
7660 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7661
7662 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7663 }
7664
7665 /* Return nonzero if X is valid as an ARM state addressing register. */
7666 static int
7667 arm_address_register_rtx_p (rtx x, int strict_p)
7668 {
7669 int regno;
7670
7671 if (!REG_P (x))
7672 return 0;
7673
7674 regno = REGNO (x);
7675
7676 if (strict_p)
7677 return ARM_REGNO_OK_FOR_BASE_P (regno);
7678
7679 return (regno <= LAST_ARM_REGNUM
7680 || regno >= FIRST_PSEUDO_REGISTER
7681 || regno == FRAME_POINTER_REGNUM
7682 || regno == ARG_POINTER_REGNUM);
7683 }
7684
7685 /* Return TRUE if this rtx is the difference of a symbol and a label,
7686 and will reduce to a PC-relative relocation in the object file.
7687 Expressions like this can be left alone when generating PIC, rather
7688 than forced through the GOT. */
7689 static int
7690 pcrel_constant_p (rtx x)
7691 {
7692 if (GET_CODE (x) == MINUS)
7693 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7694
7695 return FALSE;
7696 }
7697
7698 /* Return true if X will surely end up in an index register after next
7699 splitting pass. */
7700 static bool
7701 will_be_in_index_register (const_rtx x)
7702 {
7703 /* arm.md: calculate_pic_address will split this into a register. */
7704 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7705 }
7706
7707 /* Return nonzero if X is a valid ARM state address operand. */
7708 int
7709 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7710 int strict_p)
7711 {
7712 bool use_ldrd;
7713 enum rtx_code code = GET_CODE (x);
7714
7715 if (arm_address_register_rtx_p (x, strict_p))
7716 return 1;
7717
7718 use_ldrd = (TARGET_LDRD
7719 && (mode == DImode || mode == DFmode));
7720
7721 if (code == POST_INC || code == PRE_DEC
7722 || ((code == PRE_INC || code == POST_DEC)
7723 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7724 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7725
7726 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7727 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7728 && GET_CODE (XEXP (x, 1)) == PLUS
7729 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7730 {
7731 rtx addend = XEXP (XEXP (x, 1), 1);
7732
7733 /* Don't allow ldrd post-increment by register because it's hard
7734 to fix up invalid register choices. */
7735 if (use_ldrd
7736 && GET_CODE (x) == POST_MODIFY
7737 && REG_P (addend))
7738 return 0;
7739
7740 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7741 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7742 }
7743
7744 /* After reload constants split into minipools will have addresses
7745 from a LABEL_REF. */
7746 else if (reload_completed
7747 && (code == LABEL_REF
7748 || (code == CONST
7749 && GET_CODE (XEXP (x, 0)) == PLUS
7750 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7751 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7752 return 1;
7753
7754 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7755 return 0;
7756
7757 else if (code == PLUS)
7758 {
7759 rtx xop0 = XEXP (x, 0);
7760 rtx xop1 = XEXP (x, 1);
7761
7762 return ((arm_address_register_rtx_p (xop0, strict_p)
7763 && ((CONST_INT_P (xop1)
7764 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7765 || (!strict_p && will_be_in_index_register (xop1))))
7766 || (arm_address_register_rtx_p (xop1, strict_p)
7767 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7768 }
7769
7770 #if 0
7771 /* Reload currently can't handle MINUS, so disable this for now */
7772 else if (GET_CODE (x) == MINUS)
7773 {
7774 rtx xop0 = XEXP (x, 0);
7775 rtx xop1 = XEXP (x, 1);
7776
7777 return (arm_address_register_rtx_p (xop0, strict_p)
7778 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7779 }
7780 #endif
7781
7782 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7783 && code == SYMBOL_REF
7784 && CONSTANT_POOL_ADDRESS_P (x)
7785 && ! (flag_pic
7786 && symbol_mentioned_p (get_pool_constant (x))
7787 && ! pcrel_constant_p (get_pool_constant (x))))
7788 return 1;
7789
7790 return 0;
7791 }
7792
7793 /* Return true if we can avoid creating a constant pool entry for x. */
7794 static bool
7795 can_avoid_literal_pool_for_label_p (rtx x)
7796 {
7797 /* Normally we can assign constant values to target registers without
7798 the help of a constant pool. But there are cases where we have to
7799 use the constant pool, such as:
7800 1) assigning a label to a register;
7801 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7802 
7803 A constant pool access of the form:
7804 (set (reg r0) (mem (symbol_ref (".LC0"))))
7805 will cause the use of the literal pool (later, in function arm_reorg).
7806 So here we mark such a form as invalid; the compiler will then
7807 adjust it into:
7808 (set (reg r0) (symbol_ref (".LC0")))
7809 (set (reg r0) (mem (reg r0))).
7810 No extra register is required, and (mem (reg r0)) won't cause the
7811 use of literal pools. */
7812 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7813 && CONSTANT_POOL_ADDRESS_P (x))
7814 return 1;
7815 return 0;
7816 }
7817
7818
7819 /* Return nonzero if X is a valid Thumb-2 address operand. */
7820 static int
7821 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7822 {
7823 bool use_ldrd;
7824 enum rtx_code code = GET_CODE (x);
7825
7826 if (arm_address_register_rtx_p (x, strict_p))
7827 return 1;
7828
7829 use_ldrd = (TARGET_LDRD
7830 && (mode == DImode || mode == DFmode));
7831
7832 if (code == POST_INC || code == PRE_DEC
7833 || ((code == PRE_INC || code == POST_DEC)
7834 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7835 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7836
7837 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7838 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7839 && GET_CODE (XEXP (x, 1)) == PLUS
7840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7841 {
7842 /* Thumb-2 only has autoincrement by constant. */
7843 rtx addend = XEXP (XEXP (x, 1), 1);
7844 HOST_WIDE_INT offset;
7845
7846 if (!CONST_INT_P (addend))
7847 return 0;
7848
7849 offset = INTVAL (addend);
7850 if (GET_MODE_SIZE (mode) <= 4)
7851 return (offset > -256 && offset < 256);
7852
7853 return (use_ldrd && offset > -1024 && offset < 1024
7854 && (offset & 3) == 0);
7855 }
7856
7857 /* After reload constants split into minipools will have addresses
7858 from a LABEL_REF. */
7859 else if (reload_completed
7860 && (code == LABEL_REF
7861 || (code == CONST
7862 && GET_CODE (XEXP (x, 0)) == PLUS
7863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7864 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7865 return 1;
7866
7867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7868 return 0;
7869
7870 else if (code == PLUS)
7871 {
7872 rtx xop0 = XEXP (x, 0);
7873 rtx xop1 = XEXP (x, 1);
7874
7875 return ((arm_address_register_rtx_p (xop0, strict_p)
7876 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7877 || (!strict_p && will_be_in_index_register (xop1))))
7878 || (arm_address_register_rtx_p (xop1, strict_p)
7879 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7880 }
7881
7882 else if (can_avoid_literal_pool_for_label_p (x))
7883 return 0;
7884
7885 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7886 && code == SYMBOL_REF
7887 && CONSTANT_POOL_ADDRESS_P (x)
7888 && ! (flag_pic
7889 && symbol_mentioned_p (get_pool_constant (x))
7890 && ! pcrel_constant_p (get_pool_constant (x))))
7891 return 1;
7892
7893 return 0;
7894 }
7895
7896 /* Return nonzero if INDEX is valid for an address index operand in
7897 ARM state. */
7898 static int
7899 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7900 int strict_p)
7901 {
7902 HOST_WIDE_INT range;
7903 enum rtx_code code = GET_CODE (index);
7904
7905 /* Standard coprocessor addressing modes. */
7906 if (TARGET_HARD_FLOAT
7907 && (mode == SFmode || mode == DFmode))
7908 return (code == CONST_INT && INTVAL (index) < 1024
7909 && INTVAL (index) > -1024
7910 && (INTVAL (index) & 3) == 0);
7911
7912 /* For quad modes, we restrict the constant offset to be slightly less
7913 than what the instruction format permits. We do this because for
7914 quad mode moves, we will actually decompose them into two separate
7915 double-mode reads or writes. INDEX must therefore be a valid
7916 (double-mode) offset and so should INDEX+8. */
7917 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7918 return (code == CONST_INT
7919 && INTVAL (index) < 1016
7920 && INTVAL (index) > -1024
7921 && (INTVAL (index) & 3) == 0);
7922
7923 /* We have no such constraint on double mode offsets, so we permit the
7924 full range of the instruction format. */
7925 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7926 return (code == CONST_INT
7927 && INTVAL (index) < 1024
7928 && INTVAL (index) > -1024
7929 && (INTVAL (index) & 3) == 0);
7930
7931 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7932 return (code == CONST_INT
7933 && INTVAL (index) < 1024
7934 && INTVAL (index) > -1024
7935 && (INTVAL (index) & 3) == 0);
7936
7937 if (arm_address_register_rtx_p (index, strict_p)
7938 && (GET_MODE_SIZE (mode) <= 4))
7939 return 1;
7940
7941 if (mode == DImode || mode == DFmode)
7942 {
7943 if (code == CONST_INT)
7944 {
7945 HOST_WIDE_INT val = INTVAL (index);
7946
7947 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7948 If vldr is selected it uses arm_coproc_mem_operand. */
7949 if (TARGET_LDRD)
7950 return val > -256 && val < 256;
7951 else
7952 return val > -4096 && val < 4092;
7953 }
7954
7955 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7956 }
7957
7958 if (GET_MODE_SIZE (mode) <= 4
7959 && ! (arm_arch4
7960 && (mode == HImode
7961 || mode == HFmode
7962 || (mode == QImode && outer == SIGN_EXTEND))))
7963 {
7964 if (code == MULT)
7965 {
7966 rtx xiop0 = XEXP (index, 0);
7967 rtx xiop1 = XEXP (index, 1);
7968
7969 return ((arm_address_register_rtx_p (xiop0, strict_p)
7970 && power_of_two_operand (xiop1, SImode))
7971 || (arm_address_register_rtx_p (xiop1, strict_p)
7972 && power_of_two_operand (xiop0, SImode)));
7973 }
7974 else if (code == LSHIFTRT || code == ASHIFTRT
7975 || code == ASHIFT || code == ROTATERT)
7976 {
7977 rtx op = XEXP (index, 1);
7978
7979 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7980 && CONST_INT_P (op)
7981 && INTVAL (op) > 0
7982 && INTVAL (op) <= 31);
7983 }
7984 }
7985
7986 /* For ARM v4 we may be doing a sign-extend operation during the
7987 load. */
7988 if (arm_arch4)
7989 {
7990 if (mode == HImode
7991 || mode == HFmode
7992 || (outer == SIGN_EXTEND && mode == QImode))
7993 range = 256;
7994 else
7995 range = 4096;
7996 }
7997 else
7998 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7999
8000 return (code == CONST_INT
8001 && INTVAL (index) < range
8002 && INTVAL (index) > -range);
8003 }
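
/* Illustrative example (not part of this file): an address accepted by
   the scaled-index case above. The function is hypothetical.

     int
     element (int *a, int i)
     {
       return a[i];
     }

   The address of a[i] is (plus (reg a) (mult (reg i) (const_int 4))),
   which in ARM state maps onto the [rA, rI, lsl #2] addressing mode and
   is accepted by the MULT branch above via power_of_two_operand. */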
8004
8005 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8006 index operand. i.e. 1, 2, 4 or 8. */
8007 static bool
8008 thumb2_index_mul_operand (rtx op)
8009 {
8010 HOST_WIDE_INT val;
8011
8012 if (!CONST_INT_P (op))
8013 return false;
8014
8015 val = INTVAL (op);
8016 return (val == 1 || val == 2 || val == 4 || val == 8);
8017 }
8018
8019 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8020 static int
8021 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8022 {
8023 enum rtx_code code = GET_CODE (index);
8024
8025 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8026 /* Standard coprocessor addressing modes. */
8027 if (TARGET_HARD_FLOAT
8028 && (mode == SFmode || mode == DFmode))
8029 return (code == CONST_INT && INTVAL (index) < 1024
8030 /* Thumb-2 allows only > -256 index range for its core register
8031 load/stores. Since we allow SF/DF in core registers, we have
8032 to use the intersection between -256~4096 (core) and -1024~1024
8033 (coprocessor). */
8034 && INTVAL (index) > -256
8035 && (INTVAL (index) & 3) == 0);
8036
8037 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8038 {
8039 /* For DImode assume values will usually live in core regs
8040 and only allow LDRD addressing modes. */
8041 if (!TARGET_LDRD || mode != DImode)
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8046 }
8047
8048 /* For quad modes, we restrict the constant offset to be slightly less
8049 than what the instruction format permits. We do this because for
8050 quad mode moves, we will actually decompose them into two separate
8051 double-mode reads or writes. INDEX must therefore be a valid
8052 (double-mode) offset and so should INDEX+8. */
8053 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8054 return (code == CONST_INT
8055 && INTVAL (index) < 1016
8056 && INTVAL (index) > -1024
8057 && (INTVAL (index) & 3) == 0);
8058
8059 /* We have no such constraint on double mode offsets, so we permit the
8060 full range of the instruction format. */
8061 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8062 return (code == CONST_INT
8063 && INTVAL (index) < 1024
8064 && INTVAL (index) > -1024
8065 && (INTVAL (index) & 3) == 0);
8066
8067 if (arm_address_register_rtx_p (index, strict_p)
8068 && (GET_MODE_SIZE (mode) <= 4))
8069 return 1;
8070
8071 if (mode == DImode || mode == DFmode)
8072 {
8073 if (code == CONST_INT)
8074 {
8075 HOST_WIDE_INT val = INTVAL (index);
8076 /* Thumb-2 ldrd only has reg+const addressing modes.
8077 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8078 If vldr is selected it uses arm_coproc_mem_operand. */
8079 if (TARGET_LDRD)
8080 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8081 else
8082 return IN_RANGE (val, -255, 4095 - 4);
8083 }
8084 else
8085 return 0;
8086 }
8087
8088 if (code == MULT)
8089 {
8090 rtx xiop0 = XEXP (index, 0);
8091 rtx xiop1 = XEXP (index, 1);
8092
8093 return ((arm_address_register_rtx_p (xiop0, strict_p)
8094 && thumb2_index_mul_operand (xiop1))
8095 || (arm_address_register_rtx_p (xiop1, strict_p)
8096 && thumb2_index_mul_operand (xiop0)));
8097 }
8098 else if (code == ASHIFT)
8099 {
8100 rtx op = XEXP (index, 1);
8101
8102 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8103 && CONST_INT_P (op)
8104 && INTVAL (op) > 0
8105 && INTVAL (op) <= 3);
8106 }
8107
8108 return (code == CONST_INT
8109 && INTVAL (index) < 4096
8110 && INTVAL (index) > -256);
8111 }
8112
8113 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8114 static int
8115 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8116 {
8117 int regno;
8118
8119 if (!REG_P (x))
8120 return 0;
8121
8122 regno = REGNO (x);
8123
8124 if (strict_p)
8125 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8126
8127 return (regno <= LAST_LO_REGNUM
8128 || regno > LAST_VIRTUAL_REGISTER
8129 || regno == FRAME_POINTER_REGNUM
8130 || (GET_MODE_SIZE (mode) >= 4
8131 && (regno == STACK_POINTER_REGNUM
8132 || regno >= FIRST_PSEUDO_REGISTER
8133 || x == hard_frame_pointer_rtx
8134 || x == arg_pointer_rtx)));
8135 }
8136
8137 /* Return nonzero if x is a legitimate index register. This is the case
8138 for any base register that can access a QImode object. */
8139 inline static int
8140 thumb1_index_register_rtx_p (rtx x, int strict_p)
8141 {
8142 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8143 }
8144
8145 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8146
8147 The AP may be eliminated to either the SP or the FP, so we use the
8148 least common denominator, e.g. SImode, and offsets from 0 to 64.
8149
8150 ??? Verify whether the above is the right approach.
8151
8152 ??? Also, the FP may be eliminated to the SP, so perhaps that
8153 needs special handling also.
8154
8155 ??? Look at how the mips16 port solves this problem. It probably uses
8156 better ways to solve some of these problems.
8157
8158 Although it is not incorrect, we don't accept QImode and HImode
8159 addresses based on the frame pointer or arg pointer until the
8160 reload pass starts. This is so that eliminating such addresses
8161 into stack based ones won't produce impossible code. */
8162 int
8163 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8164 {
8165 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8166 return 0;
8167
8168 /* ??? Not clear if this is right. Experiment. */
8169 if (GET_MODE_SIZE (mode) < 4
8170 && !(reload_in_progress || reload_completed)
8171 && (reg_mentioned_p (frame_pointer_rtx, x)
8172 || reg_mentioned_p (arg_pointer_rtx, x)
8173 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8174 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8175 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8176 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8177 return 0;
8178
8179 /* Accept any base register. SP only in SImode or larger. */
8180 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8181 return 1;
8182
8183 /* This is PC relative data before arm_reorg runs. */
8184 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8185 && GET_CODE (x) == SYMBOL_REF
8186 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8187 return 1;
8188
8189 /* This is PC relative data after arm_reorg runs. */
8190 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8191 && reload_completed
8192 && (GET_CODE (x) == LABEL_REF
8193 || (GET_CODE (x) == CONST
8194 && GET_CODE (XEXP (x, 0)) == PLUS
8195 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8196 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8197 return 1;
8198
8199 /* Post-inc indexing only supported for SImode and larger. */
8200 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8201 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8202 return 1;
8203
8204 else if (GET_CODE (x) == PLUS)
8205 {
8206 /* REG+REG address can be any two index registers. */
8207 /* We disallow FRAME+REG addressing since we know that FRAME
8208 will be replaced with STACK, and SP relative addressing only
8209 permits SP+OFFSET. */
8210 if (GET_MODE_SIZE (mode) <= 4
8211 && XEXP (x, 0) != frame_pointer_rtx
8212 && XEXP (x, 1) != frame_pointer_rtx
8213 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8214 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8215 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8216 return 1;
8217
8218 /* REG+const has 5-7 bit offset for non-SP registers. */
8219 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8220 || XEXP (x, 0) == arg_pointer_rtx)
8221 && CONST_INT_P (XEXP (x, 1))
8222 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8223 return 1;
8224
8225 /* REG+const has 10-bit offset for SP, but only SImode and
8226 larger is supported. */
8227 /* ??? Should probably check for DI/DFmode overflow here
8228 just like GO_IF_LEGITIMATE_OFFSET does. */
8229 else if (REG_P (XEXP (x, 0))
8230 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8231 && GET_MODE_SIZE (mode) >= 4
8232 && CONST_INT_P (XEXP (x, 1))
8233 && INTVAL (XEXP (x, 1)) >= 0
8234 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8235 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8236 return 1;
8237
8238 else if (REG_P (XEXP (x, 0))
8239 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8240 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8241 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8242 && REGNO (XEXP (x, 0))
8243 <= LAST_VIRTUAL_POINTER_REGISTER))
8244 && GET_MODE_SIZE (mode) >= 4
8245 && CONST_INT_P (XEXP (x, 1))
8246 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8247 return 1;
8248 }
8249
8250 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8251 && GET_MODE_SIZE (mode) == 4
8252 && GET_CODE (x) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x)
8254 && ! (flag_pic
8255 && symbol_mentioned_p (get_pool_constant (x))
8256 && ! pcrel_constant_p (get_pool_constant (x))))
8257 return 1;
8258
8259 return 0;
8260 }
8261
8262 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8263 instruction of mode MODE. */
8264 int
8265 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8266 {
8267 switch (GET_MODE_SIZE (mode))
8268 {
8269 case 1:
8270 return val >= 0 && val < 32;
8271
8272 case 2:
8273 return val >= 0 && val < 64 && (val & 1) == 0;
8274
8275 default:
8276 return (val >= 0
8277 && (val + GET_MODE_SIZE (mode)) <= 128
8278 && (val & 3) == 0);
8279 }
8280 }
8281
8282 bool
8283 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8284 {
8285 if (TARGET_ARM)
8286 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8287 else if (TARGET_THUMB2)
8288 return thumb2_legitimate_address_p (mode, x, strict_p);
8289 else /* if (TARGET_THUMB1) */
8290 return thumb1_legitimate_address_p (mode, x, strict_p);
8291 }
8292
8293 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8294
8295 Given an rtx X being reloaded into a reg required to be
8296 in class CLASS, return the class of reg to actually use.
8297 In general this is just CLASS, but for the Thumb core registers and
8298 immediate constants we prefer a LO_REGS class or a subset. */
8299
8300 static reg_class_t
8301 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8302 {
8303 if (TARGET_32BIT)
8304 return rclass;
8305 else
8306 {
8307 if (rclass == GENERAL_REGS)
8308 return LO_REGS;
8309 else
8310 return rclass;
8311 }
8312 }
8313
8314 /* Build the SYMBOL_REF for __tls_get_addr. */
8315
8316 static GTY(()) rtx tls_get_addr_libfunc;
8317
8318 static rtx
8319 get_tls_get_addr (void)
8320 {
8321 if (!tls_get_addr_libfunc)
8322 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8323 return tls_get_addr_libfunc;
8324 }
8325
8326 rtx
8327 arm_load_tp (rtx target)
8328 {
8329 if (!target)
8330 target = gen_reg_rtx (SImode);
8331
8332 if (TARGET_HARD_TP)
8333 {
8334 /* Can return in any reg. */
8335 emit_insn (gen_load_tp_hard (target));
8336 }
8337 else
8338 {
8339 /* Always returned in r0. Immediately copy the result into a pseudo,
8340 otherwise other uses of r0 (e.g. setting up function arguments) may
8341 clobber the value. */
8342
8343 rtx tmp;
8344
8345 emit_insn (gen_load_tp_soft ());
8346
8347 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8348 emit_move_insn (target, tmp);
8349 }
8350 return target;
8351 }
8352
8353 static rtx
8354 load_tls_operand (rtx x, rtx reg)
8355 {
8356 rtx tmp;
8357
8358 if (reg == NULL_RTX)
8359 reg = gen_reg_rtx (SImode);
8360
8361 tmp = gen_rtx_CONST (SImode, x);
8362
8363 emit_move_insn (reg, tmp);
8364
8365 return reg;
8366 }
8367
8368 static rtx_insn *
8369 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8370 {
8371 rtx label, labelno, sum;
8372
8373 gcc_assert (reloc != TLS_DESCSEQ);
8374 start_sequence ();
8375
8376 labelno = GEN_INT (pic_labelno++);
8377 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8378 label = gen_rtx_CONST (VOIDmode, label);
8379
8380 sum = gen_rtx_UNSPEC (Pmode,
8381 gen_rtvec (4, x, GEN_INT (reloc), label,
8382 GEN_INT (TARGET_ARM ? 8 : 4)),
8383 UNSPEC_TLS);
8384 reg = load_tls_operand (sum, reg);
8385
8386 if (TARGET_ARM)
8387 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8388 else
8389 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8390
8391 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8392 LCT_PURE, /* LCT_CONST? */
8393 Pmode, reg, Pmode);
8394
8395 rtx_insn *insns = get_insns ();
8396 end_sequence ();
8397
8398 return insns;
8399 }
8400
8401 static rtx
8402 arm_tls_descseq_addr (rtx x, rtx reg)
8403 {
8404 rtx labelno = GEN_INT (pic_labelno++);
8405 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8406 rtx sum = gen_rtx_UNSPEC (Pmode,
8407 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8408 gen_rtx_CONST (VOIDmode, label),
8409 GEN_INT (!TARGET_ARM)),
8410 UNSPEC_TLS);
8411 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8412
8413 emit_insn (gen_tlscall (x, labelno));
8414 if (!reg)
8415 reg = gen_reg_rtx (SImode);
8416 else
8417 gcc_assert (REGNO (reg) != R0_REGNUM);
8418
8419 emit_move_insn (reg, reg0);
8420
8421 return reg;
8422 }
8423
8424 rtx
8425 legitimize_tls_address (rtx x, rtx reg)
8426 {
8427 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8428 rtx_insn *insns;
8429 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8430
8431 switch (model)
8432 {
8433 case TLS_MODEL_GLOBAL_DYNAMIC:
8434 if (TARGET_GNU2_TLS)
8435 {
8436 reg = arm_tls_descseq_addr (x, reg);
8437
8438 tp = arm_load_tp (NULL_RTX);
8439
8440 dest = gen_rtx_PLUS (Pmode, tp, reg);
8441 }
8442 else
8443 {
8444 /* Original scheme */
8445 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8446 dest = gen_reg_rtx (Pmode);
8447 emit_libcall_block (insns, dest, ret, x);
8448 }
8449 return dest;
8450
8451 case TLS_MODEL_LOCAL_DYNAMIC:
8452 if (TARGET_GNU2_TLS)
8453 {
8454 reg = arm_tls_descseq_addr (x, reg);
8455
8456 tp = arm_load_tp (NULL_RTX);
8457
8458 dest = gen_rtx_PLUS (Pmode, tp, reg);
8459 }
8460 else
8461 {
8462 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8463
8464 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8465 share the LDM result with other LD model accesses. */
8466 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8467 UNSPEC_TLS);
8468 dest = gen_reg_rtx (Pmode);
8469 emit_libcall_block (insns, dest, ret, eqv);
8470
8471 /* Load the addend. */
8472 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8473 GEN_INT (TLS_LDO32)),
8474 UNSPEC_TLS);
8475 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8476 dest = gen_rtx_PLUS (Pmode, dest, addend);
8477 }
8478 return dest;
8479
8480 case TLS_MODEL_INITIAL_EXEC:
8481 labelno = GEN_INT (pic_labelno++);
8482 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8483 label = gen_rtx_CONST (VOIDmode, label);
8484 sum = gen_rtx_UNSPEC (Pmode,
8485 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8486 GEN_INT (TARGET_ARM ? 8 : 4)),
8487 UNSPEC_TLS);
8488 reg = load_tls_operand (sum, reg);
8489
8490 if (TARGET_ARM)
8491 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8492 else if (TARGET_THUMB2)
8493 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8494 else
8495 {
8496 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8497 emit_move_insn (reg, gen_const_mem (SImode, reg));
8498 }
8499
8500 tp = arm_load_tp (NULL_RTX);
8501
8502 return gen_rtx_PLUS (Pmode, tp, reg);
8503
8504 case TLS_MODEL_LOCAL_EXEC:
8505 tp = arm_load_tp (NULL_RTX);
8506
8507 reg = gen_rtx_UNSPEC (Pmode,
8508 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8509 UNSPEC_TLS);
8510 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8511
8512 return gen_rtx_PLUS (Pmode, tp, reg);
8513
8514 default:
8515 abort ();
8516 }
8517 }
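
/* Illustrative example (not part of this file): a TLS access expanded
   through the routine above. The names are hypothetical.

     __thread int per_thread_count;

     int
     next_id (void)
     {
       return ++per_thread_count;
     }

   Which case above is used depends on -fpic, -ftls-model and whether
   the definition binds locally; in a non-PIC executable this typically
   ends up as TLS_MODEL_LOCAL_EXEC, i.e. the thread pointer from
   arm_load_tp plus a TLS_LE32 offset. */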
8518
8519 /* Try machine-dependent ways of modifying an illegitimate address
8520 to be legitimate. If we find one, return the new, valid address. */
8521 rtx
8522 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8523 {
8524 if (arm_tls_referenced_p (x))
8525 {
8526 rtx addend = NULL;
8527
8528 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8529 {
8530 addend = XEXP (XEXP (x, 0), 1);
8531 x = XEXP (XEXP (x, 0), 0);
8532 }
8533
8534 if (GET_CODE (x) != SYMBOL_REF)
8535 return x;
8536
8537 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8538
8539 x = legitimize_tls_address (x, NULL_RTX);
8540
8541 if (addend)
8542 {
8543 x = gen_rtx_PLUS (SImode, x, addend);
8544 orig_x = x;
8545 }
8546 else
8547 return x;
8548 }
8549
8550 if (!TARGET_ARM)
8551 {
8552 /* TODO: legitimize_address for Thumb2. */
8553 if (TARGET_THUMB2)
8554 return x;
8555 return thumb_legitimize_address (x, orig_x, mode);
8556 }
8557
8558 if (GET_CODE (x) == PLUS)
8559 {
8560 rtx xop0 = XEXP (x, 0);
8561 rtx xop1 = XEXP (x, 1);
8562
8563 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8564 xop0 = force_reg (SImode, xop0);
8565
8566 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8567 && !symbol_mentioned_p (xop1))
8568 xop1 = force_reg (SImode, xop1);
8569
8570 if (ARM_BASE_REGISTER_RTX_P (xop0)
8571 && CONST_INT_P (xop1))
8572 {
8573 HOST_WIDE_INT n, low_n;
8574 rtx base_reg, val;
8575 n = INTVAL (xop1);
8576
8577 /* VFP addressing modes actually allow greater offsets, but for
8578 now we just stick with the lowest common denominator. */
8579 if (mode == DImode || mode == DFmode)
8580 {
8581 low_n = n & 0x0f;
8582 n &= ~0x0f;
8583 if (low_n > 4)
8584 {
8585 n += 16;
8586 low_n -= 16;
8587 }
8588 }
8589 else
8590 {
8591 low_n = ((mode) == TImode ? 0
8592 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8593 n -= low_n;
8594 }
8595
8596 base_reg = gen_reg_rtx (SImode);
8597 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8598 emit_move_insn (base_reg, val);
8599 x = plus_constant (Pmode, base_reg, low_n);
8600 }
8601 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8602 x = gen_rtx_PLUS (SImode, xop0, xop1);
8603 }
8604
8605 /* XXX We don't allow MINUS any more -- see comment in
8606 arm_legitimate_address_outer_p (). */
8607 else if (GET_CODE (x) == MINUS)
8608 {
8609 rtx xop0 = XEXP (x, 0);
8610 rtx xop1 = XEXP (x, 1);
8611
8612 if (CONSTANT_P (xop0))
8613 xop0 = force_reg (SImode, xop0);
8614
8615 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8616 xop1 = force_reg (SImode, xop1);
8617
8618 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8619 x = gen_rtx_MINUS (SImode, xop0, xop1);
8620 }
8621
8622 /* Make sure to take full advantage of the pre-indexed addressing mode
8623 with absolute addresses, which often allows the base register to be
8624 shared between multiple adjacent memory references, and might even
8625 allow the minipool to be avoided entirely. */
8626 else if (CONST_INT_P (x) && optimize > 0)
8627 {
8628 unsigned int bits;
8629 HOST_WIDE_INT mask, base, index;
8630 rtx base_reg;
8631
8632 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8633 use an 8-bit index. So let's use a 12-bit index for SImode only and
8634 hope that arm_gen_constant will enable ldrb to use more bits. */
8635 bits = (mode == SImode) ? 12 : 8;
8636 mask = (1 << bits) - 1;
8637 base = INTVAL (x) & ~mask;
8638 index = INTVAL (x) & mask;
8639 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8640 {
8641 /* It'll most probably be more efficient to generate the base
8642 with more bits set and use a negative index instead. */
8643 base |= mask;
8644 index -= mask;
8645 }
8646 base_reg = force_reg (SImode, GEN_INT (base));
8647 x = plus_constant (Pmode, base_reg, index);
8648 }
8649
8650 if (flag_pic)
8651 {
8652 /* We need to find and carefully transform any SYMBOL and LABEL
8653 references; so go back to the original address expression. */
8654 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8655
8656 if (new_x != orig_x)
8657 x = new_x;
8658 }
8659
8660 return x;
8661 }
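
/* Illustrative example (not part of this file): an address the routine
   above has to legitimize. The structure is hypothetical.

     struct big
     {
       char pad[8192];
       int field;
     };

     int
     get_field (struct big *p)
     {
       return p->field;
     }

   The offset 8192 is outside the 12-bit range of an ARM word load, so
   the PLUS case above moves the out-of-range part into a separately
   computed base register and leaves only a small in-range offset. */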
8662
8663
8664 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8665 to be legitimate. If we find one, return the new, valid address. */
8666 rtx
8667 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8668 {
8669 if (GET_CODE (x) == PLUS
8670 && CONST_INT_P (XEXP (x, 1))
8671 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8672 || INTVAL (XEXP (x, 1)) < 0))
8673 {
8674 rtx xop0 = XEXP (x, 0);
8675 rtx xop1 = XEXP (x, 1);
8676 HOST_WIDE_INT offset = INTVAL (xop1);
8677
8678 /* Try to fold the offset into a biasing of the base register and
8679 then offsetting that. Don't do this when optimizing for space
8680 since it can cause too many CSEs. */
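/* For instance, with SImode (GET_MODE_SIZE == 4) and offset == 300, the
   first branch below computes delta == 300 - 252 == 48, biases the base
   by 252 and leaves a residual offset of 48, which fits the Thumb-1
   word-load immediate range (0..124). */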
8681 if (optimize_size && offset >= 0
8682 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8683 {
8684 HOST_WIDE_INT delta;
8685
8686 if (offset >= 256)
8687 delta = offset - (256 - GET_MODE_SIZE (mode));
8688 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8689 delta = 31 * GET_MODE_SIZE (mode);
8690 else
8691 delta = offset & (~31 * GET_MODE_SIZE (mode));
8692
8693 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8694 NULL_RTX);
8695 x = plus_constant (Pmode, xop0, delta);
8696 }
8697 else if (offset < 0 && offset > -256)
8698 /* Small negative offsets are best done with a subtract before the
8699 dereference; forcing these into a register normally takes two
8700 instructions. */
8701 x = force_operand (x, NULL_RTX);
8702 else
8703 {
8704 /* For the remaining cases, force the constant into a register. */
8705 xop1 = force_reg (SImode, xop1);
8706 x = gen_rtx_PLUS (SImode, xop0, xop1);
8707 }
8708 }
8709 else if (GET_CODE (x) == PLUS
8710 && s_register_operand (XEXP (x, 1), SImode)
8711 && !s_register_operand (XEXP (x, 0), SImode))
8712 {
8713 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8714
8715 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8716 }
8717
8718 if (flag_pic)
8719 {
8720 /* We need to find and carefully transform any SYMBOL and LABEL
8721 references; so go back to the original address expression. */
8722 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8723
8724 if (new_x != orig_x)
8725 x = new_x;
8726 }
8727
8728 return x;
8729 }
8730
8731 /* Return TRUE if X contains any TLS symbol references. */
8732
8733 bool
8734 arm_tls_referenced_p (rtx x)
8735 {
8736 if (! TARGET_HAVE_TLS)
8737 return false;
8738
8739 subrtx_iterator::array_type array;
8740 FOR_EACH_SUBRTX (iter, array, x, ALL)
8741 {
8742 const_rtx x = *iter;
8743 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8744 {
8745 /* ARM currently provides relocations to encode TLS variables only in
8746 data, not in AArch32 instructions, so there is currently no way to
8747 implement these when the literal pool is disabled. */
8748 if (arm_disable_literal_pool)
8749 sorry ("accessing thread-local storage is not currently supported "
8750 "with -mpure-code or -mslow-flash-data");
8751
8752 return true;
8753 }
8754
8755 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8756 TLS offsets, not real symbol references. */
8757 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8758 iter.skip_subrtxes ();
8759 }
8760 return false;
8761 }
8762
8763 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8764
8765 On the ARM, allow any integer (invalid ones are removed later by insn
8766 patterns), nice doubles, and symbol_refs that refer to the function's
8767 constant pool XXX.
8768
8769 When generating PIC, allow anything. */
8770
8771 static bool
8772 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8773 {
8774 return flag_pic || !label_mentioned_p (x);
8775 }
8776
8777 static bool
8778 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8779 {
8780 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8781 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8782 for ARMv8-M Baseline or later, the result is valid. */
8783 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8784 x = XEXP (x, 0);
8785
8786 return (CONST_INT_P (x)
8787 || CONST_DOUBLE_P (x)
8788 || CONSTANT_ADDRESS_P (x)
8789 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8790 || flag_pic);
8791 }
8792
8793 static bool
8794 arm_legitimate_constant_p (machine_mode mode, rtx x)
8795 {
8796 return (!arm_cannot_force_const_mem (mode, x)
8797 && (TARGET_32BIT
8798 ? arm_legitimate_constant_p_1 (mode, x)
8799 : thumb_legitimate_constant_p (mode, x)));
8800 }
8801
8802 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8803
8804 static bool
8805 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8806 {
8807 rtx base, offset;
8808
8809 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8810 {
8811 split_const (x, &base, &offset);
8812 if (GET_CODE (base) == SYMBOL_REF
8813 && !offset_within_block_p (base, INTVAL (offset)))
8814 return true;
8815 }
8816 return arm_tls_referenced_p (x);
8817 }
8818 \f
8819 #define REG_OR_SUBREG_REG(X) \
8820 (REG_P (X) \
8821 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8822
8823 #define REG_OR_SUBREG_RTX(X) \
8824 (REG_P (X) ? (X) : SUBREG_REG (X))
8825
8826 static inline int
8827 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8828 {
8829 machine_mode mode = GET_MODE (x);
8830 int total, words;
8831
8832 switch (code)
8833 {
8834 case ASHIFT:
8835 case ASHIFTRT:
8836 case LSHIFTRT:
8837 case ROTATERT:
8838 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8839
8840 case PLUS:
8841 case MINUS:
8842 case COMPARE:
8843 case NEG:
8844 case NOT:
8845 return COSTS_N_INSNS (1);
8846
8847 case MULT:
8848 if (arm_arch6m && arm_m_profile_small_mul)
8849 return COSTS_N_INSNS (32);
8850
8851 if (CONST_INT_P (XEXP (x, 1)))
8852 {
8853 int cycles = 0;
8854 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8855
8856 while (i)
8857 {
8858 i >>= 2;
8859 cycles++;
8860 }
8861 return COSTS_N_INSNS (2) + cycles;
8862 }
8863 return COSTS_N_INSNS (1) + 16;
8864
8865 case SET:
8866 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8867 the mode. */
8868 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8869 return (COSTS_N_INSNS (words)
8870 + 4 * ((MEM_P (SET_SRC (x)))
8871 + MEM_P (SET_DEST (x))));
8872
8873 case CONST_INT:
8874 if (outer == SET)
8875 {
8876 if (UINTVAL (x) < 256
8877 /* 16-bit constant. */
8878 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8879 return 0;
8880 if (thumb_shiftable_const (INTVAL (x)))
8881 return COSTS_N_INSNS (2);
8882 return COSTS_N_INSNS (3);
8883 }
8884 else if ((outer == PLUS || outer == COMPARE)
8885 && INTVAL (x) < 256 && INTVAL (x) > -256)
8886 return 0;
8887 else if ((outer == IOR || outer == XOR || outer == AND)
8888 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8889 return COSTS_N_INSNS (1);
8890 else if (outer == AND)
8891 {
8892 int i;
8893 /* This duplicates the tests in the andsi3 expander. */
8894 for (i = 9; i <= 31; i++)
8895 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8896 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8897 return COSTS_N_INSNS (2);
8898 }
8899 else if (outer == ASHIFT || outer == ASHIFTRT
8900 || outer == LSHIFTRT)
8901 return 0;
8902 return COSTS_N_INSNS (2);
8903
8904 case CONST:
8905 case CONST_DOUBLE:
8906 case LABEL_REF:
8907 case SYMBOL_REF:
8908 return COSTS_N_INSNS (3);
8909
8910 case UDIV:
8911 case UMOD:
8912 case DIV:
8913 case MOD:
8914 return 100;
8915
8916 case TRUNCATE:
8917 return 99;
8918
8919 case AND:
8920 case XOR:
8921 case IOR:
8922 /* XXX guess. */
8923 return 8;
8924
8925 case MEM:
8926 /* XXX another guess. */
8927 /* Memory costs quite a lot for the first word, but subsequent words
8928 load at the equivalent of a single insn each. */
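/* For example, assuming UNITS_PER_WORD of 4, a DImode (8-byte) load
   costs 10 + 4 * ((8 - 1) / 4) == 14, plus another 4 if the address is
   a constant-pool SYMBOL_REF. */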
8929 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8930 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8931 ? 4 : 0));
8932
8933 case IF_THEN_ELSE:
8934 /* XXX a guess. */
8935 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8936 return 14;
8937 return 2;
8938
8939 case SIGN_EXTEND:
8940 case ZERO_EXTEND:
8941 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8942 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8943
8944 if (mode == SImode)
8945 return total;
8946
8947 if (arm_arch6)
8948 return total + COSTS_N_INSNS (1);
8949
8950 /* Assume a two-shift sequence. Increase the cost slightly so
8951 we prefer actual shifts over an extend operation. */
8952 return total + 1 + COSTS_N_INSNS (2);
8953
8954 default:
8955 return 99;
8956 }
8957 }
8958
8959 /* Estimate the size cost of thumb1 instructions.
8960 For now, most of the code is copied from thumb1_rtx_costs; we need
8961 more fine-grained tuning when we have more related test cases. */
8962 static inline int
8963 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8964 {
8965 machine_mode mode = GET_MODE (x);
8966 int words, cost;
8967
8968 switch (code)
8969 {
8970 case ASHIFT:
8971 case ASHIFTRT:
8972 case LSHIFTRT:
8973 case ROTATERT:
8974 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8975
8976 case PLUS:
8977 case MINUS:
8978 /* Thumb-1 needs two instructions for the shiftadd/shiftsub0/shiftsub1
8979 combinations used by RTL expansion, especially when expanding
8980 multiplication. */
8981 if ((GET_CODE (XEXP (x, 0)) == MULT
8982 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8983 || (GET_CODE (XEXP (x, 1)) == MULT
8984 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8985 return COSTS_N_INSNS (2);
8986 /* Fall through. */
8987 case COMPARE:
8988 case NEG:
8989 case NOT:
8990 return COSTS_N_INSNS (1);
8991
8992 case MULT:
8993 if (CONST_INT_P (XEXP (x, 1)))
8994 {
8995 /* The Thumb-1 mul instruction can't operate on a constant; we must
8996 load it into a register first. */
8997 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8998 /* For targets that have a very small, high-latency multiply unit,
8999 we prefer to synthesize the mult with up to 5 instructions,
9000 giving a good balance between size and performance. */
9001 if (arm_arch6m && arm_m_profile_small_mul)
9002 return COSTS_N_INSNS (5);
9003 else
9004 return COSTS_N_INSNS (1) + const_size;
9005 }
9006 return COSTS_N_INSNS (1);
9007
9008 case SET:
9009 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9010 the mode. */
9011 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9012 cost = COSTS_N_INSNS (words);
9013 if (satisfies_constraint_J (SET_SRC (x))
9014 || satisfies_constraint_K (SET_SRC (x))
9015 /* Too big an immediate for a 2-byte mov, using MOVT. */
9016 || (CONST_INT_P (SET_SRC (x))
9017 && UINTVAL (SET_SRC (x)) >= 256
9018 && TARGET_HAVE_MOVT
9019 && satisfies_constraint_j (SET_SRC (x)))
9020 /* thumb1_movdi_insn. */
9021 || ((words > 1) && MEM_P (SET_SRC (x))))
9022 cost += COSTS_N_INSNS (1);
9023 return cost;
9024
9025 case CONST_INT:
9026 if (outer == SET)
9027 {
9028 if (UINTVAL (x) < 256)
9029 return COSTS_N_INSNS (1);
9030 /* movw is 4 bytes long. */
9031 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9034 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9035 return COSTS_N_INSNS (2);
9036 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9037 if (thumb_shiftable_const (INTVAL (x)))
9038 return COSTS_N_INSNS (2);
9039 return COSTS_N_INSNS (3);
9040 }
9041 else if ((outer == PLUS || outer == COMPARE)
9042 && INTVAL (x) < 256 && INTVAL (x) > -256)
9043 return 0;
9044 else if ((outer == IOR || outer == XOR || outer == AND)
9045 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9046 return COSTS_N_INSNS (1);
9047 else if (outer == AND)
9048 {
9049 int i;
9050 /* This duplicates the tests in the andsi3 expander. */
9051 for (i = 9; i <= 31; i++)
9052 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9053 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9054 return COSTS_N_INSNS (2);
9055 }
9056 else if (outer == ASHIFT || outer == ASHIFTRT
9057 || outer == LSHIFTRT)
9058 return 0;
9059 return COSTS_N_INSNS (2);
9060
9061 case CONST:
9062 case CONST_DOUBLE:
9063 case LABEL_REF:
9064 case SYMBOL_REF:
9065 return COSTS_N_INSNS (3);
9066
9067 case UDIV:
9068 case UMOD:
9069 case DIV:
9070 case MOD:
9071 return 100;
9072
9073 case TRUNCATE:
9074 return 99;
9075
9076 case AND:
9077 case XOR:
9078 case IOR:
9079 return COSTS_N_INSNS (1);
9080
9081 case MEM:
9082 return (COSTS_N_INSNS (1)
9083 + COSTS_N_INSNS (1)
9084 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9085 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9086 ? COSTS_N_INSNS (1) : 0));
9087
9088 case IF_THEN_ELSE:
9089 /* XXX a guess. */
9090 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9091 return 14;
9092 return 2;
9093
9094 case ZERO_EXTEND:
9095 /* XXX still guessing. */
9096 switch (GET_MODE (XEXP (x, 0)))
9097 {
9098 case E_QImode:
9099 return (1 + (mode == DImode ? 4 : 0)
9100 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9101
9102 case E_HImode:
9103 return (4 + (mode == DImode ? 4 : 0)
9104 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9105
9106 case E_SImode:
9107 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9108
9109 default:
9110 return 99;
9111 }
9112
9113 default:
9114 return 99;
9115 }
9116 }
9117
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the shift-amount
9121 operand. Return NULL if OP is not a shifter operand. */
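/* For example, (mult X (const_int 8)) counts as X shifted left by 3
   (exact_log2 (8) == 3) and X is returned, while for (ashift X Y) with
   Y a register, X is returned and *SHIFT_REG is set to Y. */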
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9124 {
9125 enum rtx_code code = GET_CODE (op);
9126
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9134 {
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9138 }
9139
9140 return NULL;
9141 }
9142
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9145 {
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 rtx_code code = GET_CODE (x);
9148 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9149
9150 switch (XINT (x, 1))
9151 {
9152 case UNSPEC_UNALIGNED_LOAD:
9153 /* We can only do unaligned loads into the integer unit, and we can't
9154 use LDM or LDRD. */
9155 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9156 if (speed_p)
9157 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9158 + extra_cost->ldst.load_unaligned);
9159
9160 #ifdef NOT_YET
9161 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9162 ADDR_SPACE_GENERIC, speed_p);
9163 #endif
9164 return true;
9165
9166 case UNSPEC_UNALIGNED_STORE:
9167 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9168 if (speed_p)
9169 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9170 + extra_cost->ldst.store_unaligned);
9171
9172 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9173 #ifdef NOT_YET
9174 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9175 ADDR_SPACE_GENERIC, speed_p);
9176 #endif
9177 return true;
9178
9179 case UNSPEC_VRINTZ:
9180 case UNSPEC_VRINTP:
9181 case UNSPEC_VRINTM:
9182 case UNSPEC_VRINTR:
9183 case UNSPEC_VRINTX:
9184 case UNSPEC_VRINTA:
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9187
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9192 }
9193 return true;
9194 }
9195
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
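/* For instance, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) at -Os. */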
9199
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9202 { \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9206 { \
9207 if (shift_reg) \
9208 { \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9212 ASHIFT, 1, speed_p); \
9213 } \
9214 else if (speed_p) \
9215 *cost += extra_cost->alu.arith_shift; \
9216 \
9217 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9218 ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9220 GET_MODE (shift_op), \
9221 OP, 1, speed_p)); \
9222 return true; \
9223 } \
9224 } \
9225 while (0);
9226
9227 /* RTX costs. Make an estimate of the cost of executing the operation
9228 X, which is contained within an operation with code OUTER_CODE.
9229 SPEED_P indicates whether the desired cost is the performance cost
9230 or the size cost. The estimate is stored in COST and the return
9231 value is TRUE if the cost calculation is final, or FALSE if the
9232 caller should recurse through the operands of X to add additional
9233 costs.
9234
9235 We currently make no attempt to model the size savings of Thumb-2
9236 16-bit instructions. At the normal points in compilation where
9237 this code is called we have no measure of whether the condition
9238 flags are live or not, and thus no realistic way to determine what
9239 the size will eventually be. */
9240 static bool
9241 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9242 const struct cpu_cost_table *extra_cost,
9243 int *cost, bool speed_p)
9244 {
9245 machine_mode mode = GET_MODE (x);
9246
9247 *cost = COSTS_N_INSNS (1);
9248
9249 if (TARGET_THUMB1)
9250 {
9251 if (speed_p)
9252 *cost = thumb1_rtx_costs (x, code, outer_code);
9253 else
9254 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9255 return true;
9256 }
9257
9258 switch (code)
9259 {
9260 case SET:
9261 *cost = 0;
9262 /* SET RTXs don't have a mode so we get it from the destination. */
9263 mode = GET_MODE (SET_DEST (x));
9264
9265 if (REG_P (SET_SRC (x))
9266 && REG_P (SET_DEST (x)))
9267 {
9268 /* Assume that most copies can be done with a single insn,
9269 unless we don't have HW FP, in which case everything
9270 larger than word mode will require two insns. */
9271 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9272 && GET_MODE_SIZE (mode) > 4)
9273 || mode == DImode)
9274 ? 2 : 1);
9275 /* Conditional register moves can be encoded
9276 in 16 bits in Thumb mode. */
9277 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9278 *cost >>= 1;
9279
9280 return true;
9281 }
9282
9283 if (CONST_INT_P (SET_SRC (x)))
9284 {
9285 /* Handle CONST_INT here, since the value doesn't have a mode
9286 and we would otherwise be unable to work out the true cost. */
9287 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9288 0, speed_p);
9289 outer_code = SET;
9290 /* Slightly lower the cost of setting a core reg to a constant.
9291 This helps break up chains and allows for better scheduling. */
9292 if (REG_P (SET_DEST (x))
9293 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9294 *cost -= 1;
9295 x = SET_SRC (x);
9296 /* Immediate moves with an immediate in the range [0, 255] can be
9297 encoded in 16 bits in Thumb mode. */
9298 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9299 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9300 *cost >>= 1;
9301 goto const_int_cost;
9302 }
9303
9304 return false;
9305
9306 case MEM:
9307 /* A memory access costs 1 insn if the mode is small or the address is
9308 a single register; otherwise it costs one insn per word. */
9309 if (REG_P (XEXP (x, 0)))
9310 *cost = COSTS_N_INSNS (1);
9311 else if (flag_pic
9312 && GET_CODE (XEXP (x, 0)) == PLUS
9313 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9314 /* This will be split into two instructions.
9315 See arm.md:calculate_pic_address. */
9316 *cost = COSTS_N_INSNS (2);
9317 else
9318 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9319
9320 /* For speed optimizations, add the costs of the address and
9321 accessing memory. */
9322 if (speed_p)
9323 #ifdef NOT_YET
9324 *cost += (extra_cost->ldst.load
9325 + arm_address_cost (XEXP (x, 0), mode,
9326 ADDR_SPACE_GENERIC, speed_p));
9327 #else
9328 *cost += extra_cost->ldst.load;
9329 #endif
9330 return true;
9331
9332 case PARALLEL:
9333 {
9334 /* Calculations of LDM costs are complex. We assume an initial cost
9335 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9336 registers; then each additional group of
9337 ldm_regs_per_insn_subsequent registers costs one more insn. The
9338 formula for N regs is thus:
9339
9340 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9341 + ldm_regs_per_insn_subsequent - 1)
9342 / ldm_regs_per_insn_subsequent).
9343
9344 Additional costs may also be added for addressing. A similar
9345 formula is used for STM. */
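/* For example, assuming hypothetical tuning values of 3 for
   ldm_regs_per_insn_1st and 2 for ldm_regs_per_insn_subsequent, an
   8-register LDM costs
   ldm_1st + COSTS_N_INSNS ((max (8 - 3, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (3). */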
9346
9347 bool is_ldm = load_multiple_operation (x, SImode);
9348 bool is_stm = store_multiple_operation (x, SImode);
9349
9350 if (is_ldm || is_stm)
9351 {
9352 if (speed_p)
9353 {
9354 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9355 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9356 ? extra_cost->ldst.ldm_regs_per_insn_1st
9357 : extra_cost->ldst.stm_regs_per_insn_1st;
9358 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9359 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9360 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9361
9362 *cost += regs_per_insn_1st
9363 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9364 + regs_per_insn_sub - 1)
9365 / regs_per_insn_sub);
9366 return true;
9367 }
9368
9369 }
9370 return false;
9371 }
9372 case DIV:
9373 case UDIV:
9374 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9375 && (mode == SFmode || !TARGET_VFP_SINGLE))
9376 *cost += COSTS_N_INSNS (speed_p
9377 ? extra_cost->fp[mode != SFmode].div : 0);
9378 else if (mode == SImode && TARGET_IDIV)
9379 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9380 else
9381 *cost = LIBCALL_COST (2);
9382
9383 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9384 are possible, udiv is preferred. */
9385 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9386 return false; /* All arguments must be in registers. */
9387
9388 case MOD:
9389 /* MOD by a power of 2 can be expanded as:
9390 rsbs r1, r0, #0
9391 and r0, r0, #(n - 1)
9392 and r1, r1, #(n - 1)
9393 rsbpl r0, r1, #0. */
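/* E.g. for x % 16 the masks above become #15; together with the base
   COSTS_N_INSNS (1) already in *cost, the COSTS_N_INSNS (3) added below
   accounts for the full four-instruction sequence. */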
9394 if (CONST_INT_P (XEXP (x, 1))
9395 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9396 && mode == SImode)
9397 {
9398 *cost += COSTS_N_INSNS (3);
9399
9400 if (speed_p)
9401 *cost += 2 * extra_cost->alu.logical
9402 + extra_cost->alu.arith;
9403 return true;
9404 }
9405
9406 /* Fall-through. */
9407 case UMOD:
9408 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9409 are possible, udiv is preferred. */
9410 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9411 return false; /* All arguments must be in registers. */
9412
9413 case ROTATE:
9414 if (mode == SImode && REG_P (XEXP (x, 1)))
9415 {
9416 *cost += (COSTS_N_INSNS (1)
9417 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9418 if (speed_p)
9419 *cost += extra_cost->alu.shift_reg;
9420 return true;
9421 }
9422 /* Fall through */
9423 case ROTATERT:
9424 case ASHIFT:
9425 case LSHIFTRT:
9426 case ASHIFTRT:
9427 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9428 {
9429 *cost += (COSTS_N_INSNS (2)
9430 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9431 if (speed_p)
9432 *cost += 2 * extra_cost->alu.shift;
9433 return true;
9434 }
9435 else if (mode == SImode)
9436 {
9437 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9438 /* Slightly disparage register shifts at -Os, but not by much. */
9439 if (!CONST_INT_P (XEXP (x, 1)))
9440 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9441 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9442 return true;
9443 }
9444 else if (GET_MODE_CLASS (mode) == MODE_INT
9445 && GET_MODE_SIZE (mode) < 4)
9446 {
9447 if (code == ASHIFT)
9448 {
9449 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9450 /* Slightly disparage register shifts at -Os, but not by
9451 much. */
9452 if (!CONST_INT_P (XEXP (x, 1)))
9453 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9454 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9455 }
9456 else if (code == LSHIFTRT || code == ASHIFTRT)
9457 {
9458 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9459 {
9460 /* Can use SBFX/UBFX. */
9461 if (speed_p)
9462 *cost += extra_cost->alu.bfx;
9463 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9464 }
9465 else
9466 {
9467 *cost += COSTS_N_INSNS (1);
9468 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9469 if (speed_p)
9470 {
9471 if (CONST_INT_P (XEXP (x, 1)))
9472 *cost += 2 * extra_cost->alu.shift;
9473 else
9474 *cost += (extra_cost->alu.shift
9475 + extra_cost->alu.shift_reg);
9476 }
9477 else
9478 /* Slightly disparage register shifts. */
9479 *cost += !CONST_INT_P (XEXP (x, 1));
9480 }
9481 }
9482 else /* Rotates. */
9483 {
9484 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9485 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9486 if (speed_p)
9487 {
9488 if (CONST_INT_P (XEXP (x, 1)))
9489 *cost += (2 * extra_cost->alu.shift
9490 + extra_cost->alu.log_shift);
9491 else
9492 *cost += (extra_cost->alu.shift
9493 + extra_cost->alu.shift_reg
9494 + extra_cost->alu.log_shift_reg);
9495 }
9496 }
9497 return true;
9498 }
9499
9500 *cost = LIBCALL_COST (2);
9501 return false;
9502
9503 case BSWAP:
9504 if (arm_arch6)
9505 {
9506 if (mode == SImode)
9507 {
9508 if (speed_p)
9509 *cost += extra_cost->alu.rev;
9510
9511 return false;
9512 }
9513 }
9514 else
9515 {
9516 /* No rev instruction available. Look at arm_legacy_rev
9517 and thumb_legacy_rev for the form of RTL used then. */
9518 if (TARGET_THUMB)
9519 {
9520 *cost += COSTS_N_INSNS (9);
9521
9522 if (speed_p)
9523 {
9524 *cost += 6 * extra_cost->alu.shift;
9525 *cost += 3 * extra_cost->alu.logical;
9526 }
9527 }
9528 else
9529 {
9530 *cost += COSTS_N_INSNS (4);
9531
9532 if (speed_p)
9533 {
9534 *cost += 2 * extra_cost->alu.shift;
9535 *cost += extra_cost->alu.arith_shift;
9536 *cost += 2 * extra_cost->alu.logical;
9537 }
9538 }
9539 return true;
9540 }
9541 return false;
9542
9543 case MINUS:
9544 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9545 && (mode == SFmode || !TARGET_VFP_SINGLE))
9546 {
9547 if (GET_CODE (XEXP (x, 0)) == MULT
9548 || GET_CODE (XEXP (x, 1)) == MULT)
9549 {
9550 rtx mul_op0, mul_op1, sub_op;
9551
9552 if (speed_p)
9553 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9554
9555 if (GET_CODE (XEXP (x, 0)) == MULT)
9556 {
9557 mul_op0 = XEXP (XEXP (x, 0), 0);
9558 mul_op1 = XEXP (XEXP (x, 0), 1);
9559 sub_op = XEXP (x, 1);
9560 }
9561 else
9562 {
9563 mul_op0 = XEXP (XEXP (x, 1), 0);
9564 mul_op1 = XEXP (XEXP (x, 1), 1);
9565 sub_op = XEXP (x, 0);
9566 }
9567
9568 /* The first operand of the multiply may be optionally
9569 negated. */
9570 if (GET_CODE (mul_op0) == NEG)
9571 mul_op0 = XEXP (mul_op0, 0);
9572
9573 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9574 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9575 + rtx_cost (sub_op, mode, code, 0, speed_p));
9576
9577 return true;
9578 }
9579
9580 if (speed_p)
9581 *cost += extra_cost->fp[mode != SFmode].addsub;
9582 return false;
9583 }
9584
9585 if (mode == SImode)
9586 {
9587 rtx shift_by_reg = NULL;
9588 rtx shift_op;
9589 rtx non_shift_op;
9590
9591 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9592 if (shift_op == NULL)
9593 {
9594 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9595 non_shift_op = XEXP (x, 0);
9596 }
9597 else
9598 non_shift_op = XEXP (x, 1);
9599
9600 if (shift_op != NULL)
9601 {
9602 if (shift_by_reg != NULL)
9603 {
9604 if (speed_p)
9605 *cost += extra_cost->alu.arith_shift_reg;
9606 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9607 }
9608 else if (speed_p)
9609 *cost += extra_cost->alu.arith_shift;
9610
9611 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9612 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9613 return true;
9614 }
9615
9616 if (arm_arch_thumb2
9617 && GET_CODE (XEXP (x, 1)) == MULT)
9618 {
9619 /* MLS. */
9620 if (speed_p)
9621 *cost += extra_cost->mult[0].add;
9622 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9623 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9624 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9625 return true;
9626 }
9627
9628 if (CONST_INT_P (XEXP (x, 0)))
9629 {
9630 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9631 INTVAL (XEXP (x, 0)), NULL_RTX,
9632 NULL_RTX, 1, 0);
9633 *cost = COSTS_N_INSNS (insns);
9634 if (speed_p)
9635 *cost += insns * extra_cost->alu.arith;
9636 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9637 return true;
9638 }
9639 else if (speed_p)
9640 *cost += extra_cost->alu.arith;
9641
9642 return false;
9643 }
9644
9645 if (GET_MODE_CLASS (mode) == MODE_INT
9646 && GET_MODE_SIZE (mode) < 4)
9647 {
9648 rtx shift_op, shift_reg;
9649 shift_reg = NULL;
9650
9651 /* We check both sides of the MINUS for shifter operands since,
9652 unlike PLUS, it's not commutative. */
9653
9654 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9655 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9656
9657 /* Slightly disparage, as we might need to widen the result. */
9658 *cost += 1;
9659 if (speed_p)
9660 *cost += extra_cost->alu.arith;
9661
9662 if (CONST_INT_P (XEXP (x, 0)))
9663 {
9664 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9665 return true;
9666 }
9667
9668 return false;
9669 }
9670
9671 if (mode == DImode)
9672 {
9673 *cost += COSTS_N_INSNS (1);
9674
9675 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9676 {
9677 rtx op1 = XEXP (x, 1);
9678
9679 if (speed_p)
9680 *cost += 2 * extra_cost->alu.arith;
9681
9682 if (GET_CODE (op1) == ZERO_EXTEND)
9683 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9684 0, speed_p);
9685 else
9686 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9687 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9688 0, speed_p);
9689 return true;
9690 }
9691 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9692 {
9693 if (speed_p)
9694 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9695 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9696 0, speed_p)
9697 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9698 return true;
9699 }
9700 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9701 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9702 {
9703 if (speed_p)
9704 *cost += (extra_cost->alu.arith
9705 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9706 ? extra_cost->alu.arith
9707 : extra_cost->alu.arith_shift));
9708 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9709 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9710 GET_CODE (XEXP (x, 1)), 0, speed_p));
9711 return true;
9712 }
9713
9714 if (speed_p)
9715 *cost += 2 * extra_cost->alu.arith;
9716 return false;
9717 }
9718
9719 /* Vector mode? */
9720
9721 *cost = LIBCALL_COST (2);
9722 return false;
9723
9724 case PLUS:
9725 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9726 && (mode == SFmode || !TARGET_VFP_SINGLE))
9727 {
9728 if (GET_CODE (XEXP (x, 0)) == MULT)
9729 {
9730 rtx mul_op0, mul_op1, add_op;
9731
9732 if (speed_p)
9733 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9734
9735 mul_op0 = XEXP (XEXP (x, 0), 0);
9736 mul_op1 = XEXP (XEXP (x, 0), 1);
9737 add_op = XEXP (x, 1);
9738
9739 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9740 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9741 + rtx_cost (add_op, mode, code, 0, speed_p));
9742
9743 return true;
9744 }
9745
9746 if (speed_p)
9747 *cost += extra_cost->fp[mode != SFmode].addsub;
9748 return false;
9749 }
9750 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9751 {
9752 *cost = LIBCALL_COST (2);
9753 return false;
9754 }
9755
9756 /* Narrow modes can be synthesized in SImode, but the range
9757 of useful sub-operations is limited. Check for shift operations
9758 on one of the operands. Only left shifts can be used in the
9759 narrow modes. */
9760 if (GET_MODE_CLASS (mode) == MODE_INT
9761 && GET_MODE_SIZE (mode) < 4)
9762 {
9763 rtx shift_op, shift_reg;
9764 shift_reg = NULL;
9765
9766 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9767
9768 if (CONST_INT_P (XEXP (x, 1)))
9769 {
9770 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9771 INTVAL (XEXP (x, 1)), NULL_RTX,
9772 NULL_RTX, 1, 0);
9773 *cost = COSTS_N_INSNS (insns);
9774 if (speed_p)
9775 *cost += insns * extra_cost->alu.arith;
9776 /* Slightly penalize a narrow operation as the result may
9777 need widening. */
9778 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9779 return true;
9780 }
9781
9782 /* Slightly penalize a narrow operation as the result may
9783 need widening. */
9784 *cost += 1;
9785 if (speed_p)
9786 *cost += extra_cost->alu.arith;
9787
9788 return false;
9789 }
9790
9791 if (mode == SImode)
9792 {
9793 rtx shift_op, shift_reg;
9794
9795 if (TARGET_INT_SIMD
9796 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9797 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9798 {
9799 /* UXTA[BH] or SXTA[BH]. */
9800 if (speed_p)
9801 *cost += extra_cost->alu.extend_arith;
9802 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9803 0, speed_p)
9804 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9805 return true;
9806 }
9807
9808 shift_reg = NULL;
9809 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9810 if (shift_op != NULL)
9811 {
9812 if (shift_reg)
9813 {
9814 if (speed_p)
9815 *cost += extra_cost->alu.arith_shift_reg;
9816 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9817 }
9818 else if (speed_p)
9819 *cost += extra_cost->alu.arith_shift;
9820
9821 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9822 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9823 return true;
9824 }
9825 if (GET_CODE (XEXP (x, 0)) == MULT)
9826 {
9827 rtx mul_op = XEXP (x, 0);
9828
9829 if (TARGET_DSP_MULTIPLY
9830 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9831 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9832 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9833 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9834 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9835 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9836 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9837 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9838 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9839 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9840 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9841 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9842 == 16))))))
9843 {
9844 /* SMLA[BT][BT]. */
9845 if (speed_p)
9846 *cost += extra_cost->mult[0].extend_add;
9847 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9848 SIGN_EXTEND, 0, speed_p)
9849 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9850 SIGN_EXTEND, 0, speed_p)
9851 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9852 return true;
9853 }
9854
9855 if (speed_p)
9856 *cost += extra_cost->mult[0].add;
9857 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9858 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9859 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9860 return true;
9861 }
9862 if (CONST_INT_P (XEXP (x, 1)))
9863 {
9864 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9865 INTVAL (XEXP (x, 1)), NULL_RTX,
9866 NULL_RTX, 1, 0);
9867 *cost = COSTS_N_INSNS (insns);
9868 if (speed_p)
9869 *cost += insns * extra_cost->alu.arith;
9870 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9871 return true;
9872 }
9873 else if (speed_p)
9874 *cost += extra_cost->alu.arith;
9875
9876 return false;
9877 }
9878
9879 if (mode == DImode)
9880 {
9881 if (arm_arch3m
9882 && GET_CODE (XEXP (x, 0)) == MULT
9883 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9884 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9885 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9886 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9887 {
9888 if (speed_p)
9889 *cost += extra_cost->mult[1].extend_add;
9890 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9891 ZERO_EXTEND, 0, speed_p)
9892 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9893 ZERO_EXTEND, 0, speed_p)
9894 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9895 return true;
9896 }
9897
9898 *cost += COSTS_N_INSNS (1);
9899
9900 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9901 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9902 {
9903 if (speed_p)
9904 *cost += (extra_cost->alu.arith
9905 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9906 ? extra_cost->alu.arith
9907 : extra_cost->alu.arith_shift));
9908
9909 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9910 0, speed_p)
9911 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9912 return true;
9913 }
9914
9915 if (speed_p)
9916 *cost += 2 * extra_cost->alu.arith;
9917 return false;
9918 }
9919
9920 /* Vector mode? */
9921 *cost = LIBCALL_COST (2);
9922 return false;
9923 case IOR:
9924 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9925 {
9926 if (speed_p)
9927 *cost += extra_cost->alu.rev;
9928
9929 return true;
9930 }
9931 /* Fall through. */
9932 case AND: case XOR:
9933 if (mode == SImode)
9934 {
9935 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9936 rtx op0 = XEXP (x, 0);
9937 rtx shift_op, shift_reg;
9938
9939 if (subcode == NOT
9940 && (code == AND
9941 || (code == IOR && TARGET_THUMB2)))
9942 op0 = XEXP (op0, 0);
9943
9944 shift_reg = NULL;
9945 shift_op = shifter_op_p (op0, &shift_reg);
9946 if (shift_op != NULL)
9947 {
9948 if (shift_reg)
9949 {
9950 if (speed_p)
9951 *cost += extra_cost->alu.log_shift_reg;
9952 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9953 }
9954 else if (speed_p)
9955 *cost += extra_cost->alu.log_shift;
9956
9957 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9958 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9959 return true;
9960 }
9961
9962 if (CONST_INT_P (XEXP (x, 1)))
9963 {
9964 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9965 INTVAL (XEXP (x, 1)), NULL_RTX,
9966 NULL_RTX, 1, 0);
9967
9968 *cost = COSTS_N_INSNS (insns);
9969 if (speed_p)
9970 *cost += insns * extra_cost->alu.logical;
9971 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9972 return true;
9973 }
9974
9975 if (speed_p)
9976 *cost += extra_cost->alu.logical;
9977 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9979 return true;
9980 }
9981
9982 if (mode == DImode)
9983 {
9984 rtx op0 = XEXP (x, 0);
9985 enum rtx_code subcode = GET_CODE (op0);
9986
9987 *cost += COSTS_N_INSNS (1);
9988
9989 if (subcode == NOT
9990 && (code == AND
9991 || (code == IOR && TARGET_THUMB2)))
9992 op0 = XEXP (op0, 0);
9993
9994 if (GET_CODE (op0) == ZERO_EXTEND)
9995 {
9996 if (speed_p)
9997 *cost += 2 * extra_cost->alu.logical;
9998
9999 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10000 0, speed_p)
10001 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10002 return true;
10003 }
10004 else if (GET_CODE (op0) == SIGN_EXTEND)
10005 {
10006 if (speed_p)
10007 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10008
10009 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10010 0, speed_p)
10011 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10012 return true;
10013 }
10014
10015 if (speed_p)
10016 *cost += 2 * extra_cost->alu.logical;
10017
10018 return true;
10019 }
10020 /* Vector mode? */
10021
10022 *cost = LIBCALL_COST (2);
10023 return false;
10024
10025 case MULT:
10026 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10027 && (mode == SFmode || !TARGET_VFP_SINGLE))
10028 {
10029 rtx op0 = XEXP (x, 0);
10030
10031 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10032 op0 = XEXP (op0, 0);
10033
10034 if (speed_p)
10035 *cost += extra_cost->fp[mode != SFmode].mult;
10036
10037 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10038 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10039 return true;
10040 }
10041 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10042 {
10043 *cost = LIBCALL_COST (2);
10044 return false;
10045 }
10046
10047 if (mode == SImode)
10048 {
10049 if (TARGET_DSP_MULTIPLY
10050 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10051 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10052 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10053 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10054 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10055 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10056 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10057 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10058 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10059 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10060 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10061 && (INTVAL (XEXP (XEXP (x, 1), 1))
10062 == 16))))))
10063 {
10064 /* SMUL[TB][TB]. */
10065 if (speed_p)
10066 *cost += extra_cost->mult[0].extend;
10067 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10068 SIGN_EXTEND, 0, speed_p);
10069 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10070 SIGN_EXTEND, 1, speed_p);
10071 return true;
10072 }
10073 if (speed_p)
10074 *cost += extra_cost->mult[0].simple;
10075 return false;
10076 }
10077
10078 if (mode == DImode)
10079 {
10080 if (arm_arch3m
10081 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10082 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10083 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10084 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10085 {
10086 if (speed_p)
10087 *cost += extra_cost->mult[1].extend;
10088 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10089 ZERO_EXTEND, 0, speed_p)
10090 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10091 ZERO_EXTEND, 0, speed_p));
10092 return true;
10093 }
10094
10095 *cost = LIBCALL_COST (2);
10096 return false;
10097 }
10098
10099 /* Vector mode? */
10100 *cost = LIBCALL_COST (2);
10101 return false;
10102
10103 case NEG:
10104 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10105 && (mode == SFmode || !TARGET_VFP_SINGLE))
10106 {
10107 if (GET_CODE (XEXP (x, 0)) == MULT)
10108 {
10109 /* VNMUL. */
10110 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10111 return true;
10112 }
10113
10114 if (speed_p)
10115 *cost += extra_cost->fp[mode != SFmode].neg;
10116
10117 return false;
10118 }
10119 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10120 {
10121 *cost = LIBCALL_COST (1);
10122 return false;
10123 }
10124
10125 if (mode == SImode)
10126 {
10127 if (GET_CODE (XEXP (x, 0)) == ABS)
10128 {
10129 *cost += COSTS_N_INSNS (1);
10130 /* Assume the non-flag-changing variant. */
10131 if (speed_p)
10132 *cost += (extra_cost->alu.log_shift
10133 + extra_cost->alu.arith_shift);
10134 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10135 return true;
10136 }
10137
10138 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10139 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10140 {
10141 *cost += COSTS_N_INSNS (1);
10142 /* No extra cost for MOV imm and MVN imm. */
10143 /* If the comparison op is using the flags, there's no further
10144 cost; otherwise we need to add the cost of the comparison. */
10145 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10146 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10147 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10148 {
10149 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10150 *cost += (COSTS_N_INSNS (1)
10151 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10152 0, speed_p)
10153 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10154 1, speed_p));
10155 if (speed_p)
10156 *cost += extra_cost->alu.arith;
10157 }
10158 return true;
10159 }
10160
10161 if (speed_p)
10162 *cost += extra_cost->alu.arith;
10163 return false;
10164 }
10165
10166 if (GET_MODE_CLASS (mode) == MODE_INT
10167 && GET_MODE_SIZE (mode) < 4)
10168 {
10169 /* Slightly disparage, as we might need an extend operation. */
10170 *cost += 1;
10171 if (speed_p)
10172 *cost += extra_cost->alu.arith;
10173 return false;
10174 }
10175
10176 if (mode == DImode)
10177 {
10178 *cost += COSTS_N_INSNS (1);
10179 if (speed_p)
10180 *cost += 2 * extra_cost->alu.arith;
10181 return false;
10182 }
10183
10184 /* Vector mode? */
10185 *cost = LIBCALL_COST (1);
10186 return false;
10187
10188 case NOT:
10189 if (mode == SImode)
10190 {
10191 rtx shift_op;
10192 rtx shift_reg = NULL;
10193
10194 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10195
10196 if (shift_op)
10197 {
10198 if (shift_reg != NULL)
10199 {
10200 if (speed_p)
10201 *cost += extra_cost->alu.log_shift_reg;
10202 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10203 }
10204 else if (speed_p)
10205 *cost += extra_cost->alu.log_shift;
10206 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10207 return true;
10208 }
10209
10210 if (speed_p)
10211 *cost += extra_cost->alu.logical;
10212 return false;
10213 }
10214 if (mode == DImode)
10215 {
10216 *cost += COSTS_N_INSNS (1);
10217 return false;
10218 }
10219
10220 /* Vector mode? */
10221
10222 *cost += LIBCALL_COST (1);
10223 return false;
10224
10225 case IF_THEN_ELSE:
10226 {
10227 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10228 {
10229 *cost += COSTS_N_INSNS (3);
10230 return true;
10231 }
10232 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10233 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10234
10235 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10236 /* Assume that if one arm of the if_then_else is a register,
10237 that it will be tied with the result and eliminate the
10238 conditional insn. */
10239 if (REG_P (XEXP (x, 1)))
10240 *cost += op2cost;
10241 else if (REG_P (XEXP (x, 2)))
10242 *cost += op1cost;
10243 else
10244 {
10245 if (speed_p)
10246 {
10247 if (extra_cost->alu.non_exec_costs_exec)
10248 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10249 else
10250 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10251 }
10252 else
10253 *cost += op1cost + op2cost;
10254 }
10255 }
10256 return true;
10257
10258 case COMPARE:
10259 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10260 *cost = 0;
10261 else
10262 {
10263 machine_mode op0mode;
10264 /* We'll mostly assume that the cost of a compare is the cost of the
10265 LHS. However, there are some notable exceptions. */
10266
10267 /* Floating point compares are never done as side-effects. */
10268 op0mode = GET_MODE (XEXP (x, 0));
10269 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10270 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10271 {
10272 if (speed_p)
10273 *cost += extra_cost->fp[op0mode != SFmode].compare;
10274
10275 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10276 {
10277 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10278 return true;
10279 }
10280
10281 return false;
10282 }
10283 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10284 {
10285 *cost = LIBCALL_COST (2);
10286 return false;
10287 }
10288
10289 /* DImode compares normally take two insns. */
10290 if (op0mode == DImode)
10291 {
10292 *cost += COSTS_N_INSNS (1);
10293 if (speed_p)
10294 *cost += 2 * extra_cost->alu.arith;
10295 return false;
10296 }
10297
10298 if (op0mode == SImode)
10299 {
10300 rtx shift_op;
10301 rtx shift_reg;
10302
10303 if (XEXP (x, 1) == const0_rtx
10304 && !(REG_P (XEXP (x, 0))
10305 || (GET_CODE (XEXP (x, 0)) == SUBREG
10306 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10307 {
10308 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10309
10310 /* Multiply operations that set the flags are often
10311 significantly more expensive. */
10312 if (speed_p
10313 && GET_CODE (XEXP (x, 0)) == MULT
10314 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10315 *cost += extra_cost->mult[0].flag_setting;
10316
10317 if (speed_p
10318 && GET_CODE (XEXP (x, 0)) == PLUS
10319 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10320 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10321 0), 1), mode))
10322 *cost += extra_cost->mult[0].flag_setting;
10323 return true;
10324 }
10325
10326 shift_reg = NULL;
10327 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10328 if (shift_op != NULL)
10329 {
10330 if (shift_reg != NULL)
10331 {
10332 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10333 1, speed_p);
10334 if (speed_p)
10335 *cost += extra_cost->alu.arith_shift_reg;
10336 }
10337 else if (speed_p)
10338 *cost += extra_cost->alu.arith_shift;
10339 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10340 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10341 return true;
10342 }
10343
10344 if (speed_p)
10345 *cost += extra_cost->alu.arith;
10346 if (CONST_INT_P (XEXP (x, 1))
10347 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10348 {
10349 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10350 return true;
10351 }
10352 return false;
10353 }
10354
10355 /* Vector mode? */
10356
10357 *cost = LIBCALL_COST (2);
10358 return false;
10359 }
10360 return true;
10361
10362 case EQ:
10363 case NE:
10364 case LT:
10365 case LE:
10366 case GT:
10367 case GE:
10368 case LTU:
10369 case LEU:
10370 case GEU:
10371 case GTU:
10372 case ORDERED:
10373 case UNORDERED:
10374 case UNEQ:
10375 case UNLE:
10376 case UNLT:
10377 case UNGE:
10378 case UNGT:
10379 case LTGT:
10380 if (outer_code == SET)
10381 {
10382 /* Is it a store-flag operation? */
10383 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10384 && XEXP (x, 1) == const0_rtx)
10385 {
10386 /* Thumb also needs an IT insn. */
10387 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10388 return true;
10389 }
10390 if (XEXP (x, 1) == const0_rtx)
10391 {
10392 switch (code)
10393 {
10394 case LT:
10395 /* LSR Rd, Rn, #31. */
10396 if (speed_p)
10397 *cost += extra_cost->alu.shift;
10398 break;
10399
10400 case EQ:
10401 /* RSBS T1, Rn, #0
10402 ADC Rd, Rn, T1. */
10403
10404 case NE:
10405 /* SUBS T1, Rn, #1
10406 SBC Rd, Rn, T1. */
10407 *cost += COSTS_N_INSNS (1);
10408 break;
10409
10410 case LE:
10411 /* RSBS T1, Rn, Rn, LSR #31
10412 ADC Rd, Rn, T1. */
10413 *cost += COSTS_N_INSNS (1);
10414 if (speed_p)
10415 *cost += extra_cost->alu.arith_shift;
10416 break;
10417
10418 case GT:
10419 /* RSB Rd, Rn, Rn, ASR #1
10420 LSR Rd, Rd, #31. */
10421 *cost += COSTS_N_INSNS (1);
10422 if (speed_p)
10423 *cost += (extra_cost->alu.arith_shift
10424 + extra_cost->alu.shift);
10425 break;
10426
10427 case GE:
10428 /* ASR Rd, Rn, #31
10429 ADD Rd, Rn, #1. */
10430 *cost += COSTS_N_INSNS (1);
10431 if (speed_p)
10432 *cost += extra_cost->alu.shift;
10433 break;
10434
10435 default:
10436 /* Remaining cases are either meaningless or would take
10437 three insns anyway. */
10438 *cost = COSTS_N_INSNS (3);
10439 break;
10440 }
10441 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10442 return true;
10443 }
10444 else
10445 {
10446 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10447 if (CONST_INT_P (XEXP (x, 1))
10448 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10449 {
10450 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10451 return true;
10452 }
10453
10454 return false;
10455 }
10456 }
10457 /* Not directly inside a set. If it involves the condition code
10458 register, it must be the condition for a branch, cond_exec or
10459 I_T_E operation. Since the comparison is performed elsewhere,
10460 this is just the control part, which has no additional
10461 cost. */
10462 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10463 && XEXP (x, 1) == const0_rtx)
10464 {
10465 *cost = 0;
10466 return true;
10467 }
10468 return false;
10469
10470 case ABS:
10471 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10472 && (mode == SFmode || !TARGET_VFP_SINGLE))
10473 {
10474 if (speed_p)
10475 *cost += extra_cost->fp[mode != SFmode].neg;
10476
10477 return false;
10478 }
10479 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10480 {
10481 *cost = LIBCALL_COST (1);
10482 return false;
10483 }
10484
10485 if (mode == SImode)
10486 {
10487 if (speed_p)
10488 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10489 return false;
10490 }
10491 /* Vector mode? */
10492 *cost = LIBCALL_COST (1);
10493 return false;
10494
10495 case SIGN_EXTEND:
10496 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10497 && MEM_P (XEXP (x, 0)))
10498 {
10499 if (mode == DImode)
10500 *cost += COSTS_N_INSNS (1);
10501
10502 if (!speed_p)
10503 return true;
10504
10505 if (GET_MODE (XEXP (x, 0)) == SImode)
10506 *cost += extra_cost->ldst.load;
10507 else
10508 *cost += extra_cost->ldst.load_sign_extend;
10509
10510 if (mode == DImode)
10511 *cost += extra_cost->alu.shift;
10512
10513 return true;
10514 }
10515
10516 /* Widening from less than 32 bits requires an extend operation. */
10517 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10518 {
10519 /* We have SXTB/SXTH. */
10520 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10521 if (speed_p)
10522 *cost += extra_cost->alu.extend;
10523 }
10524 else if (GET_MODE (XEXP (x, 0)) != SImode)
10525 {
10526 /* Needs two shifts. */
10527 *cost += COSTS_N_INSNS (1);
10528 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10529 if (speed_p)
10530 *cost += 2 * extra_cost->alu.shift;
10531 }
10532
10533 /* Widening beyond 32 bits requires one more insn. */
10534 if (mode == DImode)
10535 {
10536 *cost += COSTS_N_INSNS (1);
10537 if (speed_p)
10538 *cost += extra_cost->alu.shift;
10539 }
10540
10541 return true;
10542
10543 case ZERO_EXTEND:
10544 if ((arm_arch4
10545 || GET_MODE (XEXP (x, 0)) == SImode
10546 || GET_MODE (XEXP (x, 0)) == QImode)
10547 && MEM_P (XEXP (x, 0)))
10548 {
10549 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10550
10551 if (mode == DImode)
10552 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10553
10554 return true;
10555 }
10556
10557 /* Widening from less than 32 bits requires an extend operation. */
10558 if (GET_MODE (XEXP (x, 0)) == QImode)
10559 {
10560 /* UXTB can be a shorter instruction in Thumb2, but it might
10561 be slower than the AND Rd, Rn, #255 alternative. When
10562 optimizing for speed it should never be slower to use
10563 AND, and we don't really model 16-bit vs 32-bit insns
10564 here. */
10565 if (speed_p)
10566 *cost += extra_cost->alu.logical;
10567 }
10568 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10569 {
10570 /* We have UXTB/UXTH. */
10571 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10572 if (speed_p)
10573 *cost += extra_cost->alu.extend;
10574 }
10575 else if (GET_MODE (XEXP (x, 0)) != SImode)
10576 {
10577 /* Needs two shifts. It's marginally preferable to use
10578 shifts rather than two BIC instructions as the second
10579 shift may merge with a subsequent insn as a shifter
10580 op. */
10581 *cost = COSTS_N_INSNS (2);
10582 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10583 if (speed_p)
10584 *cost += 2 * extra_cost->alu.shift;
10585 }
10586
10587 /* Widening beyond 32 bits requires one more insn. */
10588 if (mode == DImode)
10589 {
10590 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10591 }
10592
10593 return true;
10594
10595 case CONST_INT:
10596 *cost = 0;
10597 /* CONST_INT has no mode, so we cannot tell for sure how many
10598 insns are really going to be needed. The best we can do is
10599 look at the value passed. If it fits in SImode, then assume
10600 that's the mode it will be used for. Otherwise assume it
10601 will be used in DImode. */
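/* Illustration: 0x100000001 is not equal to its SImode truncation, so it
   is treated as DImode and costed below as two SImode constants, the low
   word 0x1 and the high word 0x1. */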
10602 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10603 mode = SImode;
10604 else
10605 mode = DImode;
10606
10607 /* Avoid blowing up in arm_gen_constant (). */
10608 if (!(outer_code == PLUS
10609 || outer_code == AND
10610 || outer_code == IOR
10611 || outer_code == XOR
10612 || outer_code == MINUS))
10613 outer_code = SET;
10614
10615 const_int_cost:
10616 if (mode == SImode)
10617 {
10618 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10619 INTVAL (x), NULL, NULL,
10620 0, 0));
10621 /* Extra costs? */
10622 }
10623 else
10624 {
10625 *cost += COSTS_N_INSNS (arm_gen_constant
10626 (outer_code, SImode, NULL,
10627 trunc_int_for_mode (INTVAL (x), SImode),
10628 NULL, NULL, 0, 0)
10629 + arm_gen_constant (outer_code, SImode, NULL,
10630 INTVAL (x) >> 32, NULL,
10631 NULL, 0, 0));
10632 /* Extra costs? */
10633 }
10634
10635 return true;
10636
10637 case CONST:
10638 case LABEL_REF:
10639 case SYMBOL_REF:
10640 if (speed_p)
10641 {
10642 if (arm_arch_thumb2 && !flag_pic)
10643 *cost += COSTS_N_INSNS (1);
10644 else
10645 *cost += extra_cost->ldst.load;
10646 }
10647 else
10648 *cost += COSTS_N_INSNS (1);
10649
10650 if (flag_pic)
10651 {
10652 *cost += COSTS_N_INSNS (1);
10653 if (speed_p)
10654 *cost += extra_cost->alu.arith;
10655 }
10656
10657 return true;
10658
10659 case CONST_FIXED:
10660 *cost = COSTS_N_INSNS (4);
10661 /* Fixme. */
10662 return true;
10663
10664 case CONST_DOUBLE:
10665 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10666 && (mode == SFmode || !TARGET_VFP_SINGLE))
10667 {
10668 if (vfp3_const_double_rtx (x))
10669 {
10670 if (speed_p)
10671 *cost += extra_cost->fp[mode == DFmode].fpconst;
10672 return true;
10673 }
10674
10675 if (speed_p)
10676 {
10677 if (mode == DFmode)
10678 *cost += extra_cost->ldst.loadd;
10679 else
10680 *cost += extra_cost->ldst.loadf;
10681 }
10682 else
10683 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10684
10685 return true;
10686 }
10687 *cost = COSTS_N_INSNS (4);
10688 return true;
10689
10690 case CONST_VECTOR:
10691 /* Fixme. */
10692 if (TARGET_NEON
10693 && TARGET_HARD_FLOAT
10694 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10695 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10696 *cost = COSTS_N_INSNS (1);
10697 else
10698 *cost = COSTS_N_INSNS (4);
10699 return true;
10700
10701 case HIGH:
10702 case LO_SUM:
10703 /* When optimizing for size, we prefer constant pool entries to
10704 MOVW/MOVT pairs, so bump the cost of these slightly. */
10705 if (!speed_p)
10706 *cost += 1;
10707 return true;
10708
10709 case CLZ:
10710 if (speed_p)
10711 *cost += extra_cost->alu.clz;
10712 return false;
10713
10714 case SMIN:
10715 if (XEXP (x, 1) == const0_rtx)
10716 {
10717 if (speed_p)
10718 *cost += extra_cost->alu.log_shift;
10719 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10720 return true;
10721 }
10722 /* Fall through. */
10723 case SMAX:
10724 case UMIN:
10725 case UMAX:
10726 *cost += COSTS_N_INSNS (1);
10727 return false;
10728
10729 case TRUNCATE:
10730 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10731 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10732 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10733 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10734 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10735 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10736 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10737 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10738 == ZERO_EXTEND))))
10739 {
10740 if (speed_p)
10741 *cost += extra_cost->mult[1].extend;
10742 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10743 ZERO_EXTEND, 0, speed_p)
10744 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10745 ZERO_EXTEND, 0, speed_p));
10746 return true;
10747 }
10748 *cost = LIBCALL_COST (1);
10749 return false;
10750
10751 case UNSPEC_VOLATILE:
10752 case UNSPEC:
10753 return arm_unspec_cost (x, outer_code, speed_p, cost);
10754
10755 case PC:
10756 /* Reading the PC is like reading any other register. Writing it
10757 is more expensive, but we take that into account elsewhere. */
10758 *cost = 0;
10759 return true;
10760
10761 case ZERO_EXTRACT:
10762 /* TODO: Simple zero_extract of bottom bits using AND. */
10763 /* Fall through. */
10764 case SIGN_EXTRACT:
10765 if (arm_arch6
10766 && mode == SImode
10767 && CONST_INT_P (XEXP (x, 1))
10768 && CONST_INT_P (XEXP (x, 2)))
10769 {
10770 if (speed_p)
10771 *cost += extra_cost->alu.bfx;
10772 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10773 return true;
10774 }
10775 /* Without UBFX/SBFX, need to resort to shift operations. */
10776 *cost += COSTS_N_INSNS (1);
10777 if (speed_p)
10778 *cost += 2 * extra_cost->alu.shift;
10779 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10780 return true;
10781
10782 case FLOAT_EXTEND:
10783 if (TARGET_HARD_FLOAT)
10784 {
10785 if (speed_p)
10786 *cost += extra_cost->fp[mode == DFmode].widen;
10787 if (!TARGET_VFP5
10788 && GET_MODE (XEXP (x, 0)) == HFmode)
10789 {
10790 /* Pre v8, widening HF->DF is a two-step process, first
10791 widening to SFmode. */
10792 *cost += COSTS_N_INSNS (1);
10793 if (speed_p)
10794 *cost += extra_cost->fp[0].widen;
10795 }
10796 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10797 return true;
10798 }
10799
10800 *cost = LIBCALL_COST (1);
10801 return false;
10802
10803 case FLOAT_TRUNCATE:
10804 if (TARGET_HARD_FLOAT)
10805 {
10806 if (speed_p)
10807 *cost += extra_cost->fp[mode == DFmode].narrow;
10808 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10809 return true;
10810 /* Vector modes? */
10811 }
10812 *cost = LIBCALL_COST (1);
10813 return false;
10814
10815 case FMA:
10816 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10817 {
10818 rtx op0 = XEXP (x, 0);
10819 rtx op1 = XEXP (x, 1);
10820 rtx op2 = XEXP (x, 2);
10821
10822
10823 /* vfms or vfnma. */
10824 if (GET_CODE (op0) == NEG)
10825 op0 = XEXP (op0, 0);
10826
10827 /* vfnms or vfnma. */
10828 if (GET_CODE (op2) == NEG)
10829 op2 = XEXP (op2, 0);
10830
10831 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10832 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10833 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10834
10835 if (speed_p)
10836 *cost += extra_cost->fp[mode == DFmode].fma;
10837
10838 return true;
10839 }
10840
10841 *cost = LIBCALL_COST (3);
10842 return false;
10843
10844 case FIX:
10845 case UNSIGNED_FIX:
10846 if (TARGET_HARD_FLOAT)
10847 {
10848 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10849 a vcvt fixed-point conversion. */
10850 if (code == FIX && mode == SImode
10851 && GET_CODE (XEXP (x, 0)) == FIX
10852 && GET_MODE (XEXP (x, 0)) == SFmode
10853 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10854 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10855 > 0)
10856 {
10857 if (speed_p)
10858 *cost += extra_cost->fp[0].toint;
10859
10860 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10861 code, 0, speed_p);
10862 return true;
10863 }
10864
10865 if (GET_MODE_CLASS (mode) == MODE_INT)
10866 {
10867 mode = GET_MODE (XEXP (x, 0));
10868 if (speed_p)
10869 *cost += extra_cost->fp[mode == DFmode].toint;
10870 /* Strip off the 'cost' of rounding towards zero. */
10871 if (GET_CODE (XEXP (x, 0)) == FIX)
10872 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10873 0, speed_p);
10874 else
10875 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10876 /* ??? Increase the cost to deal with transferring from
10877 FP -> CORE registers? */
10878 return true;
10879 }
10880 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10881 && TARGET_VFP5)
10882 {
10883 if (speed_p)
10884 *cost += extra_cost->fp[mode == DFmode].roundint;
10885 return false;
10886 }
10887 /* Vector costs? */
10888 }
10889 *cost = LIBCALL_COST (1);
10890 return false;
10891
10892 case FLOAT:
10893 case UNSIGNED_FLOAT:
10894 if (TARGET_HARD_FLOAT)
10895 {
10896 /* ??? Increase the cost to deal with transferring from CORE
10897 -> FP registers? */
10898 if (speed_p)
10899 *cost += extra_cost->fp[mode == DFmode].fromint;
10900 return false;
10901 }
10902 *cost = LIBCALL_COST (1);
10903 return false;
10904
10905 case CALL:
10906 return true;
10907
10908 case ASM_OPERANDS:
10909 {
10910 /* Just a guess. Guess number of instructions in the asm
10911 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10912 though (see PR60663). */
10913 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10914 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10915
10916 *cost = COSTS_N_INSNS (asm_length + num_operands);
10917 return true;
10918 }
10919 default:
10920 if (mode != VOIDmode)
10921 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10922 else
10923 *cost = COSTS_N_INSNS (4); /* Who knows? */
10924 return false;
10925 }
10926 }
10927
10928 #undef HANDLE_NARROW_SHIFT_ARITH
10929
10930 /* RTX costs entry point. */
10931
10932 static bool
10933 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10934 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10935 {
10936 bool result;
10937 int code = GET_CODE (x);
10938 gcc_assert (current_tune->insn_extra_cost);
10939
10940 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10941 (enum rtx_code) outer_code,
10942 current_tune->insn_extra_cost,
10943 total, speed);
10944
10945 if (dump_file && (dump_flags & TDF_DETAILS))
10946 {
10947 print_rtl_single (dump_file, x);
10948 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10949 *total, result ? "final" : "partial");
10950 }
10951 return result;
10952 }
10953
10954 /* All address computations that can be done are free, but rtx cost returns
10955 the same for practically all of them. So we weight the different types
10956 of address here in the order (most pref first):
10957 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10958 static inline int
10959 arm_arm_address_cost (rtx x)
10960 {
10961 enum rtx_code c = GET_CODE (x);
10962
10963 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10964 return 0;
10965 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10966 return 10;
10967
10968 if (c == PLUS)
10969 {
10970 if (CONST_INT_P (XEXP (x, 1)))
10971 return 2;
10972
10973 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10974 return 3;
10975
10976 return 4;
10977 }
10978
10979 return 6;
10980 }
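/* With the weights above: a PRE/POST_INC or PRE/POST_DEC address costs 0,
   [reg, #imm] costs 2, [reg, reg, lsl #2] costs 3 (one operand is an
   arithmetic sub-expression), [reg, reg] costs 4, a bare register costs 6,
   and a label or symbol reference costs 10. */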
10981
10982 static inline int
10983 arm_thumb_address_cost (rtx x)
10984 {
10985 enum rtx_code c = GET_CODE (x);
10986
10987 if (c == REG)
10988 return 1;
10989 if (c == PLUS
10990 && REG_P (XEXP (x, 0))
10991 && CONST_INT_P (XEXP (x, 1)))
10992 return 1;
10993
10994 return 2;
10995 }
10996
10997 static int
10998 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10999 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11000 {
11001 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11002 }
11003
11004 /* Adjust cost hook for XScale. */
11005 static bool
11006 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11007 int * cost)
11008 {
11009 /* Some true dependencies can have a higher cost depending
11010 on precisely how certain input operands are used. */
11011 if (dep_type == 0
11012 && recog_memoized (insn) >= 0
11013 && recog_memoized (dep) >= 0)
11014 {
11015 int shift_opnum = get_attr_shift (insn);
11016 enum attr_type attr_type = get_attr_type (dep);
11017
11018 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11019 operand for INSN. If we have a shifted input operand and the
11020 instruction we depend on is another ALU instruction, then we may
11021 have to account for an additional stall. */
11022 if (shift_opnum != 0
11023 && (attr_type == TYPE_ALU_SHIFT_IMM
11024 || attr_type == TYPE_ALUS_SHIFT_IMM
11025 || attr_type == TYPE_LOGIC_SHIFT_IMM
11026 || attr_type == TYPE_LOGICS_SHIFT_IMM
11027 || attr_type == TYPE_ALU_SHIFT_REG
11028 || attr_type == TYPE_ALUS_SHIFT_REG
11029 || attr_type == TYPE_LOGIC_SHIFT_REG
11030 || attr_type == TYPE_LOGICS_SHIFT_REG
11031 || attr_type == TYPE_MOV_SHIFT
11032 || attr_type == TYPE_MVN_SHIFT
11033 || attr_type == TYPE_MOV_SHIFT_REG
11034 || attr_type == TYPE_MVN_SHIFT_REG))
11035 {
11036 rtx shifted_operand;
11037 int opno;
11038
11039 /* Get the shifted operand. */
11040 extract_insn (insn);
11041 shifted_operand = recog_data.operand[shift_opnum];
11042
11043 /* Iterate over all the operands in DEP. If we write an operand
11044 that overlaps with SHIFTED_OPERAND, then we have to increase the
11045 cost of this dependency. */
11046 extract_insn (dep);
11047 preprocess_constraints (dep);
11048 for (opno = 0; opno < recog_data.n_operands; opno++)
11049 {
11050 /* We can ignore strict inputs. */
11051 if (recog_data.operand_type[opno] == OP_IN)
11052 continue;
11053
11054 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11055 shifted_operand))
11056 {
11057 *cost = 2;
11058 return false;
11059 }
11060 }
11061 }
11062 }
11063 return true;
11064 }
11065
11066 /* Adjust cost hook for Cortex A9. */
11067 static bool
11068 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11069 int * cost)
11070 {
11071 switch (dep_type)
11072 {
11073 case REG_DEP_ANTI:
11074 *cost = 0;
11075 return false;
11076
11077 case REG_DEP_TRUE:
11078 case REG_DEP_OUTPUT:
11079 if (recog_memoized (insn) >= 0
11080 && recog_memoized (dep) >= 0)
11081 {
11082 if (GET_CODE (PATTERN (insn)) == SET)
11083 {
11084 if (GET_MODE_CLASS
11085 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11086 || GET_MODE_CLASS
11087 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11088 {
11089 enum attr_type attr_type_insn = get_attr_type (insn);
11090 enum attr_type attr_type_dep = get_attr_type (dep);
11091
11092 /* By default all dependencies of the form
11093 s0 = s0 <op> s1
11094 s0 = s0 <op> s2
11095 have an extra latency of 1 cycle because
11096 of the input and output dependency in this
11097 case. However, this gets modeled as a true
11098 dependency, hence all these checks. */
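/* For instance, two back-to-back FMACS operations (such as vmla.f32)
   accumulating into the same register: on an output dependency the
   second is charged insn_default_latency (dep) - 3, while a true
   dependency is charged the full default latency, as coded below. */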
11099 if (REG_P (SET_DEST (PATTERN (insn)))
11100 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11101 {
11102 /* FMACS is a special case where the dependent
11103 instruction can be issued 3 cycles before
11104 the normal latency in case of an output
11105 dependency. */
11106 if ((attr_type_insn == TYPE_FMACS
11107 || attr_type_insn == TYPE_FMACD)
11108 && (attr_type_dep == TYPE_FMACS
11109 || attr_type_dep == TYPE_FMACD))
11110 {
11111 if (dep_type == REG_DEP_OUTPUT)
11112 *cost = insn_default_latency (dep) - 3;
11113 else
11114 *cost = insn_default_latency (dep);
11115 return false;
11116 }
11117 else
11118 {
11119 if (dep_type == REG_DEP_OUTPUT)
11120 *cost = insn_default_latency (dep) + 1;
11121 else
11122 *cost = insn_default_latency (dep);
11123 }
11124 return false;
11125 }
11126 }
11127 }
11128 }
11129 break;
11130
11131 default:
11132 gcc_unreachable ();
11133 }
11134
11135 return true;
11136 }
11137
11138 /* Adjust cost hook for FA726TE. */
11139 static bool
11140 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11141 int * cost)
11142 {
11143 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11144 has a penalty of 3. */
11145 if (dep_type == REG_DEP_TRUE
11146 && recog_memoized (insn) >= 0
11147 && recog_memoized (dep) >= 0
11148 && get_attr_conds (dep) == CONDS_SET)
11149 {
11150 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11151 if (get_attr_conds (insn) == CONDS_USE
11152 && get_attr_type (insn) != TYPE_BRANCH)
11153 {
11154 *cost = 3;
11155 return false;
11156 }
11157
11158 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11159 || get_attr_conds (insn) == CONDS_USE)
11160 {
11161 *cost = 0;
11162 return false;
11163 }
11164 }
11165
11166 return true;
11167 }
11168
11169 /* Implement TARGET_REGISTER_MOVE_COST.
11170
11171 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11172 such a move is typically more expensive than a single memory access. We set
11173 the cost to less than two memory accesses so that floating
11174 point to integer conversion does not go through memory. */
11175
11176 int
11177 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11178 reg_class_t from, reg_class_t to)
11179 {
11180 if (TARGET_32BIT)
11181 {
11182 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11183 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11184 return 15;
11185 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11186 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11187 return 4;
11188 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11189 return 20;
11190 else
11191 return 2;
11192 }
11193 else
11194 {
11195 if (from == HI_REGS || to == HI_REGS)
11196 return 4;
11197 else
11198 return 2;
11199 }
11200 }
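/* With the numbers above, a VFP<->core transfer (15) is dearer than a
   single memory move (10, see arm_memory_move_cost below) but cheaper
   than the store/load pair (20) that spilling through memory would need,
   so the direct transfer is preferred. */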
11201
11202 /* Implement TARGET_MEMORY_MOVE_COST. */
11203
11204 int
11205 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11206 bool in ATTRIBUTE_UNUSED)
11207 {
11208 if (TARGET_32BIT)
11209 return 10;
11210 else
11211 {
11212 if (GET_MODE_SIZE (mode) < 4)
11213 return 8;
11214 else
11215 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11216 }
11217 }
11218
11219 /* Vectorizer cost model implementation. */
11220
11221 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11222 static int
11223 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11224 tree vectype,
11225 int misalign ATTRIBUTE_UNUSED)
11226 {
11227 unsigned elements;
11228
11229 switch (type_of_cost)
11230 {
11231 case scalar_stmt:
11232 return current_tune->vec_costs->scalar_stmt_cost;
11233
11234 case scalar_load:
11235 return current_tune->vec_costs->scalar_load_cost;
11236
11237 case scalar_store:
11238 return current_tune->vec_costs->scalar_store_cost;
11239
11240 case vector_stmt:
11241 return current_tune->vec_costs->vec_stmt_cost;
11242
11243 case vector_load:
11244 return current_tune->vec_costs->vec_align_load_cost;
11245
11246 case vector_store:
11247 return current_tune->vec_costs->vec_store_cost;
11248
11249 case vec_to_scalar:
11250 return current_tune->vec_costs->vec_to_scalar_cost;
11251
11252 case scalar_to_vec:
11253 return current_tune->vec_costs->scalar_to_vec_cost;
11254
11255 case unaligned_load:
11256 case vector_gather_load:
11257 return current_tune->vec_costs->vec_unalign_load_cost;
11258
11259 case unaligned_store:
11260 case vector_scatter_store:
11261 return current_tune->vec_costs->vec_unalign_store_cost;
11262
11263 case cond_branch_taken:
11264 return current_tune->vec_costs->cond_taken_branch_cost;
11265
11266 case cond_branch_not_taken:
11267 return current_tune->vec_costs->cond_not_taken_branch_cost;
11268
11269 case vec_perm:
11270 case vec_promote_demote:
11271 return current_tune->vec_costs->vec_stmt_cost;
11272
11273 case vec_construct:
11274 elements = TYPE_VECTOR_SUBPARTS (vectype);
11275 return elements / 2 + 1;
11276
11277 default:
11278 gcc_unreachable ();
11279 }
11280 }
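/* For reference, the vec_construct cost above means that, for example,
   building a V4SImode vector from scalars is costed as 4 / 2 + 1 = 3. */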
11281
11282 /* Implement targetm.vectorize.add_stmt_cost. */
11283
11284 static unsigned
11285 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11286 struct _stmt_vec_info *stmt_info, int misalign,
11287 enum vect_cost_model_location where)
11288 {
11289 unsigned *cost = (unsigned *) data;
11290 unsigned retval = 0;
11291
11292 if (flag_vect_cost_model)
11293 {
11294 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11295 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11296
11297 /* Statements in an inner loop relative to the loop being
11298 vectorized are weighted more heavily. The value here is
11299 arbitrary and could potentially be improved with analysis. */
11300 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11301 count *= 50; /* FIXME. */
11302
11303 retval = (unsigned) (count * stmt_cost);
11304 cost[where] += retval;
11305 }
11306
11307 return retval;
11308 }
11309
11310 /* Return true if and only if this insn can dual-issue only as older. */
11311 static bool
11312 cortexa7_older_only (rtx_insn *insn)
11313 {
11314 if (recog_memoized (insn) < 0)
11315 return false;
11316
11317 switch (get_attr_type (insn))
11318 {
11319 case TYPE_ALU_DSP_REG:
11320 case TYPE_ALU_SREG:
11321 case TYPE_ALUS_SREG:
11322 case TYPE_LOGIC_REG:
11323 case TYPE_LOGICS_REG:
11324 case TYPE_ADC_REG:
11325 case TYPE_ADCS_REG:
11326 case TYPE_ADR:
11327 case TYPE_BFM:
11328 case TYPE_REV:
11329 case TYPE_MVN_REG:
11330 case TYPE_SHIFT_IMM:
11331 case TYPE_SHIFT_REG:
11332 case TYPE_LOAD_BYTE:
11333 case TYPE_LOAD_4:
11334 case TYPE_STORE_4:
11335 case TYPE_FFARITHS:
11336 case TYPE_FADDS:
11337 case TYPE_FFARITHD:
11338 case TYPE_FADDD:
11339 case TYPE_FMOV:
11340 case TYPE_F_CVT:
11341 case TYPE_FCMPS:
11342 case TYPE_FCMPD:
11343 case TYPE_FCONSTS:
11344 case TYPE_FCONSTD:
11345 case TYPE_FMULS:
11346 case TYPE_FMACS:
11347 case TYPE_FMULD:
11348 case TYPE_FMACD:
11349 case TYPE_FDIVS:
11350 case TYPE_FDIVD:
11351 case TYPE_F_MRC:
11352 case TYPE_F_MRRC:
11353 case TYPE_F_FLAG:
11354 case TYPE_F_LOADS:
11355 case TYPE_F_STORES:
11356 return true;
11357 default:
11358 return false;
11359 }
11360 }
11361
11362 /* Return true if and only if this insn can dual-issue as younger. */
11363 static bool
11364 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11365 {
11366 if (recog_memoized (insn) < 0)
11367 {
11368 if (verbose > 5)
11369 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11370 return false;
11371 }
11372
11373 switch (get_attr_type (insn))
11374 {
11375 case TYPE_ALU_IMM:
11376 case TYPE_ALUS_IMM:
11377 case TYPE_LOGIC_IMM:
11378 case TYPE_LOGICS_IMM:
11379 case TYPE_EXTEND:
11380 case TYPE_MVN_IMM:
11381 case TYPE_MOV_IMM:
11382 case TYPE_MOV_REG:
11383 case TYPE_MOV_SHIFT:
11384 case TYPE_MOV_SHIFT_REG:
11385 case TYPE_BRANCH:
11386 case TYPE_CALL:
11387 return true;
11388 default:
11389 return false;
11390 }
11391 }
11392
11393
11394 /* Look for an instruction that can dual issue only as an older
11395 instruction, and move it in front of any instructions that can
11396 dual-issue as younger, while preserving the relative order of all
11397 other instructions in the ready list. This is a heuristic to help
11398 dual-issue in later cycles, by postponing issue of more flexible
11399 instructions. This heuristic may affect dual issue opportunities
11400 in the current cycle. */
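/* For example, if the ready list holds an ALU-immediate insn (younger,
   see cortexa7_younger) ahead of a load (older-only, see
   cortexa7_older_only), the load is moved in front of the ALU insn so
   that the more flexible insn remains available to pair in a later
   cycle. */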
11401 static void
11402 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11403 int *n_readyp, int clock)
11404 {
11405 int i;
11406 int first_older_only = -1, first_younger = -1;
11407
11408 if (verbose > 5)
11409 fprintf (file,
11410 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11411 clock,
11412 *n_readyp);
11413
11414 /* Traverse the ready list from the head (the instruction to issue
11415 first), looking for the first instruction that can issue as
11416 younger and the first instruction that can dual-issue only as
11417 older. */
11418 for (i = *n_readyp - 1; i >= 0; i--)
11419 {
11420 rtx_insn *insn = ready[i];
11421 if (cortexa7_older_only (insn))
11422 {
11423 first_older_only = i;
11424 if (verbose > 5)
11425 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11426 break;
11427 }
11428 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11429 first_younger = i;
11430 }
11431
11432 /* Nothing to reorder because either no younger insn was found, or an
11433 insn that can dual-issue only as older already appears before any
11434 insn that can dual-issue as younger. */
11435 if (first_younger == -1)
11436 {
11437 if (verbose > 5)
11438 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11439 return;
11440 }
11441
11442 /* Nothing to reorder because no older-only insn in the ready list. */
11443 if (first_older_only == -1)
11444 {
11445 if (verbose > 5)
11446 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11447 return;
11448 }
11449
11450 /* Move first_older_only insn before first_younger. */
11451 if (verbose > 5)
11452 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11453 INSN_UID(ready [first_older_only]),
11454 INSN_UID(ready [first_younger]));
11455 rtx_insn *first_older_only_insn = ready [first_older_only];
11456 for (i = first_older_only; i < first_younger; i++)
11457 {
11458 ready[i] = ready[i+1];
11459 }
11460
11461 ready[i] = first_older_only_insn;
11462 return;
11463 }
11464
11465 /* Implement TARGET_SCHED_REORDER. */
11466 static int
11467 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11468 int clock)
11469 {
11470 switch (arm_tune)
11471 {
11472 case TARGET_CPU_cortexa7:
11473 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11474 break;
11475 default:
11476 /* Do nothing for other cores. */
11477 break;
11478 }
11479
11480 return arm_issue_rate ();
11481 }
11482
11483 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11484 It corrects the value of COST based on the relationship between
11485 INSN and DEP through the dependence LINK. It returns the new
11486 value. There is a per-core adjust_cost hook to adjust scheduler costs
11487 and the per-core hook can choose to completely override the generic
11488 adjust_cost function. Only put bits of code into arm_adjust_cost that
11489 are common across all cores. */
11490 static int
11491 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11492 unsigned int)
11493 {
11494 rtx i_pat, d_pat;
11495
11496 /* When generating Thumb-1 code, we want to place flag-setting operations
11497 close to a conditional branch which depends on them, so that we can
11498 omit the comparison. */
11499 if (TARGET_THUMB1
11500 && dep_type == 0
11501 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11502 && recog_memoized (dep) >= 0
11503 && get_attr_conds (dep) == CONDS_SET)
11504 return 0;
11505
11506 if (current_tune->sched_adjust_cost != NULL)
11507 {
11508 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11509 return cost;
11510 }
11511
11512 /* XXX Is this strictly true? */
11513 if (dep_type == REG_DEP_ANTI
11514 || dep_type == REG_DEP_OUTPUT)
11515 return 0;
11516
11517 /* Call insns don't incur a stall, even if they follow a load. */
11518 if (dep_type == 0
11519 && CALL_P (insn))
11520 return 1;
11521
11522 if ((i_pat = single_set (insn)) != NULL
11523 && MEM_P (SET_SRC (i_pat))
11524 && (d_pat = single_set (dep)) != NULL
11525 && MEM_P (SET_DEST (d_pat)))
11526 {
11527 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11528 /* This is a load after a store; there is no conflict if the load reads
11529 from a cached area. Assume that loads from the stack and from the
11530 constant pool are cached, and that others will miss. This is a
11531 hack. */
11532
11533 if ((GET_CODE (src_mem) == SYMBOL_REF
11534 && CONSTANT_POOL_ADDRESS_P (src_mem))
11535 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11536 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11537 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11538 return 1;
11539 }
11540
11541 return cost;
11542 }
11543
11544 int
11545 arm_max_conditional_execute (void)
11546 {
11547 return max_insns_skipped;
11548 }
11549
11550 static int
11551 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11552 {
11553 if (TARGET_32BIT)
11554 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11555 else
11556 return (optimize > 0) ? 2 : 0;
11557 }
11558
11559 static int
11560 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11561 {
11562 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11563 }
11564
11565 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11566 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11567 sequences of non-executed instructions in IT blocks probably take the same
11568 amount of time as executed instructions (and the IT instruction itself takes
11569 space in icache). This function was experimentally determined to give good
11570 results on a popular embedded benchmark. */
11571
11572 static int
11573 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11574 {
11575 return (TARGET_32BIT && speed_p) ? 1
11576 : arm_default_branch_cost (speed_p, predictable_p);
11577 }
11578
11579 static int
11580 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11581 {
11582 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11583 }
11584
11585 static bool fp_consts_inited = false;
11586
11587 static REAL_VALUE_TYPE value_fp0;
11588
11589 static void
11590 init_fp_table (void)
11591 {
11592 REAL_VALUE_TYPE r;
11593
11594 r = REAL_VALUE_ATOF ("0", DFmode);
11595 value_fp0 = r;
11596 fp_consts_inited = true;
11597 }
11598
11599 /* Return TRUE if rtx X is a valid immediate FP constant. */
11600 int
11601 arm_const_double_rtx (rtx x)
11602 {
11603 const REAL_VALUE_TYPE *r;
11604
11605 if (!fp_consts_inited)
11606 init_fp_table ();
11607
11608 r = CONST_DOUBLE_REAL_VALUE (x);
11609 if (REAL_VALUE_MINUS_ZERO (*r))
11610 return 0;
11611
11612 if (real_equal (r, &value_fp0))
11613 return 1;
11614
11615 return 0;
11616 }
11617
11618 /* VFPv3 has a fairly wide range of representable immediates, formed from
11619 "quarter-precision" floating-point values. These can be evaluated using this
11620 formula (with ^ for exponentiation):
11621
11622 -1^s * n * 2^-r
11623
11624 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11625 16 <= n <= 31 and 0 <= r <= 7.
11626
11627 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11628
11629 - A (most-significant) is the sign bit.
11630 - BCD are the exponent (encoded as r XOR 3).
11631 - EFGH are the mantissa (encoded as n - 16).
11632 */
11633
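/* Worked example: 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4,
   so the index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, which is
   what vfp3_const_double_index below returns for CONST_DOUBLE 1.0. */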
11634 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11635 fconst[sd] instruction, or -1 if X isn't suitable. */
11636 static int
11637 vfp3_const_double_index (rtx x)
11638 {
11639 REAL_VALUE_TYPE r, m;
11640 int sign, exponent;
11641 unsigned HOST_WIDE_INT mantissa, mant_hi;
11642 unsigned HOST_WIDE_INT mask;
11643 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11644 bool fail;
11645
11646 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11647 return -1;
11648
11649 r = *CONST_DOUBLE_REAL_VALUE (x);
11650
11651 /* We can't represent these things, so detect them first. */
11652 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11653 return -1;
11654
11655 /* Extract sign, exponent and mantissa. */
11656 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11657 r = real_value_abs (&r);
11658 exponent = REAL_EXP (&r);
11659 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11660 highest (sign) bit, with a fixed binary point at bit point_pos.
11661 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11662 bits for the mantissa, this may fail (low bits would be lost). */
11663 real_ldexp (&m, &r, point_pos - exponent);
11664 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11665 mantissa = w.elt (0);
11666 mant_hi = w.elt (1);
11667
11668 /* If there are bits set in the low part of the mantissa, we can't
11669 represent this value. */
11670 if (mantissa != 0)
11671 return -1;
11672
11673 /* Now make it so that mantissa contains the most-significant bits, and move
11674 the point_pos to indicate that the least-significant bits have been
11675 discarded. */
11676 point_pos -= HOST_BITS_PER_WIDE_INT;
11677 mantissa = mant_hi;
11678
11679 /* We can permit four significant bits of mantissa only, plus a high bit
11680 which is always 1. */
11681 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11682 if ((mantissa & mask) != 0)
11683 return -1;
11684
11685 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11686 mantissa >>= point_pos - 5;
11687
11688 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11689 floating-point immediate zero with Neon using an integer-zero load, but
11690 that case is handled elsewhere.) */
11691 if (mantissa == 0)
11692 return -1;
11693
11694 gcc_assert (mantissa >= 16 && mantissa <= 31);
11695
11696 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11697 normalized significands are in the range [1, 2). (Our mantissa is shifted
11698 left 4 places at this point relative to normalized IEEE754 values). GCC
11699 internally uses [0.5, 1) (see real.c), so the exponent returned from
11700 REAL_EXP must be altered. */
11701 exponent = 5 - exponent;
11702
11703 if (exponent < 0 || exponent > 7)
11704 return -1;
11705
11706 /* Sign, mantissa and exponent are now in the correct form to plug into the
11707 formula described in the comment above. */
11708 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11709 }
11710
11711 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11712 int
11713 vfp3_const_double_rtx (rtx x)
11714 {
11715 if (!TARGET_VFP3)
11716 return 0;
11717
11718 return vfp3_const_double_index (x) != -1;
11719 }
11720
11721 /* Recognize immediates which can be used in various Neon instructions. Legal
11722 immediates are described by the following table (for VMVN variants, the
11723 bitwise inverse of the constant shown is recognized. In either case, VMOV
11724 is output and the correct instruction to use for a given constant is chosen
11725 by the assembler). The constant shown is replicated across all elements of
11726 the destination vector.
11727
11728 insn elems variant constant (binary)
11729 ---- ----- ------- -----------------
11730 vmov i32 0 00000000 00000000 00000000 abcdefgh
11731 vmov i32 1 00000000 00000000 abcdefgh 00000000
11732 vmov i32 2 00000000 abcdefgh 00000000 00000000
11733 vmov i32 3 abcdefgh 00000000 00000000 00000000
11734 vmov i16 4 00000000 abcdefgh
11735 vmov i16 5 abcdefgh 00000000
11736 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11737 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11738 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11739 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11740 vmvn i16 10 00000000 abcdefgh
11741 vmvn i16 11 abcdefgh 00000000
11742 vmov i32 12 00000000 00000000 abcdefgh 11111111
11743 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11744 vmov i32 14 00000000 abcdefgh 11111111 11111111
11745 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11746 vmov i8 16 abcdefgh
11747 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11748 eeeeeeee ffffffff gggggggg hhhhhhhh
11749 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11750 vmov f32 19 00000000 00000000 00000000 00000000
11751
11752 For case 18, B = !b. Representable values are exactly those accepted by
11753 vfp3_const_double_index, but are output as floating-point numbers rather
11754 than indices.
11755
11756 For case 19, we will change it to vmov.i32 when assembling.
11757
11758 Variants 0-5 (inclusive) may also be used as immediates for the second
11759 operand of VORR/VBIC instructions.
11760
11761 The INVERSE argument causes the bitwise inverse of the given operand to be
11762 recognized instead (used for recognizing legal immediates for the VAND/VORN
11763 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11764 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11765 output, rather than the real insns vbic/vorr).
11766
11767 INVERSE makes no difference to the recognition of float vectors.
11768
11769 The return value is the variant of immediate as shown in the above table, or
11770 -1 if the given value doesn't match any of the listed patterns.
11771 */
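/* Worked example: a V4SImode constant whose every element is 0x0000ff00
   has little-endian bytes { 00, ff, 00, 00 } per element, so it matches
   variant 1 above (abcdefgh = 0xff) and can be emitted as a single
   vmov.i32 with element width 32. */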
11772 static int
11773 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11774 rtx *modconst, int *elementwidth)
11775 {
11776 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11777 matches = 1; \
11778 for (i = 0; i < idx; i += (STRIDE)) \
11779 if (!(TEST)) \
11780 matches = 0; \
11781 if (matches) \
11782 { \
11783 immtype = (CLASS); \
11784 elsize = (ELSIZE); \
11785 break; \
11786 }
11787
11788 unsigned int i, elsize = 0, idx = 0, n_elts;
11789 unsigned int innersize;
11790 unsigned char bytes[16];
11791 int immtype = -1, matches;
11792 unsigned int invmask = inverse ? 0xff : 0;
11793 bool vector = GET_CODE (op) == CONST_VECTOR;
11794
11795 if (vector)
11796 n_elts = CONST_VECTOR_NUNITS (op);
11797 else
11798 {
11799 n_elts = 1;
11800 if (mode == VOIDmode)
11801 mode = DImode;
11802 }
11803
11804 innersize = GET_MODE_UNIT_SIZE (mode);
11805
11806 /* Vectors of float constants. */
11807 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11808 {
11809 rtx el0 = CONST_VECTOR_ELT (op, 0);
11810
11811 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11812 return -1;
11813
11814 /* FP16 vectors cannot be represented. */
11815 if (GET_MODE_INNER (mode) == HFmode)
11816 return -1;
11817
11818 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11819 are distinct in this context. */
11820 if (!const_vec_duplicate_p (op))
11821 return -1;
11822
11823 if (modconst)
11824 *modconst = CONST_VECTOR_ELT (op, 0);
11825
11826 if (elementwidth)
11827 *elementwidth = 0;
11828
11829 if (el0 == CONST0_RTX (GET_MODE (el0)))
11830 return 19;
11831 else
11832 return 18;
11833 }
11834
11835 /* The tricks done in the code below apply for little-endian vector layout.
11836 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11837 FIXME: Implement logic for big-endian vectors. */
11838 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11839 return -1;
11840
11841 /* Splat vector constant out into a byte vector. */
11842 for (i = 0; i < n_elts; i++)
11843 {
11844 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11845 unsigned HOST_WIDE_INT elpart;
11846
11847 gcc_assert (CONST_INT_P (el));
11848 elpart = INTVAL (el);
11849
11850 for (unsigned int byte = 0; byte < innersize; byte++)
11851 {
11852 bytes[idx++] = (elpart & 0xff) ^ invmask;
11853 elpart >>= BITS_PER_UNIT;
11854 }
11855 }
11856
11857 /* Sanity check. */
11858 gcc_assert (idx == GET_MODE_SIZE (mode));
11859
11860 do
11861 {
11862 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11863 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11864
11865 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11866 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11867
11868 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11869 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11870
11871 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11872 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11873
11874 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11875
11876 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11877
11878 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11879 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11880
11881 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11882 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11883
11884 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11885 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11886
11887 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11888 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11889
11890 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11891
11892 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11893
11894 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11895 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11896
11897 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11898 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11899
11900 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11901 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11902
11903 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11904 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11905
11906 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11907
11908 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11909 && bytes[i] == bytes[(i + 8) % idx]);
11910 }
11911 while (0);
11912
11913 if (immtype == -1)
11914 return -1;
11915
11916 if (elementwidth)
11917 *elementwidth = elsize;
11918
11919 if (modconst)
11920 {
11921 unsigned HOST_WIDE_INT imm = 0;
11922
11923 /* Un-invert bytes of recognized vector, if necessary. */
11924 if (invmask != 0)
11925 for (i = 0; i < idx; i++)
11926 bytes[i] ^= invmask;
11927
11928 if (immtype == 17)
11929 {
11930 /* FIXME: Broken on 32-bit H_W_I hosts. */
11931 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11932
11933 for (i = 0; i < 8; i++)
11934 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11935 << (i * BITS_PER_UNIT);
11936
11937 *modconst = GEN_INT (imm);
11938 }
11939 else
11940 {
11941 unsigned HOST_WIDE_INT imm = 0;
11942
11943 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11944 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11945
11946 *modconst = GEN_INT (imm);
11947 }
11948 }
11949
11950 return immtype;
11951 #undef CHECK
11952 }
11953
11954 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11955 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11956 float elements), and a modified constant (whatever should be output for a
11957 VMOV) in *MODCONST. */
11958
11959 int
11960 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11961 rtx *modconst, int *elementwidth)
11962 {
11963 rtx tmpconst;
11964 int tmpwidth;
11965 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11966
11967 if (retval == -1)
11968 return 0;
11969
11970 if (modconst)
11971 *modconst = tmpconst;
11972
11973 if (elementwidth)
11974 *elementwidth = tmpwidth;
11975
11976 return 1;
11977 }
11978
11979 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11980 the immediate is valid, write a constant suitable for using as an operand
11981 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11982 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11983
11984 int
11985 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11986 rtx *modconst, int *elementwidth)
11987 {
11988 rtx tmpconst;
11989 int tmpwidth;
11990 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11991
11992 if (retval < 0 || retval > 5)
11993 return 0;
11994
11995 if (modconst)
11996 *modconst = tmpconst;
11997
11998 if (elementwidth)
11999 *elementwidth = tmpwidth;
12000
12001 return 1;
12002 }
12003
12004 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12005 the immediate is valid, write a constant suitable for using as an operand
12006 to VSHR/VSHL to *MODCONST and the corresponding element width to
12007 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12008 which have different limitations. */
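/* For example, with V8QImode elements maxshift is 8, so a left-shift
   immediate must lie in [0, 7] and a right-shift immediate in [1, 8]. */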
12009
12010 int
12011 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12012 rtx *modconst, int *elementwidth,
12013 bool isleftshift)
12014 {
12015 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12016 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12017 unsigned HOST_WIDE_INT last_elt = 0;
12018 unsigned HOST_WIDE_INT maxshift;
12019
12020 /* Split vector constant out into a byte vector. */
12021 for (i = 0; i < n_elts; i++)
12022 {
12023 rtx el = CONST_VECTOR_ELT (op, i);
12024 unsigned HOST_WIDE_INT elpart;
12025
12026 if (CONST_INT_P (el))
12027 elpart = INTVAL (el);
12028 else if (CONST_DOUBLE_P (el))
12029 return 0;
12030 else
12031 gcc_unreachable ();
12032
12033 if (i != 0 && elpart != last_elt)
12034 return 0;
12035
12036 last_elt = elpart;
12037 }
12038
12039 /* Shift less than element size. */
12040 maxshift = innersize * 8;
12041
12042 if (isleftshift)
12043 {
12044 /* Left shift immediate value can be from 0 to <size>-1. */
12045 if (last_elt >= maxshift)
12046 return 0;
12047 }
12048 else
12049 {
12050 /* Right shift immediate value can be from 1 to <size>. */
12051 if (last_elt == 0 || last_elt > maxshift)
12052 return 0;
12053 }
12054
12055 if (elementwidth)
12056 *elementwidth = innersize * 8;
12057
12058 if (modconst)
12059 *modconst = CONST_VECTOR_ELT (op, 0);
12060
12061 return 1;
12062 }
12063
12064 /* Return a string suitable for output of Neon immediate logic operation
12065 MNEM. */
12066
12067 char *
12068 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12069 int inverse, int quad)
12070 {
12071 int width, is_valid;
12072 static char templ[40];
12073
12074 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12075
12076 gcc_assert (is_valid != 0);
12077
12078 if (quad)
12079 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12080 else
12081 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12082
12083 return templ;
12084 }
12085
12086 /* Return a string suitable for output of Neon immediate shift operation
12087 (VSHR or VSHL) MNEM. */
12088
12089 char *
12090 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12091 machine_mode mode, int quad,
12092 bool isleftshift)
12093 {
12094 int width, is_valid;
12095 static char templ[40];
12096
12097 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12098 gcc_assert (is_valid != 0);
12099
12100 if (quad)
12101 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12102 else
12103 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12104
12105 return templ;
12106 }
12107
12108 /* Output a sequence of pairwise operations to implement a reduction.
12109 NOTE: We do "too much work" here, because pairwise operations work on two
12110 registers' worth of operands in one go. Unfortunately, I don't think we can
12111 exploit those extra calculations to do the full operation in fewer steps.
12112 Although all vector elements of the result but the first are ignored, we
12113 actually calculate the same result in each of the elements. An alternative
12114 such as initially loading a vector with zero to use as each of the second
12115 operands would use up an additional register and take an extra instruction,
12116 for no particular gain. */
12117
12118 void
12119 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12120 rtx (*reduc) (rtx, rtx, rtx))
12121 {
12122 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12123 rtx tmpsum = op1;
12124
12125 for (i = parts / 2; i >= 1; i /= 2)
12126 {
12127 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12128 emit_insn (reduc (dest, tmpsum, tmpsum));
12129 tmpsum = dest;
12130 }
12131 }
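/* For a four-element vector the loop above runs with i = 2 and then
   i = 1, i.e. two pairwise operations: the first folds neighbouring
   pairs into a scratch register, the second folds again into OP0, whose
   element 0 then holds the full reduction. */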
12132
12133 /* If VALS is a vector constant that can be loaded into a register
12134 using VDUP, generate instructions to do so and return an RTX to
12135 assign to the register. Otherwise return NULL_RTX. */
12136
12137 static rtx
12138 neon_vdup_constant (rtx vals)
12139 {
12140 machine_mode mode = GET_MODE (vals);
12141 machine_mode inner_mode = GET_MODE_INNER (mode);
12142 rtx x;
12143
12144 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12145 return NULL_RTX;
12146
12147 if (!const_vec_duplicate_p (vals, &x))
12148 /* The elements are not all the same. We could handle repeating
12149 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12150 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12151 vdup.i16). */
12152 return NULL_RTX;
12153
12154 /* We can load this constant by using VDUP and a constant in a
12155 single ARM register. This will be cheaper than a vector
12156 load. */
12157
12158 x = copy_to_mode_reg (inner_mode, x);
12159 return gen_rtx_VEC_DUPLICATE (mode, x);
12160 }
12161
12162 /* Generate code to load VALS, which is a PARALLEL containing only
12163 constants (for vec_init) or CONST_VECTOR, efficiently into a
12164 register. Returns an RTX to copy into the register, or NULL_RTX
12165 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12166
12167 rtx
12168 neon_make_constant (rtx vals)
12169 {
12170 machine_mode mode = GET_MODE (vals);
12171 rtx target;
12172 rtx const_vec = NULL_RTX;
12173 int n_elts = GET_MODE_NUNITS (mode);
12174 int n_const = 0;
12175 int i;
12176
12177 if (GET_CODE (vals) == CONST_VECTOR)
12178 const_vec = vals;
12179 else if (GET_CODE (vals) == PARALLEL)
12180 {
12181 /* A CONST_VECTOR must contain only CONST_INTs and
12182 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12183 Only store valid constants in a CONST_VECTOR. */
12184 for (i = 0; i < n_elts; ++i)
12185 {
12186 rtx x = XVECEXP (vals, 0, i);
12187 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12188 n_const++;
12189 }
12190 if (n_const == n_elts)
12191 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12192 }
12193 else
12194 gcc_unreachable ();
12195
12196 if (const_vec != NULL
12197 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12198 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12199 return const_vec;
12200 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12201 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12202 pipeline cycle; creating the constant takes one or two ARM
12203 pipeline cycles. */
12204 return target;
12205 else if (const_vec != NULL_RTX)
12206 /* Load from constant pool. On Cortex-A8 this takes two cycles
12207 (for either double or quad vectors). We can not take advantage
12208 of single-cycle VLD1 because we need a PC-relative addressing
12209 mode. */
12210 return const_vec;
12211 else
12212 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12213 We can not construct an initializer. */
12214 return NULL_RTX;
12215 }
12216
12217 /* Initialize vector TARGET to VALS. */
12218
12219 void
12220 neon_expand_vector_init (rtx target, rtx vals)
12221 {
12222 machine_mode mode = GET_MODE (target);
12223 machine_mode inner_mode = GET_MODE_INNER (mode);
12224 int n_elts = GET_MODE_NUNITS (mode);
12225 int n_var = 0, one_var = -1;
12226 bool all_same = true;
12227 rtx x, mem;
12228 int i;
12229
12230 for (i = 0; i < n_elts; ++i)
12231 {
12232 x = XVECEXP (vals, 0, i);
12233 if (!CONSTANT_P (x))
12234 ++n_var, one_var = i;
12235
12236 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12237 all_same = false;
12238 }
12239
12240 if (n_var == 0)
12241 {
12242 rtx constant = neon_make_constant (vals);
12243 if (constant != NULL_RTX)
12244 {
12245 emit_move_insn (target, constant);
12246 return;
12247 }
12248 }
12249
12250 /* Splat a single non-constant element if we can. */
12251 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12252 {
12253 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12254 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12255 return;
12256 }
12257
12258 /* One field is non-constant. Load constant then overwrite varying
12259 field. This is more efficient than using the stack. */
12260 if (n_var == 1)
12261 {
12262 rtx copy = copy_rtx (vals);
12263 rtx index = GEN_INT (one_var);
12264
12265 /* Load constant part of vector, substitute neighboring value for
12266 varying element. */
12267 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12268 neon_expand_vector_init (target, copy);
12269
12270 /* Insert variable. */
12271 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12272 switch (mode)
12273 {
12274 case E_V8QImode:
12275 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12276 break;
12277 case E_V16QImode:
12278 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12279 break;
12280 case E_V4HImode:
12281 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12282 break;
12283 case E_V8HImode:
12284 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12285 break;
12286 case E_V2SImode:
12287 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12288 break;
12289 case E_V4SImode:
12290 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12291 break;
12292 case E_V2SFmode:
12293 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12294 break;
12295 case E_V4SFmode:
12296 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12297 break;
12298 case E_V2DImode:
12299 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12300 break;
12301 default:
12302 gcc_unreachable ();
12303 }
12304 return;
12305 }
12306
12307 /* Construct the vector in memory one field at a time
12308 and load the whole vector. */
12309 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12310 for (i = 0; i < n_elts; i++)
12311 emit_move_insn (adjust_address_nv (mem, inner_mode,
12312 i * GET_MODE_SIZE (inner_mode)),
12313 XVECEXP (vals, 0, i));
12314 emit_move_insn (target, mem);
12315 }
12316
12317 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12318 an error using DESC if it doesn't. EXP indicates the source location,
12319 which includes the inlining history for intrinsics. */
12320
12321 static void
12322 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12323 const_tree exp, const char *desc)
12324 {
12325 HOST_WIDE_INT lane;
12326
12327 gcc_assert (CONST_INT_P (operand));
12328
12329 lane = INTVAL (operand);
12330
12331 if (lane < low || lane >= high)
12332 {
12333 if (exp)
12334 error ("%K%s %wd out of range %wd - %wd",
12335 exp, desc, lane, low, high - 1);
12336 else
12337 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12338 }
12339 }
12340
12341 /* Bounds-check lanes. */
12342
12343 void
12344 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12345 const_tree exp)
12346 {
12347 bounds_check (operand, low, high, exp, "lane");
12348 }
12349
12350 /* Bounds-check constants. */
12351
12352 void
12353 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12354 {
12355 bounds_check (operand, low, high, NULL_TREE, "constant");
12356 }
12357
12358 HOST_WIDE_INT
12359 neon_element_bits (machine_mode mode)
12360 {
12361 return GET_MODE_UNIT_BITSIZE (mode);
12362 }
12363
12364 \f
12365 /* Predicates for `match_operand' and `match_operator'. */
12366
12367 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12368 WB is true if full writeback address modes are allowed and is false
12369 if limited writeback address modes (POST_INC and PRE_DEC) are
12370 allowed. */
12371
12372 int
12373 arm_coproc_mem_operand (rtx op, bool wb)
12374 {
12375 rtx ind;
12376
12377 /* Reject eliminable registers. */
12378 if (! (reload_in_progress || reload_completed || lra_in_progress)
12379 && ( reg_mentioned_p (frame_pointer_rtx, op)
12380 || reg_mentioned_p (arg_pointer_rtx, op)
12381 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12382 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12383 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12384 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12385 return FALSE;
12386
12387 /* Constants are converted into offsets from labels. */
12388 if (!MEM_P (op))
12389 return FALSE;
12390
12391 ind = XEXP (op, 0);
12392
12393 if (reload_completed
12394 && (GET_CODE (ind) == LABEL_REF
12395 || (GET_CODE (ind) == CONST
12396 && GET_CODE (XEXP (ind, 0)) == PLUS
12397 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12398 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12399 return TRUE;
12400
12401 /* Match: (mem (reg)). */
12402 if (REG_P (ind))
12403 return arm_address_register_rtx_p (ind, 0);
12404
12405 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12406 acceptable in any case (subject to verification by
12407 arm_address_register_rtx_p). We need WB to be true to accept
12408 PRE_INC and POST_DEC. */
12409 if (GET_CODE (ind) == POST_INC
12410 || GET_CODE (ind) == PRE_DEC
12411 || (wb
12412 && (GET_CODE (ind) == PRE_INC
12413 || GET_CODE (ind) == POST_DEC)))
12414 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12415
12416 if (wb
12417 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12418 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12419 && GET_CODE (XEXP (ind, 1)) == PLUS
12420 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12421 ind = XEXP (ind, 1);
12422
12423 /* Match:
12424 (plus (reg)
12425 (const)). */
12426 if (GET_CODE (ind) == PLUS
12427 && REG_P (XEXP (ind, 0))
12428 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12429 && CONST_INT_P (XEXP (ind, 1))
12430 && INTVAL (XEXP (ind, 1)) > -1024
12431 && INTVAL (XEXP (ind, 1)) < 1024
12432 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12433 return TRUE;
12434
12435 return FALSE;
12436 }
12437
12438 /* Return TRUE if OP is a memory operand which we can load or store a vector
12439 to/from. TYPE is one of the following values:
12440 0 - Vector load/store (vldr)
12441 1 - Core registers (ldm)
12442 2 - Element/structure loads (vld1)
12443 */
12444 int
12445 neon_vector_mem_operand (rtx op, int type, bool strict)
12446 {
12447 rtx ind;
12448
12449 /* Reject eliminable registers. */
12450 if (strict && ! (reload_in_progress || reload_completed)
12451 && (reg_mentioned_p (frame_pointer_rtx, op)
12452 || reg_mentioned_p (arg_pointer_rtx, op)
12453 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12454 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12455 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12456 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12457 return FALSE;
12458
12459 /* Constants are converted into offsets from labels. */
12460 if (!MEM_P (op))
12461 return FALSE;
12462
12463 ind = XEXP (op, 0);
12464
12465 if (reload_completed
12466 && (GET_CODE (ind) == LABEL_REF
12467 || (GET_CODE (ind) == CONST
12468 && GET_CODE (XEXP (ind, 0)) == PLUS
12469 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12470 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12471 return TRUE;
12472
12473 /* Match: (mem (reg)). */
12474 if (REG_P (ind))
12475 return arm_address_register_rtx_p (ind, 0);
12476
12477 /* Allow post-increment with Neon registers. */
12478 if ((type != 1 && GET_CODE (ind) == POST_INC)
12479 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12480 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12481
12482 /* Allow post-increment by register for VLDn. */
12483 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12484 && GET_CODE (XEXP (ind, 1)) == PLUS
12485 && REG_P (XEXP (XEXP (ind, 1), 1)))
12486 return true;
12487
12488 /* Match:
12489 (plus (reg)
12490 (const)). */
12491 if (type == 0
12492 && GET_CODE (ind) == PLUS
12493 && REG_P (XEXP (ind, 0))
12494 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12495 && CONST_INT_P (XEXP (ind, 1))
12496 && INTVAL (XEXP (ind, 1)) > -1024
12497 /* For quad modes, we restrict the constant offset to be slightly less
12498 than what the instruction format permits. We have no such constraint
12499 on double mode offsets. (This must match arm_legitimate_index_p.) */
12500 && (INTVAL (XEXP (ind, 1))
12501 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12502 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12503 return TRUE;
12504
12505 return FALSE;
12506 }
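
/* For illustration, the address forms accepted above for each TYPE
   (derived from the checks in neon_vector_mem_operand; after reload a
   label or label-plus-offset address is also accepted for every TYPE):

     TYPE 0 (vldr): (mem (reg)), (mem (post_inc (reg))),
                    (mem (pre_dec (reg))), and (mem (plus (reg) (const_int N)))
                    with -1024 < N < 1024 (N < 1016 for quad modes) and
                    N a multiple of 4.
     TYPE 1 (ldm):  (mem (reg)) only.
     TYPE 2 (vld1): (mem (reg)), (mem (post_inc (reg))), and post-increment
                    by a register via (post_modify ...). */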
12507
12508 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12509 type. */
12510 int
12511 neon_struct_mem_operand (rtx op)
12512 {
12513 rtx ind;
12514
12515 /* Reject eliminable registers. */
12516 if (! (reload_in_progress || reload_completed)
12517 && ( reg_mentioned_p (frame_pointer_rtx, op)
12518 || reg_mentioned_p (arg_pointer_rtx, op)
12519 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12520 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12521 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12522 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12523 return FALSE;
12524
12525 /* Constants are converted into offsets from labels. */
12526 if (!MEM_P (op))
12527 return FALSE;
12528
12529 ind = XEXP (op, 0);
12530
12531 if (reload_completed
12532 && (GET_CODE (ind) == LABEL_REF
12533 || (GET_CODE (ind) == CONST
12534 && GET_CODE (XEXP (ind, 0)) == PLUS
12535 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12536 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12537 return TRUE;
12538
12539 /* Match: (mem (reg)). */
12540 if (REG_P (ind))
12541 return arm_address_register_rtx_p (ind, 0);
12542
12543 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12544 if (GET_CODE (ind) == POST_INC
12545 || GET_CODE (ind) == PRE_DEC)
12546 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12547
12548 return FALSE;
12549 }
12550
12551 /* Return true if X is a register that will be eliminated later on. */
12552 int
12553 arm_eliminable_register (rtx x)
12554 {
12555 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12556 || REGNO (x) == ARG_POINTER_REGNUM
12557 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12558 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12559 }
12560
12561 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12562 coprocessor registers. Otherwise return NO_REGS. */
12563
12564 enum reg_class
12565 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12566 {
12567 if (mode == HFmode)
12568 {
12569 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12570 return GENERAL_REGS;
12571 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12572 return NO_REGS;
12573 return GENERAL_REGS;
12574 }
12575
12576 /* The neon move patterns handle all legitimate vector and struct
12577 addresses. */
12578 if (TARGET_NEON
12579 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12580 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12581 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12582 || VALID_NEON_STRUCT_MODE (mode)))
12583 return NO_REGS;
12584
12585 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12586 return NO_REGS;
12587
12588 return GENERAL_REGS;
12589 }
12590
12591 /* Values which must be returned in the most-significant end of the return
12592 register. */
12593
12594 static bool
12595 arm_return_in_msb (const_tree valtype)
12596 {
12597 return (TARGET_AAPCS_BASED
12598 && BYTES_BIG_ENDIAN
12599 && (AGGREGATE_TYPE_P (valtype)
12600 || TREE_CODE (valtype) == COMPLEX_TYPE
12601 || FIXED_POINT_TYPE_P (valtype)));
12602 }
12603
12604 /* Return TRUE if X references a SYMBOL_REF. */
12605 int
12606 symbol_mentioned_p (rtx x)
12607 {
12608 const char * fmt;
12609 int i;
12610
12611 if (GET_CODE (x) == SYMBOL_REF)
12612 return 1;
12613
12614 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12615 are constant offsets, not symbols. */
12616 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12617 return 0;
12618
12619 fmt = GET_RTX_FORMAT (GET_CODE (x));
12620
12621 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12622 {
12623 if (fmt[i] == 'E')
12624 {
12625 int j;
12626
12627 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12628 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12629 return 1;
12630 }
12631 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12632 return 1;
12633 }
12634
12635 return 0;
12636 }
12637
12638 /* Return TRUE if X references a LABEL_REF. */
12639 int
12640 label_mentioned_p (rtx x)
12641 {
12642 const char * fmt;
12643 int i;
12644
12645 if (GET_CODE (x) == LABEL_REF)
12646 return 1;
12647
12648 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12649 instruction, but they are constant offsets, not symbols. */
12650 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12651 return 0;
12652
12653 fmt = GET_RTX_FORMAT (GET_CODE (x));
12654 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12655 {
12656 if (fmt[i] == 'E')
12657 {
12658 int j;
12659
12660 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12661 if (label_mentioned_p (XVECEXP (x, i, j)))
12662 return 1;
12663 }
12664 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12665 return 1;
12666 }
12667
12668 return 0;
12669 }
12670
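/* Return TRUE if X is a TLS-related UNSPEC, possibly wrapped in a CONST. */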
12671 int
12672 tls_mentioned_p (rtx x)
12673 {
12674 switch (GET_CODE (x))
12675 {
12676 case CONST:
12677 return tls_mentioned_p (XEXP (x, 0));
12678
12679 case UNSPEC:
12680 if (XINT (x, 1) == UNSPEC_TLS)
12681 return 1;
12682
12683 /* Fall through. */
12684 default:
12685 return 0;
12686 }
12687 }
12688
12689 /* Must not copy any rtx that uses a pc-relative address.
12690 Also, disallow copying of load-exclusive instructions that
12691 may appear after splitting of compare-and-swap-style operations
12692 so as to prevent those loops from being transformed away from their
12693 canonical forms (see PR 69904). */
12694
12695 static bool
12696 arm_cannot_copy_insn_p (rtx_insn *insn)
12697 {
12698 /* The tls call insn cannot be copied, as it is paired with a data
12699 word. */
12700 if (recog_memoized (insn) == CODE_FOR_tlscall)
12701 return true;
12702
12703 subrtx_iterator::array_type array;
12704 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12705 {
12706 const_rtx x = *iter;
12707 if (GET_CODE (x) == UNSPEC
12708 && (XINT (x, 1) == UNSPEC_PIC_BASE
12709 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12710 return true;
12711 }
12712
12713 rtx set = single_set (insn);
12714 if (set)
12715 {
12716 rtx src = SET_SRC (set);
12717 if (GET_CODE (src) == ZERO_EXTEND)
12718 src = XEXP (src, 0);
12719
12720 /* Catch the load-exclusive and load-acquire operations. */
12721 if (GET_CODE (src) == UNSPEC_VOLATILE
12722 && (XINT (src, 1) == VUNSPEC_LL
12723 || XINT (src, 1) == VUNSPEC_LAX))
12724 return true;
12725 }
12726 return false;
12727 }
12728
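/* Return the comparison code used to implement the min/max operation X
   (e.g. SMAX corresponds to GE, UMIN to LEU). */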
12729 enum rtx_code
12730 minmax_code (rtx x)
12731 {
12732 enum rtx_code code = GET_CODE (x);
12733
12734 switch (code)
12735 {
12736 case SMAX:
12737 return GE;
12738 case SMIN:
12739 return LE;
12740 case UMIN:
12741 return LEU;
12742 case UMAX:
12743 return GEU;
12744 default:
12745 gcc_unreachable ();
12746 }
12747 }
12748
12749 /* Match a pair of saturation bounds (LO_BOUND, HI_BOUND) that can be
 implemented via usat/ssat; store the bit-width in *MASK and the signedness
 in *SIGNED_SAT. */
12750
12751 bool
12752 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12753 int *mask, bool *signed_sat)
12754 {
12755 /* The high bound must be a power of two minus one. */
12756 int log = exact_log2 (INTVAL (hi_bound) + 1);
12757 if (log == -1)
12758 return false;
12759
12760 /* The low bound is either zero (for usat) or one less than the
12761 negation of the high bound (for ssat). */
12762 if (INTVAL (lo_bound) == 0)
12763 {
12764 if (mask)
12765 *mask = log;
12766 if (signed_sat)
12767 *signed_sat = false;
12768
12769 return true;
12770 }
12771
12772 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12773 {
12774 if (mask)
12775 *mask = log + 1;
12776 if (signed_sat)
12777 *signed_sat = true;
12778
12779 return true;
12780 }
12781
12782 return false;
12783 }
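
/* For example (derived from the checks above):
     LO_BOUND = 0, HI_BOUND = 255 gives *MASK = 8, *SIGNED_SAT = false
       (a usat #8, clamping to [0, 255]);
     LO_BOUND = -128, HI_BOUND = 127 gives *MASK = 8, *SIGNED_SAT = true
       (an ssat #8, clamping to [-128, 127]).
   A pair such as (0, 100) is rejected because 101 is not a power of two. */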
12784
12785 /* Return 1 if the memory locations A and B are adjacent (4 bytes apart). */
12786 int
12787 adjacent_mem_locations (rtx a, rtx b)
12788 {
12789 /* We don't guarantee to preserve the order of these memory refs. */
12790 if (volatile_refs_p (a) || volatile_refs_p (b))
12791 return 0;
12792
12793 if ((REG_P (XEXP (a, 0))
12794 || (GET_CODE (XEXP (a, 0)) == PLUS
12795 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12796 && (REG_P (XEXP (b, 0))
12797 || (GET_CODE (XEXP (b, 0)) == PLUS
12798 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12799 {
12800 HOST_WIDE_INT val0 = 0, val1 = 0;
12801 rtx reg0, reg1;
12802 int val_diff;
12803
12804 if (GET_CODE (XEXP (a, 0)) == PLUS)
12805 {
12806 reg0 = XEXP (XEXP (a, 0), 0);
12807 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12808 }
12809 else
12810 reg0 = XEXP (a, 0);
12811
12812 if (GET_CODE (XEXP (b, 0)) == PLUS)
12813 {
12814 reg1 = XEXP (XEXP (b, 0), 0);
12815 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12816 }
12817 else
12818 reg1 = XEXP (b, 0);
12819
12820 /* Don't accept any offset that will require multiple
12821 instructions to handle, since this would cause the
12822 arith_adjacentmem pattern to output an overlong sequence. */
12823 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12824 return 0;
12825
12826 /* Don't allow an eliminable register: register elimination can make
12827 the offset too large. */
12828 if (arm_eliminable_register (reg0))
12829 return 0;
12830
12831 val_diff = val1 - val0;
12832
12833 if (arm_ld_sched)
12834 {
12835 /* If the target has load delay slots, then there's no benefit
12836 to using an ldm instruction unless the offset is zero and
12837 we are optimizing for size. */
12838 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12839 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12840 && (val_diff == 4 || val_diff == -4));
12841 }
12842
12843 return ((REGNO (reg0) == REGNO (reg1))
12844 && (val_diff == 4 || val_diff == -4));
12845 }
12846
12847 return 0;
12848 }
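
/* For example, (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) are adjacent: same base register and
   offsets differing by exactly 4. On arm_ld_sched cores such a pair is
   only accepted when optimizing for size and one of the offsets is 0 or 4. */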
12849
12850 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12851 for load operations, false for store operations. CONSECUTIVE is true
12852 if the register numbers in the operation must be consecutive in the register
12853 bank. RETURN_PC is true if the value is to be loaded into the PC. MODE is
 the mode of each individual transfer (SImode for LDM/STM; e.g. DFmode for
 VLDM/VSTM).
12854 The pattern we are trying to match for load is:
12855 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12856 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12857 :
12858 :
12859 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12860 ]
12861 where
12862 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12863 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12864 3. If consecutive is TRUE, then for kth register being loaded,
12865 REGNO (R_dk) = REGNO (R_d0) + k.
12866 The pattern for store is similar. */
12867 bool
12868 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12869 bool consecutive, bool return_pc)
12870 {
12871 HOST_WIDE_INT count = XVECLEN (op, 0);
12872 rtx reg, mem, addr;
12873 unsigned regno;
12874 unsigned first_regno;
12875 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12876 rtx elt;
12877 bool addr_reg_in_reglist = false;
12878 bool update = false;
12879 int reg_increment;
12880 int offset_adj;
12881 int regs_per_val;
12882
12883 /* If not in SImode, then registers must be consecutive
12884 (e.g., VLDM instructions for DFmode). */
12885 gcc_assert ((mode == SImode) || consecutive);
12886 /* Setting return_pc for stores is illegal. */
12887 gcc_assert (!return_pc || load);
12888
12889 /* Set up the increments and the regs per val based on the mode. */
12890 reg_increment = GET_MODE_SIZE (mode);
12891 regs_per_val = reg_increment / 4;
12892 offset_adj = return_pc ? 1 : 0;
12893
12894 if (count <= 1
12895 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12896 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12897 return false;
12898
12899 /* Check if this is a write-back. */
12900 elt = XVECEXP (op, 0, offset_adj);
12901 if (GET_CODE (SET_SRC (elt)) == PLUS)
12902 {
12903 i++;
12904 base = 1;
12905 update = true;
12906
12907 /* The offset adjustment must be the number of registers being
12908 popped times the size of a single register. */
12909 if (!REG_P (SET_DEST (elt))
12910 || !REG_P (XEXP (SET_SRC (elt), 0))
12911 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12912 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12913 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12914 ((count - 1 - offset_adj) * reg_increment))
12915 return false;
12916 }
12917
12918 i = i + offset_adj;
12919 base = base + offset_adj;
12920 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12921 success depends on the type: VLDM can do just one reg,
12922 LDM must do at least two. */
12923 if ((count <= i) && (mode == SImode))
12924 return false;
12925
12926 elt = XVECEXP (op, 0, i - 1);
12927 if (GET_CODE (elt) != SET)
12928 return false;
12929
12930 if (load)
12931 {
12932 reg = SET_DEST (elt);
12933 mem = SET_SRC (elt);
12934 }
12935 else
12936 {
12937 reg = SET_SRC (elt);
12938 mem = SET_DEST (elt);
12939 }
12940
12941 if (!REG_P (reg) || !MEM_P (mem))
12942 return false;
12943
12944 regno = REGNO (reg);
12945 first_regno = regno;
12946 addr = XEXP (mem, 0);
12947 if (GET_CODE (addr) == PLUS)
12948 {
12949 if (!CONST_INT_P (XEXP (addr, 1)))
12950 return false;
12951
12952 offset = INTVAL (XEXP (addr, 1));
12953 addr = XEXP (addr, 0);
12954 }
12955
12956 if (!REG_P (addr))
12957 return false;
12958
12959 /* Don't allow SP to be loaded unless it is also the base register. It
12960 guarantees that SP is reset correctly when an LDM instruction
12961 is interrupted. Otherwise, we might end up with a corrupt stack. */
12962 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12963 return false;
12964
12965 for (; i < count; i++)
12966 {
12967 elt = XVECEXP (op, 0, i);
12968 if (GET_CODE (elt) != SET)
12969 return false;
12970
12971 if (load)
12972 {
12973 reg = SET_DEST (elt);
12974 mem = SET_SRC (elt);
12975 }
12976 else
12977 {
12978 reg = SET_SRC (elt);
12979 mem = SET_DEST (elt);
12980 }
12981
12982 if (!REG_P (reg)
12983 || GET_MODE (reg) != mode
12984 || REGNO (reg) <= regno
12985 || (consecutive
12986 && (REGNO (reg) !=
12987 (unsigned int) (first_regno + regs_per_val * (i - base))))
12988 /* Don't allow SP to be loaded unless it is also the base register. It
12989 guarantees that SP is reset correctly when an LDM instruction
12990 is interrupted. Otherwise, we might end up with a corrupt stack. */
12991 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12992 || !MEM_P (mem)
12993 || GET_MODE (mem) != mode
12994 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12995 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12996 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12997 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12998 offset + (i - base) * reg_increment))
12999 && (!REG_P (XEXP (mem, 0))
13000 || offset + (i - base) * reg_increment != 0)))
13001 return false;
13002
13003 regno = REGNO (reg);
13004 if (regno == REGNO (addr))
13005 addr_reg_in_reglist = true;
13006 }
13007
13008 if (load)
13009 {
13010 if (update && addr_reg_in_reglist)
13011 return false;
13012
13013 /* For Thumb-1, the address register is always modified - either by write-back
13014 or by an explicit load. If the pattern does not describe an update,
13015 then the address register must be in the list of loaded registers. */
13016 if (TARGET_THUMB1)
13017 return update || addr_reg_in_reglist;
13018 }
13019
13020 return true;
13021 }
13022
13023 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13024 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13025 instruction. ADD_OFFSET is nonzero if the base address register needs
13026 to be modified with an add instruction before we can use it. */
13027
13028 static bool
13029 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13030 int nops, HOST_WIDE_INT add_offset)
13031 {
13032 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13033 if the offset isn't small enough. The reason 2 ldrs are faster
13034 is because these ARMs are able to do more than one cache access
13035 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13036 whilst the ARM8 has a double bandwidth cache. This means that
13037 these cores can do both an instruction fetch and a data fetch in
13038 a single cycle, so the trick of calculating the address into a
13039 scratch register (one of the result regs) and then doing a load
13040 multiple actually becomes slower (and no smaller in code size).
13041 That is the transformation
13042
13043 ldr rd1, [rbase + offset]
13044 ldr rd2, [rbase + offset + 4]
13045
13046 to
13047
13048 add rd1, rbase, offset
13049 ldmia rd1, {rd1, rd2}
13050
13051 produces worse code -- '3 cycles + any stalls on rd2' instead of
13052 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13053 access per cycle, the first sequence could never complete in less
13054 than 6 cycles, whereas the ldm sequence would only take 5 and
13055 would make better use of sequential accesses if not hitting the
13056 cache.
13057
13058 We cheat here and test 'arm_ld_sched' which we currently know to
13059 only be true for the ARM8, ARM9 and StrongARM. If this ever
13060 changes, then the test below needs to be reworked. */
13061 if (nops == 2 && arm_ld_sched && add_offset != 0)
13062 return false;
13063
13064 /* XScale has load-store double instructions, but they have stricter
13065 alignment requirements than load-store multiple, so we cannot
13066 use them.
13067
13068 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13069 the pipeline until completion.
13070
13071 NREGS CYCLES
13072 1 3
13073 2 4
13074 3 5
13075 4 6
13076
13077 An ldr instruction takes 1-3 cycles, but does not block the
13078 pipeline.
13079
13080 NREGS CYCLES
13081 1 1-3
13082 2 2-6
13083 3 3-9
13084 4 4-12
13085
13086 Best case ldr will always win. However, the more ldr instructions
13087 we issue, the less likely we are to be able to schedule them well.
13088 Using ldr instructions also increases code size.
13089
13090 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13091 for counts of 3 or 4 regs. */
13092 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13093 return false;
13094 return true;
13095 }
13096
13097 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13098 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13099 an array ORDER describing the sequence in which to access the offsets
13100 so that they form an ascending order. In this sequence, each
13101 offset must be larger by exactly 4 than the previous one. ORDER[0]
13102 must have been filled in with the lowest offset by the caller.
13103 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13104 we use to verify that ORDER produces an ascending order of registers.
13105 Return true if it was possible to construct such an order, false if
13106 not. */
13107
13108 static bool
13109 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13110 int *unsorted_regs)
13111 {
13112 int i;
13113 for (i = 1; i < nops; i++)
13114 {
13115 int j;
13116
13117 order[i] = order[i - 1];
13118 for (j = 0; j < nops; j++)
13119 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13120 {
13121 /* We must find exactly one offset that is higher than the
13122 previous one by 4. */
13123 if (order[i] != order[i - 1])
13124 return false;
13125 order[i] = j;
13126 }
13127 if (order[i] == order[i - 1])
13128 return false;
13129 /* The register numbers must be ascending. */
13130 if (unsorted_regs != NULL
13131 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13132 return false;
13133 }
13134 return true;
13135 }
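
/* For example, with NOPS = 4 and UNSORTED_OFFSETS = {8, 0, 4, 12}, the
   caller sets ORDER[0] = 1 (the index of the lowest offset) and the loop
   above fills in ORDER = {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12.
   UNSORTED_OFFSETS = {0, 4, 12, 16} fails because no offset equals 4 + 4. */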
13136
13137 /* Used to determine in a peephole whether a sequence of load
13138 instructions can be changed into a load-multiple instruction.
13139 NOPS is the number of separate load instructions we are examining. The
13140 first NOPS entries in OPERANDS are the destination registers, the
13141 next NOPS entries are memory operands. If this function is
13142 successful, *BASE is set to the common base register of the memory
13143 accesses; *LOAD_OFFSET is set to the first memory location's offset
13144 from that base register.
13145 REGS is an array filled in with the destination register numbers.
13146 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13147 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13148 the sequence of registers in REGS matches the loads from ascending memory
13149 locations, and the function verifies that the register numbers are
13150 themselves ascending. If CHECK_REGS is false, the register numbers
13151 are stored in the order they are found in the operands. */
13152 static int
13153 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13154 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13155 {
13156 int unsorted_regs[MAX_LDM_STM_OPS];
13157 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13158 int order[MAX_LDM_STM_OPS];
13159 rtx base_reg_rtx = NULL;
13160 int base_reg = -1;
13161 int i, ldm_case;
13162
13163 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13164 easily extended if required. */
13165 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13166
13167 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13168
13169 /* Loop over the operands and check that the memory references are
13170 suitable (i.e. immediate offsets from the same base register). At
13171 the same time, extract the target register, and the memory
13172 offsets. */
13173 for (i = 0; i < nops; i++)
13174 {
13175 rtx reg;
13176 rtx offset;
13177
13178 /* Convert a subreg of a mem into the mem itself. */
13179 if (GET_CODE (operands[nops + i]) == SUBREG)
13180 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13181
13182 gcc_assert (MEM_P (operands[nops + i]));
13183
13184 /* Don't reorder volatile memory references; it doesn't seem worth
13185 looking for the case where the order is ok anyway. */
13186 if (MEM_VOLATILE_P (operands[nops + i]))
13187 return 0;
13188
13189 offset = const0_rtx;
13190
13191 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13192 || (GET_CODE (reg) == SUBREG
13193 && REG_P (reg = SUBREG_REG (reg))))
13194 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13195 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13196 || (GET_CODE (reg) == SUBREG
13197 && REG_P (reg = SUBREG_REG (reg))))
13198 && (CONST_INT_P (offset
13199 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13200 {
13201 if (i == 0)
13202 {
13203 base_reg = REGNO (reg);
13204 base_reg_rtx = reg;
13205 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13206 return 0;
13207 }
13208 else if (base_reg != (int) REGNO (reg))
13209 /* Not addressed from the same base register. */
13210 return 0;
13211
13212 unsorted_regs[i] = (REG_P (operands[i])
13213 ? REGNO (operands[i])
13214 : REGNO (SUBREG_REG (operands[i])));
13215
13216 /* If it isn't an integer register, or if it overwrites the
13217 base register but isn't the last insn in the list, then
13218 we can't do this. */
13219 if (unsorted_regs[i] < 0
13220 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13221 || unsorted_regs[i] > 14
13222 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13223 return 0;
13224
13225 /* Don't allow SP to be loaded unless it is also the base
13226 register. It guarantees that SP is reset correctly when
13227 an LDM instruction is interrupted. Otherwise, we might
13228 end up with a corrupt stack. */
13229 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13230 return 0;
13231
13232 unsorted_offsets[i] = INTVAL (offset);
13233 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13234 order[0] = i;
13235 }
13236 else
13237 /* Not a suitable memory address. */
13238 return 0;
13239 }
13240
13241 /* All the useful information has now been extracted from the
13242 operands into unsorted_regs and unsorted_offsets; additionally,
13243 order[0] has been set to the lowest offset in the list. Sort
13244 the offsets into order, verifying that they are adjacent, and
13245 check that the register numbers are ascending. */
13246 if (!compute_offset_order (nops, unsorted_offsets, order,
13247 check_regs ? unsorted_regs : NULL))
13248 return 0;
13249
13250 if (saved_order)
13251 memcpy (saved_order, order, sizeof order);
13252
13253 if (base)
13254 {
13255 *base = base_reg;
13256
13257 for (i = 0; i < nops; i++)
13258 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13259
13260 *load_offset = unsorted_offsets[order[0]];
13261 }
13262
13263 if (TARGET_THUMB1
13264 && !peep2_reg_dead_p (nops, base_reg_rtx))
13265 return 0;
13266
13267 if (unsorted_offsets[order[0]] == 0)
13268 ldm_case = 1; /* ldmia */
13269 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13270 ldm_case = 2; /* ldmib */
13271 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13272 ldm_case = 3; /* ldmda */
13273 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13274 ldm_case = 4; /* ldmdb */
13275 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13276 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13277 ldm_case = 5;
13278 else
13279 return 0;
13280
13281 if (!multiple_operation_profitable_p (false, nops,
13282 ldm_case == 5
13283 ? unsorted_offsets[order[0]] : 0))
13284 return 0;
13285
13286 return ldm_case;
13287 }
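
/* For example, four loads of r4..r7 from [r1], [r1, #4], [r1, #8] and
   [r1, #12] give ldm_case 1 (ldmia); the same loads with offsets starting
   at 4 give ldm_case 2 (ldmib, ARM only). If the lowest offset is merely a
   valid add/sub immediate, ldm_case 5 is returned and the caller must first
   add that offset into the base (or a scratch) register, as gen_ldm_seq
   does below. */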
13288
13289 /* Used to determine in a peephole whether a sequence of store instructions can
13290 be changed into a store-multiple instruction.
13291 NOPS is the number of separate store instructions we are examining.
13292 NOPS_TOTAL is the total number of instructions recognized by the peephole
13293 pattern.
13294 The first NOPS entries in OPERANDS are the source registers, the next
13295 NOPS entries are memory operands. If this function is successful, *BASE is
13296 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13297 to the first memory location's offset from that base register. REGS is an
13298 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13299 likewise filled with the corresponding rtx's.
13300 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13301 numbers to an ascending order of stores.
13302 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13303 from ascending memory locations, and the function verifies that the register
13304 numbers are themselves ascending. If CHECK_REGS is false, the register
13305 numbers are stored in the order they are found in the operands. */
13306 static int
13307 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13308 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13309 HOST_WIDE_INT *load_offset, bool check_regs)
13310 {
13311 int unsorted_regs[MAX_LDM_STM_OPS];
13312 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13313 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13314 int order[MAX_LDM_STM_OPS];
13315 int base_reg = -1;
13316 rtx base_reg_rtx = NULL;
13317 int i, stm_case;
13318
13319 /* Write back of base register is currently only supported for Thumb 1. */
13320 int base_writeback = TARGET_THUMB1;
13321
13322 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13323 easily extended if required. */
13324 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13325
13326 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13327
13328 /* Loop over the operands and check that the memory references are
13329 suitable (i.e. immediate offsets from the same base register). At
13330 the same time, extract the target register, and the memory
13331 offsets. */
13332 for (i = 0; i < nops; i++)
13333 {
13334 rtx reg;
13335 rtx offset;
13336
13337 /* Convert a subreg of a mem into the mem itself. */
13338 if (GET_CODE (operands[nops + i]) == SUBREG)
13339 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13340
13341 gcc_assert (MEM_P (operands[nops + i]));
13342
13343 /* Don't reorder volatile memory references; it doesn't seem worth
13344 looking for the case where the order is ok anyway. */
13345 if (MEM_VOLATILE_P (operands[nops + i]))
13346 return 0;
13347
13348 offset = const0_rtx;
13349
13350 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13351 || (GET_CODE (reg) == SUBREG
13352 && REG_P (reg = SUBREG_REG (reg))))
13353 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13354 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13355 || (GET_CODE (reg) == SUBREG
13356 && REG_P (reg = SUBREG_REG (reg))))
13357 && (CONST_INT_P (offset
13358 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13359 {
13360 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13361 ? operands[i] : SUBREG_REG (operands[i]));
13362 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13363
13364 if (i == 0)
13365 {
13366 base_reg = REGNO (reg);
13367 base_reg_rtx = reg;
13368 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13369 return 0;
13370 }
13371 else if (base_reg != (int) REGNO (reg))
13372 /* Not addressed from the same base register. */
13373 return 0;
13374
13375 /* If it isn't an integer register, then we can't do this. */
13376 if (unsorted_regs[i] < 0
13377 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13378 /* The effects are unpredictable if the base register is
13379 both updated and stored. */
13380 || (base_writeback && unsorted_regs[i] == base_reg)
13381 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13382 || unsorted_regs[i] > 14)
13383 return 0;
13384
13385 unsorted_offsets[i] = INTVAL (offset);
13386 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13387 order[0] = i;
13388 }
13389 else
13390 /* Not a suitable memory address. */
13391 return 0;
13392 }
13393
13394 /* All the useful information has now been extracted from the
13395 operands into unsorted_regs and unsorted_offsets; additionally,
13396 order[0] has been set to the lowest offset in the list. Sort
13397 the offsets into order, verifying that they are adjacent, and
13398 check that the register numbers are ascending. */
13399 if (!compute_offset_order (nops, unsorted_offsets, order,
13400 check_regs ? unsorted_regs : NULL))
13401 return 0;
13402
13403 if (saved_order)
13404 memcpy (saved_order, order, sizeof order);
13405
13406 if (base)
13407 {
13408 *base = base_reg;
13409
13410 for (i = 0; i < nops; i++)
13411 {
13412 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13413 if (reg_rtxs)
13414 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13415 }
13416
13417 *load_offset = unsorted_offsets[order[0]];
13418 }
13419
13420 if (TARGET_THUMB1
13421 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13422 return 0;
13423
13424 if (unsorted_offsets[order[0]] == 0)
13425 stm_case = 1; /* stmia */
13426 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13427 stm_case = 2; /* stmib */
13428 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13429 stm_case = 3; /* stmda */
13430 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13431 stm_case = 4; /* stmdb */
13432 else
13433 return 0;
13434
13435 if (!multiple_operation_profitable_p (false, nops, 0))
13436 return 0;
13437
13438 return stm_case;
13439 }
13440 \f
13441 /* Routines for use in generating RTL. */
13442
13443 /* Generate a load-multiple instruction. COUNT is the number of loads in
13444 the instruction; REGS and MEMS are arrays containing the operands.
13445 BASEREG is the base register to be used in addressing the memory operands.
13446 WBACK_OFFSET is nonzero if the instruction should update the base
13447 register. */
13448
13449 static rtx
13450 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13451 HOST_WIDE_INT wback_offset)
13452 {
13453 int i = 0, j;
13454 rtx result;
13455
13456 if (!multiple_operation_profitable_p (false, count, 0))
13457 {
13458 rtx seq;
13459
13460 start_sequence ();
13461
13462 for (i = 0; i < count; i++)
13463 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13464
13465 if (wback_offset != 0)
13466 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13467
13468 seq = get_insns ();
13469 end_sequence ();
13470
13471 return seq;
13472 }
13473
13474 result = gen_rtx_PARALLEL (VOIDmode,
13475 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13476 if (wback_offset != 0)
13477 {
13478 XVECEXP (result, 0, 0)
13479 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13480 i = 1;
13481 count++;
13482 }
13483
13484 for (j = 0; i < count; i++, j++)
13485 XVECEXP (result, 0, i)
13486 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13487
13488 return result;
13489 }
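
/* For illustration, with COUNT = 2, REGS = {4, 5} and WBACK_OFFSET = 8,
   the profitable path above builds

     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                (set (reg:SI r4) MEMS[0])
                (set (reg:SI r5) MEMS[1])])

   i.e. an ldmia with write-back; the non-profitable path instead emits two
   single loads followed by a separate update of the base register. */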
13490
13491 /* Generate a store-multiple instruction. COUNT is the number of stores in
13492 the instruction; REGS and MEMS are arrays containing the operands.
13493 BASEREG is the base register to be used in addressing the memory operands.
13494 WBACK_OFFSET is nonzero if the instruction should update the base
13495 register. */
13496
13497 static rtx
13498 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13499 HOST_WIDE_INT wback_offset)
13500 {
13501 int i = 0, j;
13502 rtx result;
13503
13504 if (GET_CODE (basereg) == PLUS)
13505 basereg = XEXP (basereg, 0);
13506
13507 if (!multiple_operation_profitable_p (false, count, 0))
13508 {
13509 rtx seq;
13510
13511 start_sequence ();
13512
13513 for (i = 0; i < count; i++)
13514 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13515
13516 if (wback_offset != 0)
13517 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13518
13519 seq = get_insns ();
13520 end_sequence ();
13521
13522 return seq;
13523 }
13524
13525 result = gen_rtx_PARALLEL (VOIDmode,
13526 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13527 if (wback_offset != 0)
13528 {
13529 XVECEXP (result, 0, 0)
13530 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13531 i = 1;
13532 count++;
13533 }
13534
13535 for (j = 0; i < count; i++, j++)
13536 XVECEXP (result, 0, i)
13537 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13538
13539 return result;
13540 }
13541
13542 /* Generate either a load-multiple or a store-multiple instruction. This
13543 function can be used in situations where we can start with a single MEM
13544 rtx and adjust its address upwards.
13545 COUNT is the number of operations in the instruction, not counting a
13546 possible update of the base register. REGS is an array containing the
13547 register operands.
13548 BASEREG is the base register to be used in addressing the memory operands,
13549 which are constructed from BASEMEM.
13550 WRITE_BACK specifies whether the generated instruction should include an
13551 update of the base register.
13552 OFFSETP is used to pass an offset to and from this function; this offset
13553 is not used when constructing the address (instead BASEMEM should have an
13554 appropriate offset in its address), it is used only for setting
13555 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13556
13557 static rtx
13558 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13559 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13560 {
13561 rtx mems[MAX_LDM_STM_OPS];
13562 HOST_WIDE_INT offset = *offsetp;
13563 int i;
13564
13565 gcc_assert (count <= MAX_LDM_STM_OPS);
13566
13567 if (GET_CODE (basereg) == PLUS)
13568 basereg = XEXP (basereg, 0);
13569
13570 for (i = 0; i < count; i++)
13571 {
13572 rtx addr = plus_constant (Pmode, basereg, i * 4);
13573 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13574 offset += 4;
13575 }
13576
13577 if (write_back)
13578 *offsetp = offset;
13579
13580 if (is_load)
13581 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13582 write_back ? 4 * count : 0);
13583 else
13584 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13585 write_back ? 4 * count : 0);
13586 }
13587
13588 rtx
13589 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13590 rtx basemem, HOST_WIDE_INT *offsetp)
13591 {
13592 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13593 offsetp);
13594 }
13595
13596 rtx
13597 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13598 rtx basemem, HOST_WIDE_INT *offsetp)
13599 {
13600 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13601 offsetp);
13602 }
13603
13604 /* Called from a peephole2 expander to turn a sequence of loads into an
13605 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13606 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13607 is true if we can reorder the registers because they are subsequently used
13608 commutatively.
13609 Returns true iff we could generate a new instruction. */
13610
13611 bool
13612 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13613 {
13614 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13615 rtx mems[MAX_LDM_STM_OPS];
13616 int i, j, base_reg;
13617 rtx base_reg_rtx;
13618 HOST_WIDE_INT offset;
13619 int write_back = FALSE;
13620 int ldm_case;
13621 rtx addr;
13622
13623 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13624 &base_reg, &offset, !sort_regs);
13625
13626 if (ldm_case == 0)
13627 return false;
13628
13629 if (sort_regs)
13630 for (i = 0; i < nops - 1; i++)
13631 for (j = i + 1; j < nops; j++)
13632 if (regs[i] > regs[j])
13633 {
13634 int t = regs[i];
13635 regs[i] = regs[j];
13636 regs[j] = t;
13637 }
13638 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13639
13640 if (TARGET_THUMB1)
13641 {
13642 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13643 gcc_assert (ldm_case == 1 || ldm_case == 5);
13644 write_back = TRUE;
13645 }
13646
13647 if (ldm_case == 5)
13648 {
13649 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13650 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13651 offset = 0;
13652 if (!TARGET_THUMB1)
13653 base_reg_rtx = newbase;
13654 }
13655
13656 for (i = 0; i < nops; i++)
13657 {
13658 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13659 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13660 SImode, addr, 0);
13661 }
13662 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13663 write_back ? offset + i * 4 : 0));
13664 return true;
13665 }
13666
13667 /* Called from a peephole2 expander to turn a sequence of stores into an
13668 STM instruction. OPERANDS are the operands found by the peephole matcher;
13669 NOPS indicates how many separate stores we are trying to combine.
13670 Returns true iff we could generate a new instruction. */
13671
13672 bool
13673 gen_stm_seq (rtx *operands, int nops)
13674 {
13675 int i;
13676 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13677 rtx mems[MAX_LDM_STM_OPS];
13678 int base_reg;
13679 rtx base_reg_rtx;
13680 HOST_WIDE_INT offset;
13681 int write_back = FALSE;
13682 int stm_case;
13683 rtx addr;
13684 bool base_reg_dies;
13685
13686 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13687 mem_order, &base_reg, &offset, true);
13688
13689 if (stm_case == 0)
13690 return false;
13691
13692 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13693
13694 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13695 if (TARGET_THUMB1)
13696 {
13697 gcc_assert (base_reg_dies);
13698 write_back = TRUE;
13699 }
13700
13701 if (stm_case == 5)
13702 {
13703 gcc_assert (base_reg_dies);
13704 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13705 offset = 0;
13706 }
13707
13708 addr = plus_constant (Pmode, base_reg_rtx, offset);
13709
13710 for (i = 0; i < nops; i++)
13711 {
13712 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13713 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13714 SImode, addr, 0);
13715 }
13716 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13717 write_back ? offset + i * 4 : 0));
13718 return true;
13719 }
13720
13721 /* Called from a peephole2 expander to turn a sequence of stores that are
13722 preceded by constant loads into an STM instruction. OPERANDS are the
13723 operands found by the peephole matcher; NOPS indicates how many
13724 separate stores we are trying to combine; there are 2 * NOPS
13725 instructions in the peephole.
13726 Returns true iff we could generate a new instruction. */
13727
13728 bool
13729 gen_const_stm_seq (rtx *operands, int nops)
13730 {
13731 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13732 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13733 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13734 rtx mems[MAX_LDM_STM_OPS];
13735 int base_reg;
13736 rtx base_reg_rtx;
13737 HOST_WIDE_INT offset;
13738 int write_back = FALSE;
13739 int stm_case;
13740 rtx addr;
13741 bool base_reg_dies;
13742 int i, j;
13743 HARD_REG_SET allocated;
13744
13745 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13746 mem_order, &base_reg, &offset, false);
13747
13748 if (stm_case == 0)
13749 return false;
13750
13751 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13752
13753 /* If the same register is used more than once, try to find a free
13754 register. */
13755 CLEAR_HARD_REG_SET (allocated);
13756 for (i = 0; i < nops; i++)
13757 {
13758 for (j = i + 1; j < nops; j++)
13759 if (regs[i] == regs[j])
13760 {
13761 rtx t = peep2_find_free_register (0, nops * 2,
13762 TARGET_THUMB1 ? "l" : "r",
13763 SImode, &allocated);
13764 if (t == NULL_RTX)
13765 return false;
13766 reg_rtxs[i] = t;
13767 regs[i] = REGNO (t);
13768 }
13769 }
13770
13771 /* Compute an ordering that maps the register numbers to an ascending
13772 sequence. */
13773 reg_order[0] = 0;
13774 for (i = 0; i < nops; i++)
13775 if (regs[i] < regs[reg_order[0]])
13776 reg_order[0] = i;
13777
13778 for (i = 1; i < nops; i++)
13779 {
13780 int this_order = reg_order[i - 1];
13781 for (j = 0; j < nops; j++)
13782 if (regs[j] > regs[reg_order[i - 1]]
13783 && (this_order == reg_order[i - 1]
13784 || regs[j] < regs[this_order]))
13785 this_order = j;
13786 reg_order[i] = this_order;
13787 }
13788
13789 /* Ensure that registers that must be live after the instruction end
13790 up with the correct value. */
13791 for (i = 0; i < nops; i++)
13792 {
13793 int this_order = reg_order[i];
13794 if ((this_order != mem_order[i]
13795 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13796 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13797 return false;
13798 }
13799
13800 /* Load the constants. */
13801 for (i = 0; i < nops; i++)
13802 {
13803 rtx op = operands[2 * nops + mem_order[i]];
13804 sorted_regs[i] = regs[reg_order[i]];
13805 emit_move_insn (reg_rtxs[reg_order[i]], op);
13806 }
13807
13808 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13809
13810 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13811 if (TARGET_THUMB1)
13812 {
13813 gcc_assert (base_reg_dies);
13814 write_back = TRUE;
13815 }
13816
13817 if (stm_case == 5)
13818 {
13819 gcc_assert (base_reg_dies);
13820 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13821 offset = 0;
13822 }
13823
13824 addr = plus_constant (Pmode, base_reg_rtx, offset);
13825
13826 for (i = 0; i < nops; i++)
13827 {
13828 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13829 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13830 SImode, addr, 0);
13831 }
13832 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13833 write_back ? offset + i * 4 : 0));
13834 return true;
13835 }
13836
13837 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13838 unaligned copies on processors which support unaligned semantics for those
13839 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13840 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13841 An interleave factor of 1 (the minimum) will perform no interleaving.
13842 Load/store multiple are used for aligned addresses where possible. */
13843
13844 static void
13845 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13846 HOST_WIDE_INT length,
13847 unsigned int interleave_factor)
13848 {
13849 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13850 int *regnos = XALLOCAVEC (int, interleave_factor);
13851 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13852 HOST_WIDE_INT i, j;
13853 HOST_WIDE_INT remaining = length, words;
13854 rtx halfword_tmp = NULL, byte_tmp = NULL;
13855 rtx dst, src;
13856 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13857 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13858 HOST_WIDE_INT srcoffset, dstoffset;
13859 HOST_WIDE_INT src_autoinc, dst_autoinc;
13860 rtx mem, addr;
13861
13862 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13863
13864 /* Use hard registers if we have aligned source or destination so we can use
13865 load/store multiple with contiguous registers. */
13866 if (dst_aligned || src_aligned)
13867 for (i = 0; i < interleave_factor; i++)
13868 regs[i] = gen_rtx_REG (SImode, i);
13869 else
13870 for (i = 0; i < interleave_factor; i++)
13871 regs[i] = gen_reg_rtx (SImode);
13872
13873 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13874 src = copy_addr_to_reg (XEXP (srcbase, 0));
13875
13876 srcoffset = dstoffset = 0;
13877
13878 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13879 For copying the last bytes we want to subtract this offset again. */
13880 src_autoinc = dst_autoinc = 0;
13881
13882 for (i = 0; i < interleave_factor; i++)
13883 regnos[i] = i;
13884
13885 /* Copy BLOCK_SIZE_BYTES chunks. */
13886
13887 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13888 {
13889 /* Load words. */
13890 if (src_aligned && interleave_factor > 1)
13891 {
13892 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13893 TRUE, srcbase, &srcoffset));
13894 src_autoinc += UNITS_PER_WORD * interleave_factor;
13895 }
13896 else
13897 {
13898 for (j = 0; j < interleave_factor; j++)
13899 {
13900 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13901 - src_autoinc));
13902 mem = adjust_automodify_address (srcbase, SImode, addr,
13903 srcoffset + j * UNITS_PER_WORD);
13904 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13905 }
13906 srcoffset += block_size_bytes;
13907 }
13908
13909 /* Store words. */
13910 if (dst_aligned && interleave_factor > 1)
13911 {
13912 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13913 TRUE, dstbase, &dstoffset));
13914 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13915 }
13916 else
13917 {
13918 for (j = 0; j < interleave_factor; j++)
13919 {
13920 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13921 - dst_autoinc));
13922 mem = adjust_automodify_address (dstbase, SImode, addr,
13923 dstoffset + j * UNITS_PER_WORD);
13924 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13925 }
13926 dstoffset += block_size_bytes;
13927 }
13928
13929 remaining -= block_size_bytes;
13930 }
13931
13932 /* Copy any whole words left (note these aren't interleaved with any
13933 subsequent halfword/byte load/stores in the interests of simplicity). */
13934
13935 words = remaining / UNITS_PER_WORD;
13936
13937 gcc_assert (words < interleave_factor);
13938
13939 if (src_aligned && words > 1)
13940 {
13941 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13942 &srcoffset));
13943 src_autoinc += UNITS_PER_WORD * words;
13944 }
13945 else
13946 {
13947 for (j = 0; j < words; j++)
13948 {
13949 addr = plus_constant (Pmode, src,
13950 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13951 mem = adjust_automodify_address (srcbase, SImode, addr,
13952 srcoffset + j * UNITS_PER_WORD);
13953 if (src_aligned)
13954 emit_move_insn (regs[j], mem);
13955 else
13956 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13957 }
13958 srcoffset += words * UNITS_PER_WORD;
13959 }
13960
13961 if (dst_aligned && words > 1)
13962 {
13963 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13964 &dstoffset));
13965 dst_autoinc += words * UNITS_PER_WORD;
13966 }
13967 else
13968 {
13969 for (j = 0; j < words; j++)
13970 {
13971 addr = plus_constant (Pmode, dst,
13972 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13973 mem = adjust_automodify_address (dstbase, SImode, addr,
13974 dstoffset + j * UNITS_PER_WORD);
13975 if (dst_aligned)
13976 emit_move_insn (mem, regs[j]);
13977 else
13978 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13979 }
13980 dstoffset += words * UNITS_PER_WORD;
13981 }
13982
13983 remaining -= words * UNITS_PER_WORD;
13984
13985 gcc_assert (remaining < 4);
13986
13987 /* Copy a halfword if necessary. */
13988
13989 if (remaining >= 2)
13990 {
13991 halfword_tmp = gen_reg_rtx (SImode);
13992
13993 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13994 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13995 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13996
13997 /* Either write out immediately, or delay until we've loaded the last
13998 byte, depending on interleave factor. */
13999 if (interleave_factor == 1)
14000 {
14001 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14002 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14003 emit_insn (gen_unaligned_storehi (mem,
14004 gen_lowpart (HImode, halfword_tmp)));
14005 halfword_tmp = NULL;
14006 dstoffset += 2;
14007 }
14008
14009 remaining -= 2;
14010 srcoffset += 2;
14011 }
14012
14013 gcc_assert (remaining < 2);
14014
14015 /* Copy last byte. */
14016
14017 if ((remaining & 1) != 0)
14018 {
14019 byte_tmp = gen_reg_rtx (SImode);
14020
14021 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14022 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14023 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14024
14025 if (interleave_factor == 1)
14026 {
14027 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14028 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14029 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14030 byte_tmp = NULL;
14031 dstoffset++;
14032 }
14033
14034 remaining--;
14035 srcoffset++;
14036 }
14037
14038 /* Store last halfword if we haven't done so already. */
14039
14040 if (halfword_tmp)
14041 {
14042 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14043 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14044 emit_insn (gen_unaligned_storehi (mem,
14045 gen_lowpart (HImode, halfword_tmp)));
14046 dstoffset += 2;
14047 }
14048
14049 /* Likewise for last byte. */
14050
14051 if (byte_tmp)
14052 {
14053 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14054 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14055 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14056 dstoffset++;
14057 }
14058
14059 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14060 }
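
/* For illustration, a copy of LENGTH = 11 with INTERLEAVE_FACTOR = 2 and
   both ends unaligned is emitted by the code above as one 8-byte block
   (two unaligned word loads into pseudos, then two unaligned word stores),
   followed by an unaligned halfword copy and a byte copy. Because the
   interleave factor is greater than 1, the trailing halfword and byte
   stores are delayed until both values have been loaded. */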
14061
14062 /* From mips_adjust_block_mem:
14063
14064 Helper function for doing a loop-based block operation on memory
14065 reference MEM. Each iteration of the loop will operate on LENGTH
14066 bytes of MEM.
14067
14068 Create a new base register for use within the loop and point it to
14069 the start of MEM. Create a new memory reference that uses this
14070 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14071
14072 static void
14073 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14074 rtx *loop_mem)
14075 {
14076 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14077
14078 /* Although the new mem does not refer to a known location,
14079 it does keep up to LENGTH bytes of alignment. */
14080 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14081 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14082 }
14083
14084 /* From mips_block_move_loop:
14085
14086 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14087 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14088 the memory regions do not overlap. */
14089
14090 static void
14091 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14092 unsigned int interleave_factor,
14093 HOST_WIDE_INT bytes_per_iter)
14094 {
14095 rtx src_reg, dest_reg, final_src, test;
14096 HOST_WIDE_INT leftover;
14097
14098 leftover = length % bytes_per_iter;
14099 length -= leftover;
14100
14101 /* Create registers and memory references for use within the loop. */
14102 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14103 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14104
14105 /* Calculate the value that SRC_REG should have after the last iteration of
14106 the loop. */
14107 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14108 0, 0, OPTAB_WIDEN);
14109
14110 /* Emit the start of the loop. */
14111 rtx_code_label *label = gen_label_rtx ();
14112 emit_label (label);
14113
14114 /* Emit the loop body. */
14115 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14116 interleave_factor);
14117
14118 /* Move on to the next block. */
14119 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14120 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14121
14122 /* Emit the loop condition. */
14123 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14124 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14125
14126 /* Mop up any left-over bytes. */
14127 if (leftover)
14128 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14129 }
14130
14131 /* Emit a block move when either the source or destination is unaligned (not
14132 aligned to a four-byte boundary). This may need further tuning depending on
14133 core type, optimize_size setting, etc. */
14134
14135 static int
14136 arm_movmemqi_unaligned (rtx *operands)
14137 {
14138 HOST_WIDE_INT length = INTVAL (operands[2]);
14139
14140 if (optimize_size)
14141 {
14142 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14143 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14144 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14145 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14146 or dst_aligned though: allow more interleaving in those cases since the
14147 resulting code can be smaller. */
14148 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14149 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14150
14151 if (length > 12)
14152 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14153 interleave_factor, bytes_per_iter);
14154 else
14155 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14156 interleave_factor);
14157 }
14158 else
14159 {
14160 /* Note that the loop created by arm_block_move_unaligned_loop may be
14161 subject to loop unrolling, which makes tuning this condition a little
14162 redundant. */
14163 if (length > 32)
14164 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14165 else
14166 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14167 }
14168
14169 return 1;
14170 }
14171
14172 int
14173 arm_gen_movmemqi (rtx *operands)
14174 {
14175 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14176 HOST_WIDE_INT srcoffset, dstoffset;
14177 rtx src, dst, srcbase, dstbase;
14178 rtx part_bytes_reg = NULL;
14179 rtx mem;
14180
14181 if (!CONST_INT_P (operands[2])
14182 || !CONST_INT_P (operands[3])
14183 || INTVAL (operands[2]) > 64)
14184 return 0;
14185
14186 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14187 return arm_movmemqi_unaligned (operands);
14188
14189 if (INTVAL (operands[3]) & 3)
14190 return 0;
14191
14192 dstbase = operands[0];
14193 srcbase = operands[1];
14194
14195 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14196 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14197
14198 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14199 out_words_to_go = INTVAL (operands[2]) / 4;
14200 last_bytes = INTVAL (operands[2]) & 3;
14201 dstoffset = srcoffset = 0;
14202
14203 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14204 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14205
14206 while (in_words_to_go >= 2)
14207 {
14208 if (in_words_to_go > 4)
14209 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14210 TRUE, srcbase, &srcoffset));
14211 else
14212 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14213 src, FALSE, srcbase,
14214 &srcoffset));
14215
14216 if (out_words_to_go)
14217 {
14218 if (out_words_to_go > 4)
14219 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14220 TRUE, dstbase, &dstoffset));
14221 else if (out_words_to_go != 1)
14222 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14223 out_words_to_go, dst,
14224 (last_bytes == 0
14225 ? FALSE : TRUE),
14226 dstbase, &dstoffset));
14227 else
14228 {
14229 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14230 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14231 if (last_bytes != 0)
14232 {
14233 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14234 dstoffset += 4;
14235 }
14236 }
14237 }
14238
14239 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14240 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14241 }
14242
14243 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14244 if (out_words_to_go)
14245 {
14246 rtx sreg;
14247
14248 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14249 sreg = copy_to_reg (mem);
14250
14251 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14252 emit_move_insn (mem, sreg);
14253 in_words_to_go--;
14254
14255 gcc_assert (!in_words_to_go); /* Sanity check */
14256 }
14257
14258 if (in_words_to_go)
14259 {
14260 gcc_assert (in_words_to_go > 0);
14261
14262 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14263 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14264 }
14265
14266 gcc_assert (!last_bytes || part_bytes_reg);
14267
14268 if (BYTES_BIG_ENDIAN && last_bytes)
14269 {
14270 rtx tmp = gen_reg_rtx (SImode);
14271
14272 /* The bytes we want are in the top end of the word. */
14273 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14274 GEN_INT (8 * (4 - last_bytes))));
14275 part_bytes_reg = tmp;
14276
14277 while (last_bytes)
14278 {
14279 mem = adjust_automodify_address (dstbase, QImode,
14280 plus_constant (Pmode, dst,
14281 last_bytes - 1),
14282 dstoffset + last_bytes - 1);
14283 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14284
14285 if (--last_bytes)
14286 {
14287 tmp = gen_reg_rtx (SImode);
14288 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14289 part_bytes_reg = tmp;
14290 }
14291 }
14292
14293 }
14294 else
14295 {
14296 if (last_bytes > 1)
14297 {
14298 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14299 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14300 last_bytes -= 2;
14301 if (last_bytes)
14302 {
14303 rtx tmp = gen_reg_rtx (SImode);
14304 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14305 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14306 part_bytes_reg = tmp;
14307 dstoffset += 2;
14308 }
14309 }
14310
14311 if (last_bytes)
14312 {
14313 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14314 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14315 }
14316 }
14317
14318 return 1;
14319 }
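/* Worked example: a 14-byte copy with word-aligned operands is expanded as
   one load-multiple of four words, one store-multiple of three words, and a
   trailing halfword store for the remaining two bytes (little-endian
   case).  */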
14320
14321 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14322 by mode size. */
14323 inline static rtx
14324 next_consecutive_mem (rtx mem)
14325 {
14326 machine_mode mode = GET_MODE (mem);
14327 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14328 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14329
14330 return adjust_automodify_address (mem, mode, addr, offset);
14331 }
14332
14333 /* Copy using LDRD/STRD instructions whenever possible.
14334 Returns true upon success. */
14335 bool
14336 gen_movmem_ldrd_strd (rtx *operands)
14337 {
14338 unsigned HOST_WIDE_INT len;
14339 HOST_WIDE_INT align;
14340 rtx src, dst, base;
14341 rtx reg0;
14342 bool src_aligned, dst_aligned;
14343 bool src_volatile, dst_volatile;
14344
14345 gcc_assert (CONST_INT_P (operands[2]));
14346 gcc_assert (CONST_INT_P (operands[3]));
14347
14348 len = UINTVAL (operands[2]);
14349 if (len > 64)
14350 return false;
14351
14352 /* Maximum alignment we can assume for both src and dst buffers. */
14353 align = INTVAL (operands[3]);
14354
14355 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14356 return false;
14357
14358 /* Place src and dst addresses in registers
14359 and update the corresponding mem rtx. */
14360 dst = operands[0];
14361 dst_volatile = MEM_VOLATILE_P (dst);
14362 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14363 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14364 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14365
14366 src = operands[1];
14367 src_volatile = MEM_VOLATILE_P (src);
14368 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14369 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14370 src = adjust_automodify_address (src, VOIDmode, base, 0);
14371
14372 if (!unaligned_access && !(src_aligned && dst_aligned))
14373 return false;
14374
14375 if (src_volatile || dst_volatile)
14376 return false;
14377
14378 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14379 if (!(dst_aligned || src_aligned))
14380 return arm_gen_movmemqi (operands);
14381
14382 /* If either src or dst is unaligned we'll be accessing it as pairs
14383 of unaligned SImode accesses. Otherwise we can generate DImode
14384 ldrd/strd instructions. */
14385 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14386 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14387
14388 while (len >= 8)
14389 {
14390 len -= 8;
14391 reg0 = gen_reg_rtx (DImode);
14392 rtx low_reg = NULL_RTX;
14393 rtx hi_reg = NULL_RTX;
14394
14395 if (!src_aligned || !dst_aligned)
14396 {
14397 low_reg = gen_lowpart (SImode, reg0);
14398 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14399 }
14400 if (src_aligned)
14401 emit_move_insn (reg0, src);
14402 else
14403 {
14404 emit_insn (gen_unaligned_loadsi (low_reg, src));
14405 src = next_consecutive_mem (src);
14406 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14407 }
14408
14409 if (dst_aligned)
14410 emit_move_insn (dst, reg0);
14411 else
14412 {
14413 emit_insn (gen_unaligned_storesi (dst, low_reg));
14414 dst = next_consecutive_mem (dst);
14415 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14416 }
14417
14418 src = next_consecutive_mem (src);
14419 dst = next_consecutive_mem (dst);
14420 }
14421
14422 gcc_assert (len < 8);
14423 if (len >= 4)
14424 {
14425 /* More than a word but less than a double-word to copy. Copy a word. */
14426 reg0 = gen_reg_rtx (SImode);
14427 src = adjust_address (src, SImode, 0);
14428 dst = adjust_address (dst, SImode, 0);
14429 if (src_aligned)
14430 emit_move_insn (reg0, src);
14431 else
14432 emit_insn (gen_unaligned_loadsi (reg0, src));
14433
14434 if (dst_aligned)
14435 emit_move_insn (dst, reg0);
14436 else
14437 emit_insn (gen_unaligned_storesi (dst, reg0));
14438
14439 src = next_consecutive_mem (src);
14440 dst = next_consecutive_mem (dst);
14441 len -= 4;
14442 }
14443
14444 if (len == 0)
14445 return true;
14446
14447 /* Copy the remaining bytes. */
14448 if (len >= 2)
14449 {
14450 dst = adjust_address (dst, HImode, 0);
14451 src = adjust_address (src, HImode, 0);
14452 reg0 = gen_reg_rtx (SImode);
14453 if (src_aligned)
14454 emit_insn (gen_zero_extendhisi2 (reg0, src));
14455 else
14456 emit_insn (gen_unaligned_loadhiu (reg0, src));
14457
14458 if (dst_aligned)
14459 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14460 else
14461 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14462
14463 src = next_consecutive_mem (src);
14464 dst = next_consecutive_mem (dst);
14465 if (len == 2)
14466 return true;
14467 }
14468
14469 dst = adjust_address (dst, QImode, 0);
14470 src = adjust_address (src, QImode, 0);
14471 reg0 = gen_reg_rtx (QImode);
14472 emit_move_insn (reg0, src);
14473 emit_move_insn (dst, reg0);
14474 return true;
14475 }
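/* For example, a 17-byte copy with both operands at least word-aligned is
   expanded as two DImode (LDRD/STRD) moves covering the first 16 bytes,
   followed by a single byte copy; if only one side is word-aligned, the
   unaligned side is accessed as pairs of unaligned SImode loads or stores
   instead.  */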
14476
14477 /* Select a dominance comparison mode if possible for a test of the general
14478 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14479 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14480 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14481 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14482 In all cases OP will be either EQ or NE, but we don't need to know which
14483 here. If we are unable to support a dominance comparison we return
14484 CC mode. This will then fail to match for the RTL expressions that
14485 generate this call. */
14486 machine_mode
14487 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14488 {
14489 enum rtx_code cond1, cond2;
14490 int swapped = 0;
14491
14492 /* Currently we will probably get the wrong result if the individual
14493 comparisons are not simple. This also ensures that it is safe to
14494 reverse a comparison if necessary. */
14495 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14496 != CCmode)
14497 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14498 != CCmode))
14499 return CCmode;
14500
14501 /* The if_then_else variant of this tests the second condition if the
14502 first passes, but is true if the first fails. Reverse the first
14503 condition to get a true "inclusive-or" expression. */
14504 if (cond_or == DOM_CC_NX_OR_Y)
14505 cond1 = reverse_condition (cond1);
14506
14507 /* If the comparisons are not equal, and one doesn't dominate the other,
14508 then we can't do this. */
14509 if (cond1 != cond2
14510 && !comparison_dominates_p (cond1, cond2)
14511 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14512 return CCmode;
14513
14514 if (swapped)
14515 std::swap (cond1, cond2);
14516
14517 switch (cond1)
14518 {
14519 case EQ:
14520 if (cond_or == DOM_CC_X_AND_Y)
14521 return CC_DEQmode;
14522
14523 switch (cond2)
14524 {
14525 case EQ: return CC_DEQmode;
14526 case LE: return CC_DLEmode;
14527 case LEU: return CC_DLEUmode;
14528 case GE: return CC_DGEmode;
14529 case GEU: return CC_DGEUmode;
14530 default: gcc_unreachable ();
14531 }
14532
14533 case LT:
14534 if (cond_or == DOM_CC_X_AND_Y)
14535 return CC_DLTmode;
14536
14537 switch (cond2)
14538 {
14539 case LT:
14540 return CC_DLTmode;
14541 case LE:
14542 return CC_DLEmode;
14543 case NE:
14544 return CC_DNEmode;
14545 default:
14546 gcc_unreachable ();
14547 }
14548
14549 case GT:
14550 if (cond_or == DOM_CC_X_AND_Y)
14551 return CC_DGTmode;
14552
14553 switch (cond2)
14554 {
14555 case GT:
14556 return CC_DGTmode;
14557 case GE:
14558 return CC_DGEmode;
14559 case NE:
14560 return CC_DNEmode;
14561 default:
14562 gcc_unreachable ();
14563 }
14564
14565 case LTU:
14566 if (cond_or == DOM_CC_X_AND_Y)
14567 return CC_DLTUmode;
14568
14569 switch (cond2)
14570 {
14571 case LTU:
14572 return CC_DLTUmode;
14573 case LEU:
14574 return CC_DLEUmode;
14575 case NE:
14576 return CC_DNEmode;
14577 default:
14578 gcc_unreachable ();
14579 }
14580
14581 case GTU:
14582 if (cond_or == DOM_CC_X_AND_Y)
14583 return CC_DGTUmode;
14584
14585 switch (cond2)
14586 {
14587 case GTU:
14588 return CC_DGTUmode;
14589 case GEU:
14590 return CC_DGEUmode;
14591 case NE:
14592 return CC_DNEmode;
14593 default:
14594 gcc_unreachable ();
14595 }
14596
14597 /* The remaining cases only occur when both comparisons are the
14598 same. */
14599 case NE:
14600 gcc_assert (cond1 == cond2);
14601 return CC_DNEmode;
14602
14603 case LE:
14604 gcc_assert (cond1 == cond2);
14605 return CC_DLEmode;
14606
14607 case GE:
14608 gcc_assert (cond1 == cond2);
14609 return CC_DGEmode;
14610
14611 case LEU:
14612 gcc_assert (cond1 == cond2);
14613 return CC_DLEUmode;
14614
14615 case GEU:
14616 gcc_assert (cond1 == cond2);
14617 return CC_DGEUmode;
14618
14619 default:
14620 gcc_unreachable ();
14621 }
14622 }
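/* For example, (ne (ior (eq a b) (leu c d)) (const_int 0)) with
   COND_OR == DOM_CC_X_OR_Y selects CC_DLEUmode, since EQ dominates LEU.
   If the two conditions were EQ and LT, neither dominates the other and
   CCmode is returned, so the combined pattern fails to match.  */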
14623
14624 machine_mode
14625 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14626 {
14627 /* All floating point compares return CCFP if it is an equality
14628 comparison, and CCFPE otherwise. */
14629 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14630 {
14631 switch (op)
14632 {
14633 case EQ:
14634 case NE:
14635 case UNORDERED:
14636 case ORDERED:
14637 case UNLT:
14638 case UNLE:
14639 case UNGT:
14640 case UNGE:
14641 case UNEQ:
14642 case LTGT:
14643 return CCFPmode;
14644
14645 case LT:
14646 case LE:
14647 case GT:
14648 case GE:
14649 return CCFPEmode;
14650
14651 default:
14652 gcc_unreachable ();
14653 }
14654 }
14655
14656 /* A compare with a shifted operand. Because of canonicalization, the
14657 comparison will have to be swapped when we emit the assembler. */
14658 if (GET_MODE (y) == SImode
14659 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14660 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14661 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14662 || GET_CODE (x) == ROTATERT))
14663 return CC_SWPmode;
14664
14665 /* This operation is performed swapped, but since we only rely on the Z
14666 flag we don't need an additional mode. */
14667 if (GET_MODE (y) == SImode
14668 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14669 && GET_CODE (x) == NEG
14670 && (op == EQ || op == NE))
14671 return CC_Zmode;
14672
14673 /* This is a special case that is used by combine to allow a
14674 comparison of a shifted byte load to be split into a zero-extend
14675 followed by a comparison of the shifted integer (only valid for
14676 equalities and unsigned inequalities). */
14677 if (GET_MODE (x) == SImode
14678 && GET_CODE (x) == ASHIFT
14679 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14680 && GET_CODE (XEXP (x, 0)) == SUBREG
14681 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14682 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14683 && (op == EQ || op == NE
14684 || op == GEU || op == GTU || op == LTU || op == LEU)
14685 && CONST_INT_P (y))
14686 return CC_Zmode;
14687
14688 /* A construct for a conditional compare, if the false arm contains
14689 0, then both conditions must be true, otherwise either condition
14690 must be true. Not all conditions are possible, so CCmode is
14691 returned if it can't be done. */
14692 if (GET_CODE (x) == IF_THEN_ELSE
14693 && (XEXP (x, 2) == const0_rtx
14694 || XEXP (x, 2) == const1_rtx)
14695 && COMPARISON_P (XEXP (x, 0))
14696 && COMPARISON_P (XEXP (x, 1)))
14697 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14698 INTVAL (XEXP (x, 2)));
14699
14700 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14701 if (GET_CODE (x) == AND
14702 && (op == EQ || op == NE)
14703 && COMPARISON_P (XEXP (x, 0))
14704 && COMPARISON_P (XEXP (x, 1)))
14705 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14706 DOM_CC_X_AND_Y);
14707
14708 if (GET_CODE (x) == IOR
14709 && (op == EQ || op == NE)
14710 && COMPARISON_P (XEXP (x, 0))
14711 && COMPARISON_P (XEXP (x, 1)))
14712 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14713 DOM_CC_X_OR_Y);
14714
14715 /* An operation (on Thumb) where we want to test for a single bit.
14716 This is done by shifting that bit up into the top bit of a
14717 scratch register; we can then branch on the sign bit. */
14718 if (TARGET_THUMB1
14719 && GET_MODE (x) == SImode
14720 && (op == EQ || op == NE)
14721 && GET_CODE (x) == ZERO_EXTRACT
14722 && XEXP (x, 1) == const1_rtx)
14723 return CC_Nmode;
14724
14725 /* An operation that sets the condition codes as a side-effect, the
14726 V flag is not set correctly, so we can only use comparisons where
14727 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14728 instead.) */
14729 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14730 if (GET_MODE (x) == SImode
14731 && y == const0_rtx
14732 && (op == EQ || op == NE || op == LT || op == GE)
14733 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14734 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14735 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14736 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14737 || GET_CODE (x) == LSHIFTRT
14738 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14739 || GET_CODE (x) == ROTATERT
14740 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14741 return CC_NOOVmode;
14742
14743 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14744 return CC_Zmode;
14745
14746 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14747 && GET_CODE (x) == PLUS
14748 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14749 return CC_Cmode;
14750
14751 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14752 {
14753 switch (op)
14754 {
14755 case EQ:
14756 case NE:
14757 /* A DImode comparison against zero can be implemented by
14758 or'ing the two halves together. */
14759 if (y == const0_rtx)
14760 return CC_Zmode;
14761
14762 /* We can do an equality test in three Thumb instructions. */
14763 if (!TARGET_32BIT)
14764 return CC_Zmode;
14765
14766 /* FALLTHROUGH */
14767
14768 case LTU:
14769 case LEU:
14770 case GTU:
14771 case GEU:
14772 /* DImode unsigned comparisons can be implemented by cmp +
14773 cmpeq without a scratch register. Not worth doing in
14774 Thumb-2. */
14775 if (TARGET_32BIT)
14776 return CC_CZmode;
14777
14778 /* FALLTHROUGH */
14779
14780 case LT:
14781 case LE:
14782 case GT:
14783 case GE:
14784 /* DImode signed and unsigned comparisons can be implemented
14785 by cmp + sbcs with a scratch register, but that does not
14786 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14787 gcc_assert (op != EQ && op != NE);
14788 return CC_NCVmode;
14789
14790 default:
14791 gcc_unreachable ();
14792 }
14793 }
14794
14795 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14796 return GET_MODE (x);
14797
14798 return CCmode;
14799 }
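/* For example, comparing (plus r0 r1) against zero with EQ selects
   CC_NOOVmode, allowing the addition itself to set the condition codes;
   the same comparison with GT falls through to CCmode because the overflow
   flag is not set correctly by the flag-setting arithmetic forms.  */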
14800
14801 /* X and Y are two things to compare using CODE. Emit the compare insn and
14802 return the rtx for the CC register in the proper mode. SCRATCH, if non-null,
14803 is an SImode scratch register that may be needed for DImode comparisons. */
14804 rtx
14805 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14806 {
14807 machine_mode mode;
14808 rtx cc_reg;
14809 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14810
14811 /* We might have X as a constant, Y as a register because of the predicates
14812 used for cmpdi. If so, force X to a register here. */
14813 if (dimode_comparison && !REG_P (x))
14814 x = force_reg (DImode, x);
14815
14816 mode = SELECT_CC_MODE (code, x, y);
14817 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14818
14819 if (dimode_comparison
14820 && mode != CC_CZmode)
14821 {
14822 rtx clobber, set;
14823
14824 /* To compare two non-zero values for equality, XOR them and
14825 then compare against zero. Not used for ARM mode; there
14826 CC_CZmode is cheaper. */
14827 if (mode == CC_Zmode && y != const0_rtx)
14828 {
14829 gcc_assert (!reload_completed);
14830 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14831 y = const0_rtx;
14832 }
14833
14834 /* A scratch register is required. */
14835 if (reload_completed)
14836 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14837 else
14838 scratch = gen_rtx_SCRATCH (SImode);
14839
14840 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14841 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14842 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14843 }
14844 else
14845 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14846
14847 return cc_reg;
14848 }
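/* For example, a Thumb-1 DImode equality test against a nonzero value uses
   CC_Zmode: the operands are XORed and the result compared against zero,
   with an SImode scratch clobbered; ARM and Thumb-2 select CC_CZmode for
   the same test and emit a plain compare.  */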
14849
14850 /* Generate a sequence of insns that will generate the correct return
14851 address mask depending on the physical architecture that the program
14852 is running on. */
14853 rtx
14854 arm_gen_return_addr_mask (void)
14855 {
14856 rtx reg = gen_reg_rtx (Pmode);
14857
14858 emit_insn (gen_return_addr_mask (reg));
14859 return reg;
14860 }
14861
14862 void
14863 arm_reload_in_hi (rtx *operands)
14864 {
14865 rtx ref = operands[1];
14866 rtx base, scratch;
14867 HOST_WIDE_INT offset = 0;
14868
14869 if (GET_CODE (ref) == SUBREG)
14870 {
14871 offset = SUBREG_BYTE (ref);
14872 ref = SUBREG_REG (ref);
14873 }
14874
14875 if (REG_P (ref))
14876 {
14877 /* We have a pseudo which has been spilt onto the stack; there
14878 are two cases here: the first where there is a simple
14879 stack-slot replacement and a second where the stack-slot is
14880 out of range, or is used as a subreg. */
14881 if (reg_equiv_mem (REGNO (ref)))
14882 {
14883 ref = reg_equiv_mem (REGNO (ref));
14884 base = find_replacement (&XEXP (ref, 0));
14885 }
14886 else
14887 /* The slot is out of range, or was dressed up in a SUBREG. */
14888 base = reg_equiv_address (REGNO (ref));
14889
14890 /* PR 62554: If there is no equivalent memory location then just move
14891 the value as an SImode register move. This happens when the target
14892 architecture variant does not have an HImode register move. */
14893 if (base == NULL)
14894 {
14895 gcc_assert (REG_P (operands[0]));
14896 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14897 gen_rtx_SUBREG (SImode, ref, 0)));
14898 return;
14899 }
14900 }
14901 else
14902 base = find_replacement (&XEXP (ref, 0));
14903
14904 /* Handle the case where the address is too complex to be offset by 1. */
14905 if (GET_CODE (base) == MINUS
14906 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14907 {
14908 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14909
14910 emit_set_insn (base_plus, base);
14911 base = base_plus;
14912 }
14913 else if (GET_CODE (base) == PLUS)
14914 {
14915 /* The addend must be CONST_INT, or we would have dealt with it above. */
14916 HOST_WIDE_INT hi, lo;
14917
14918 offset += INTVAL (XEXP (base, 1));
14919 base = XEXP (base, 0);
14920
14921 /* Rework the address into a legal sequence of insns. */
14922 /* Valid range for lo is -4095 -> 4095 */
14923 lo = (offset >= 0
14924 ? (offset & 0xfff)
14925 : -((-offset) & 0xfff));
14926
14927 /* Corner case, if lo is the max offset then we would be out of range
14928 once we have added the additional 1 below, so bump the msb into the
14929 pre-loading insn(s). */
14930 if (lo == 4095)
14931 lo &= 0x7ff;
14932
14933 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14934 ^ (HOST_WIDE_INT) 0x80000000)
14935 - (HOST_WIDE_INT) 0x80000000);
14936
14937 gcc_assert (hi + lo == offset);
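/* For example, an offset of 0x1234 splits into lo = 0x234 and hi = 0x1000;
   an offset of 4095 is first trimmed to lo = 0x7ff (hi = 0x800) so that
   the additional "offset + 1" access below stays within the +/-4095
   range.  */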
14938
14939 if (hi != 0)
14940 {
14941 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14942
14943 /* Get the base address; addsi3 knows how to handle constants
14944 that require more than one insn. */
14945 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14946 base = base_plus;
14947 offset = lo;
14948 }
14949 }
14950
14951 /* Operands[2] may overlap operands[0] (though it won't overlap
14952 operands[1]), that's why we asked for a DImode reg -- so we can
14953 use the bit that does not overlap. */
14954 if (REGNO (operands[2]) == REGNO (operands[0]))
14955 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14956 else
14957 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14958
14959 emit_insn (gen_zero_extendqisi2 (scratch,
14960 gen_rtx_MEM (QImode,
14961 plus_constant (Pmode, base,
14962 offset))));
14963 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14964 gen_rtx_MEM (QImode,
14965 plus_constant (Pmode, base,
14966 offset + 1))));
14967 if (!BYTES_BIG_ENDIAN)
14968 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14969 gen_rtx_IOR (SImode,
14970 gen_rtx_ASHIFT
14971 (SImode,
14972 gen_rtx_SUBREG (SImode, operands[0], 0),
14973 GEN_INT (8)),
14974 scratch));
14975 else
14976 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14977 gen_rtx_IOR (SImode,
14978 gen_rtx_ASHIFT (SImode, scratch,
14979 GEN_INT (8)),
14980 gen_rtx_SUBREG (SImode, operands[0], 0)));
14981 }
14982
14983 /* Handle storing a half-word to memory during reload by synthesizing as two
14984 byte stores. Take care not to clobber the input values until after we
14985 have moved them somewhere safe. This code assumes that if the DImode
14986 scratch in operands[2] overlaps either the input value or output address
14987 in some way, then that value must die in this insn (we absolutely need
14988 two scratch registers for some corner cases). */
14989 void
14990 arm_reload_out_hi (rtx *operands)
14991 {
14992 rtx ref = operands[0];
14993 rtx outval = operands[1];
14994 rtx base, scratch;
14995 HOST_WIDE_INT offset = 0;
14996
14997 if (GET_CODE (ref) == SUBREG)
14998 {
14999 offset = SUBREG_BYTE (ref);
15000 ref = SUBREG_REG (ref);
15001 }
15002
15003 if (REG_P (ref))
15004 {
15005 /* We have a pseudo which has been spilt onto the stack; there
15006 are two cases here: the first where there is a simple
15007 stack-slot replacement and a second where the stack-slot is
15008 out of range, or is used as a subreg. */
15009 if (reg_equiv_mem (REGNO (ref)))
15010 {
15011 ref = reg_equiv_mem (REGNO (ref));
15012 base = find_replacement (&XEXP (ref, 0));
15013 }
15014 else
15015 /* The slot is out of range, or was dressed up in a SUBREG. */
15016 base = reg_equiv_address (REGNO (ref));
15017
15018 /* PR 62254: If there is no equivalent memory location then just move
15019 the value as an SImode register move. This happens when the target
15020 architecture variant does not have an HImode register move. */
15021 if (base == NULL)
15022 {
15023 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15024
15025 if (REG_P (outval))
15026 {
15027 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15028 gen_rtx_SUBREG (SImode, outval, 0)));
15029 }
15030 else /* SUBREG_P (outval) */
15031 {
15032 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15033 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15034 SUBREG_REG (outval)));
15035 else
15036 /* FIXME: Handle other cases ? */
15037 gcc_unreachable ();
15038 }
15039 return;
15040 }
15041 }
15042 else
15043 base = find_replacement (&XEXP (ref, 0));
15044
15045 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15046
15047 /* Handle the case where the address is too complex to be offset by 1. */
15048 if (GET_CODE (base) == MINUS
15049 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15050 {
15051 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15052
15053 /* Be careful not to destroy OUTVAL. */
15054 if (reg_overlap_mentioned_p (base_plus, outval))
15055 {
15056 /* Updating base_plus might destroy outval, see if we can
15057 swap the scratch and base_plus. */
15058 if (!reg_overlap_mentioned_p (scratch, outval))
15059 std::swap (scratch, base_plus);
15060 else
15061 {
15062 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15063
15064 /* Be conservative and copy OUTVAL into the scratch now,
15065 this should only be necessary if outval is a subreg
15066 of something larger than a word. */
15067 /* XXX Might this clobber base? I can't see how it can,
15068 since scratch is known to overlap with OUTVAL, and
15069 must be wider than a word. */
15070 emit_insn (gen_movhi (scratch_hi, outval));
15071 outval = scratch_hi;
15072 }
15073 }
15074
15075 emit_set_insn (base_plus, base);
15076 base = base_plus;
15077 }
15078 else if (GET_CODE (base) == PLUS)
15079 {
15080 /* The addend must be CONST_INT, or we would have dealt with it above. */
15081 HOST_WIDE_INT hi, lo;
15082
15083 offset += INTVAL (XEXP (base, 1));
15084 base = XEXP (base, 0);
15085
15086 /* Rework the address into a legal sequence of insns. */
15087 /* Valid range for lo is -4095 -> 4095 */
15088 lo = (offset >= 0
15089 ? (offset & 0xfff)
15090 : -((-offset) & 0xfff));
15091
15092 /* Corner case, if lo is the max offset then we would be out of range
15093 once we have added the additional 1 below, so bump the msb into the
15094 pre-loading insn(s). */
15095 if (lo == 4095)
15096 lo &= 0x7ff;
15097
15098 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15099 ^ (HOST_WIDE_INT) 0x80000000)
15100 - (HOST_WIDE_INT) 0x80000000);
15101
15102 gcc_assert (hi + lo == offset);
15103
15104 if (hi != 0)
15105 {
15106 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15107
15108 /* Be careful not to destroy OUTVAL. */
15109 if (reg_overlap_mentioned_p (base_plus, outval))
15110 {
15111 /* Updating base_plus might destroy outval, see if we
15112 can swap the scratch and base_plus. */
15113 if (!reg_overlap_mentioned_p (scratch, outval))
15114 std::swap (scratch, base_plus);
15115 else
15116 {
15117 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15118
15119 /* Be conservative and copy outval into scratch now,
15120 this should only be necessary if outval is a
15121 subreg of something larger than a word. */
15122 /* XXX Might this clobber base? I can't see how it
15123 can, since scratch is known to overlap with
15124 outval. */
15125 emit_insn (gen_movhi (scratch_hi, outval));
15126 outval = scratch_hi;
15127 }
15128 }
15129
15130 /* Get the base address; addsi3 knows how to handle constants
15131 that require more than one insn. */
15132 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15133 base = base_plus;
15134 offset = lo;
15135 }
15136 }
15137
15138 if (BYTES_BIG_ENDIAN)
15139 {
15140 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15141 plus_constant (Pmode, base,
15142 offset + 1)),
15143 gen_lowpart (QImode, outval)));
15144 emit_insn (gen_lshrsi3 (scratch,
15145 gen_rtx_SUBREG (SImode, outval, 0),
15146 GEN_INT (8)));
15147 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15148 offset)),
15149 gen_lowpart (QImode, scratch)));
15150 }
15151 else
15152 {
15153 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15154 offset)),
15155 gen_lowpart (QImode, outval)));
15156 emit_insn (gen_lshrsi3 (scratch,
15157 gen_rtx_SUBREG (SImode, outval, 0),
15158 GEN_INT (8)));
15159 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15160 plus_constant (Pmode, base,
15161 offset + 1)),
15162 gen_lowpart (QImode, scratch)));
15163 }
15164 }
15165
15166 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15167 (padded to the size of a word) should be passed in a register. */
15168
15169 static bool
15170 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15171 {
15172 if (TARGET_AAPCS_BASED)
15173 return must_pass_in_stack_var_size (mode, type);
15174 else
15175 return must_pass_in_stack_var_size_or_pad (mode, type);
15176 }
15177
15178
15179 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15180 byte of a stack argument has useful data. For legacy APCS ABIs we use
15181 the default. For AAPCS based ABIs small aggregate types are placed
15182 in the lowest memory address. */
15183
15184 static pad_direction
15185 arm_function_arg_padding (machine_mode mode, const_tree type)
15186 {
15187 if (!TARGET_AAPCS_BASED)
15188 return default_function_arg_padding (mode, type);
15189
15190 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15191 return PAD_DOWNWARD;
15192
15193 return PAD_UPWARD;
15194 }
15195
15196
15197 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15198 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15199 register has useful data, and return the opposite if the most
15200 significant byte does. */
15201
15202 bool
15203 arm_pad_reg_upward (machine_mode mode,
15204 tree type, int first ATTRIBUTE_UNUSED)
15205 {
15206 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15207 {
15208 /* For AAPCS, small aggregates, small fixed-point types,
15209 and small complex types are always padded upwards. */
15210 if (type)
15211 {
15212 if ((AGGREGATE_TYPE_P (type)
15213 || TREE_CODE (type) == COMPLEX_TYPE
15214 || FIXED_POINT_TYPE_P (type))
15215 && int_size_in_bytes (type) <= 4)
15216 return true;
15217 }
15218 else
15219 {
15220 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15221 && GET_MODE_SIZE (mode) <= 4)
15222 return true;
15223 }
15224 }
15225
15226 /* Otherwise, use default padding. */
15227 return !BYTES_BIG_ENDIAN;
15228 }
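/* For example, on a big-endian AAPCS target a 3-byte structure is padded
   upwards (this returns true), while a 16-bit scalar integer falls through
   to the default rule and is padded downwards.  */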
15229
15230 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15231 assuming that the address in the base register is word aligned. */
15232 bool
15233 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15234 {
15235 HOST_WIDE_INT max_offset;
15236
15237 /* Offset must be a multiple of 4 in Thumb mode. */
15238 if (TARGET_THUMB2 && ((offset & 3) != 0))
15239 return false;
15240
15241 if (TARGET_THUMB2)
15242 max_offset = 1020;
15243 else if (TARGET_ARM)
15244 max_offset = 255;
15245 else
15246 return false;
15247
15248 return ((offset <= max_offset) && (offset >= -max_offset));
15249 }
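/* For example, in ARM state an offset of 255 or -255 is accepted while 256
   is rejected; in Thumb-2 the limit is 1020 and the offset must also be a
   multiple of 4, so 1022 is rejected even though it is within range.  */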
15250
15251 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15252 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15253 Assumes that the address in the base register RN is word aligned. The pattern
15254 guarantees that both memory accesses use the same base register, that the
15255 offsets are constants within the valid range, and that the gap between them is 4.
15256 If reload is complete, also check that the registers are legal. WBACK indicates
15257 whether the address is updated. LOAD indicates whether the access is a load or a store. */
15258 bool
15259 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15260 bool wback, bool load)
15261 {
15262 unsigned int t, t2, n;
15263
15264 if (!reload_completed)
15265 return true;
15266
15267 if (!offset_ok_for_ldrd_strd (offset))
15268 return false;
15269
15270 t = REGNO (rt);
15271 t2 = REGNO (rt2);
15272 n = REGNO (rn);
15273
15274 if ((TARGET_THUMB2)
15275 && ((wback && (n == t || n == t2))
15276 || (t == SP_REGNUM)
15277 || (t == PC_REGNUM)
15278 || (t2 == SP_REGNUM)
15279 || (t2 == PC_REGNUM)
15280 || (!load && (n == PC_REGNUM))
15281 || (load && (t == t2))
15282 /* Triggers the Cortex-M3 LDRD erratum. */
15283 || (!wback && load && fix_cm3_ldrd && (n == t))))
15284 return false;
15285
15286 if ((TARGET_ARM)
15287 && ((wback && (n == t || n == t2))
15288 || (t2 == PC_REGNUM)
15289 || (t % 2 != 0) /* First destination register is not even. */
15290 || (t2 != t + 1)
15291 /* PC can be used as base register (for offset addressing only),
15292 but it is deprecated. */
15293 || (n == PC_REGNUM)))
15294 return false;
15295
15296 return true;
15297 }
15298
15299 /* Return true if a 64-bit access with alignment ALIGN and with a
15300 constant offset OFFSET from the base pointer is permitted on this
15301 architecture. */
15302 static bool
15303 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15304 {
15305 return (unaligned_access
15306 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15307 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15308 }
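/* For example, when unaligned access is enabled a word-aligned (32-bit)
   access at a multiple-of-4 offset qualifies; with -mno-unaligned-access
   the data must be doubleword (64-bit) aligned and the offset a multiple
   of 8.  */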
15309
15310 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15311 operand MEM's address contains an immediate offset from the base
15312 register and has no side effects, in which case it sets BASE,
15313 OFFSET and ALIGN accordingly. */
15314 static bool
15315 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15316 {
15317 rtx addr;
15318
15319 gcc_assert (base != NULL && offset != NULL);
15320
15321 /* TODO: Handle more general memory operand patterns, such as
15322 PRE_DEC and PRE_INC. */
15323
15324 if (side_effects_p (mem))
15325 return false;
15326
15327 /* Can't deal with subregs. */
15328 if (GET_CODE (mem) == SUBREG)
15329 return false;
15330
15331 gcc_assert (MEM_P (mem));
15332
15333 *offset = const0_rtx;
15334 *align = MEM_ALIGN (mem);
15335
15336 addr = XEXP (mem, 0);
15337
15338 /* If addr isn't valid for DImode, then we can't handle it. */
15339 if (!arm_legitimate_address_p (DImode, addr,
15340 reload_in_progress || reload_completed))
15341 return false;
15342
15343 if (REG_P (addr))
15344 {
15345 *base = addr;
15346 return true;
15347 }
15348 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15349 {
15350 *base = XEXP (addr, 0);
15351 *offset = XEXP (addr, 1);
15352 return (REG_P (*base) && CONST_INT_P (*offset));
15353 }
15354
15355 return false;
15356 }
15357
15358 /* Called from a peephole2 to replace two word-size accesses with a
15359 single LDRD/STRD instruction. Returns true iff we can generate a
15360 new instruction sequence. That is, both accesses use the same base
15361 register and the gap between constant offsets is 4. This function
15362 may reorder its operands to match ldrd/strd RTL templates.
15363 OPERANDS are the operands found by the peephole matcher;
15364 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15365 corresponding memory operands. LOAD indicates whether the access
15366 is a load or a store. CONST_STORE indicates a store of constant
15367 integer values held in OPERANDS[4,5] and assumes that the pattern
15368 is 4 insns long, for the purpose of checking dead registers.
15369 COMMUTE indicates that register operands may be reordered. */
15370 bool
15371 gen_operands_ldrd_strd (rtx *operands, bool load,
15372 bool const_store, bool commute)
15373 {
15374 int nops = 2;
15375 HOST_WIDE_INT offsets[2], offset, align[2];
15376 rtx base = NULL_RTX;
15377 rtx cur_base, cur_offset, tmp;
15378 int i, gap;
15379 HARD_REG_SET regset;
15380
15381 gcc_assert (!const_store || !load);
15382 /* Check that the memory references are immediate offsets from the
15383 same base register. Extract the base register, the destination
15384 registers, and the corresponding memory offsets. */
15385 for (i = 0; i < nops; i++)
15386 {
15387 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15388 &align[i]))
15389 return false;
15390
15391 if (i == 0)
15392 base = cur_base;
15393 else if (REGNO (base) != REGNO (cur_base))
15394 return false;
15395
15396 offsets[i] = INTVAL (cur_offset);
15397 if (GET_CODE (operands[i]) == SUBREG)
15398 {
15399 tmp = SUBREG_REG (operands[i]);
15400 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15401 operands[i] = tmp;
15402 }
15403 }
15404
15405 /* Make sure there is no dependency between the individual loads. */
15406 if (load && REGNO (operands[0]) == REGNO (base))
15407 return false; /* RAW */
15408
15409 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15410 return false; /* WAW */
15411
15412 /* If the same input register is used in both stores
15413 when storing different constants, try to find a free register.
15414 For example, the code
15415 mov r0, 0
15416 str r0, [r2]
15417 mov r0, 1
15418 str r0, [r2, #4]
15419 can be transformed into
15420 mov r1, 0
15421 mov r0, 1
15422 strd r1, r0, [r2]
15423 in Thumb mode assuming that r1 is free.
15424 For ARM mode do the same but only if the starting register
15425 can be made to be even. */
15426 if (const_store
15427 && REGNO (operands[0]) == REGNO (operands[1])
15428 && INTVAL (operands[4]) != INTVAL (operands[5]))
15429 {
15430 if (TARGET_THUMB2)
15431 {
15432 CLEAR_HARD_REG_SET (regset);
15433 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15434 if (tmp == NULL_RTX)
15435 return false;
15436
15437 /* Use the new register in the first load to ensure that
15438 if the original input register is not dead after peephole,
15439 then it will have the correct constant value. */
15440 operands[0] = tmp;
15441 }
15442 else if (TARGET_ARM)
15443 {
15444 int regno = REGNO (operands[0]);
15445 if (!peep2_reg_dead_p (4, operands[0]))
15446 {
15447 /* When the input register is even and is not dead after the
15448 pattern, it has to hold the second constant but we cannot
15449 form a legal STRD in ARM mode with this register as the second
15450 register. */
15451 if (regno % 2 == 0)
15452 return false;
15453
15454 /* Is regno-1 free? */
15455 SET_HARD_REG_SET (regset);
15456 CLEAR_HARD_REG_BIT(regset, regno - 1);
15457 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15458 if (tmp == NULL_RTX)
15459 return false;
15460
15461 operands[0] = tmp;
15462 }
15463 else
15464 {
15465 /* Find a DImode register. */
15466 CLEAR_HARD_REG_SET (regset);
15467 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15468 if (tmp != NULL_RTX)
15469 {
15470 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15471 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15472 }
15473 else
15474 {
15475 /* Can we use the input register to form a DI register? */
15476 SET_HARD_REG_SET (regset);
15477 CLEAR_HARD_REG_BIT(regset,
15478 regno % 2 == 0 ? regno + 1 : regno - 1);
15479 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15480 if (tmp == NULL_RTX)
15481 return false;
15482 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15483 }
15484 }
15485
15486 gcc_assert (operands[0] != NULL_RTX);
15487 gcc_assert (operands[1] != NULL_RTX);
15488 gcc_assert (REGNO (operands[0]) % 2 == 0);
15489 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15490 }
15491 }
15492
15493 /* Make sure the instructions are ordered with lower memory access first. */
15494 if (offsets[0] > offsets[1])
15495 {
15496 gap = offsets[0] - offsets[1];
15497 offset = offsets[1];
15498
15499 /* Swap the instructions such that lower memory is accessed first. */
15500 std::swap (operands[0], operands[1]);
15501 std::swap (operands[2], operands[3]);
15502 std::swap (align[0], align[1]);
15503 if (const_store)
15504 std::swap (operands[4], operands[5]);
15505 }
15506 else
15507 {
15508 gap = offsets[1] - offsets[0];
15509 offset = offsets[0];
15510 }
15511
15512 /* Make sure accesses are to consecutive memory locations. */
15513 if (gap != 4)
15514 return false;
15515
15516 if (!align_ok_ldrd_strd (align[0], offset))
15517 return false;
15518
15519 /* Make sure we generate legal instructions. */
15520 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15521 false, load))
15522 return true;
15523
15524 /* In Thumb state, where registers are almost unconstrained, there
15525 is little hope of fixing it. */
15526 if (TARGET_THUMB2)
15527 return false;
15528
15529 if (load && commute)
15530 {
15531 /* Try reordering registers. */
15532 std::swap (operands[0], operands[1]);
15533 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15534 false, load))
15535 return true;
15536 }
15537
15538 if (const_store)
15539 {
15540 /* If input registers are dead after this pattern, they can be
15541 reordered or replaced by other registers that are free in the
15542 current pattern. */
15543 if (!peep2_reg_dead_p (4, operands[0])
15544 || !peep2_reg_dead_p (4, operands[1]))
15545 return false;
15546
15547 /* Try to reorder the input registers. */
15548 /* For example, the code
15549 mov r0, 0
15550 mov r1, 1
15551 str r1, [r2]
15552 str r0, [r2, #4]
15553 can be transformed into
15554 mov r1, 0
15555 mov r0, 1
15556 strd r0, [r2]
15557 */
15558 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15559 false, false))
15560 {
15561 std::swap (operands[0], operands[1]);
15562 return true;
15563 }
15564
15565 /* Try to find a free DI register. */
15566 CLEAR_HARD_REG_SET (regset);
15567 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15568 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15569 while (true)
15570 {
15571 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15572 if (tmp == NULL_RTX)
15573 return false;
15574
15575 /* DREG must be an even-numbered register in DImode.
15576 Split it into SI registers. */
15577 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15578 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15579 gcc_assert (operands[0] != NULL_RTX);
15580 gcc_assert (operands[1] != NULL_RTX);
15581 gcc_assert (REGNO (operands[0]) % 2 == 0);
15582 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15583
15584 return (operands_ok_ldrd_strd (operands[0], operands[1],
15585 base, offset,
15586 false, load));
15587 }
15588 }
15589
15590 return false;
15591 }
15592
15593
15594
15595 \f
15596 /* Print a symbolic form of X to the debug file, F. */
15597 static void
15598 arm_print_value (FILE *f, rtx x)
15599 {
15600 switch (GET_CODE (x))
15601 {
15602 case CONST_INT:
15603 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15604 return;
15605
15606 case CONST_DOUBLE:
15607 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15608 return;
15609
15610 case CONST_VECTOR:
15611 {
15612 int i;
15613
15614 fprintf (f, "<");
15615 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15616 {
15617 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15618 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15619 fputc (',', f);
15620 }
15621 fprintf (f, ">");
15622 }
15623 return;
15624
15625 case CONST_STRING:
15626 fprintf (f, "\"%s\"", XSTR (x, 0));
15627 return;
15628
15629 case SYMBOL_REF:
15630 fprintf (f, "`%s'", XSTR (x, 0));
15631 return;
15632
15633 case LABEL_REF:
15634 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15635 return;
15636
15637 case CONST:
15638 arm_print_value (f, XEXP (x, 0));
15639 return;
15640
15641 case PLUS:
15642 arm_print_value (f, XEXP (x, 0));
15643 fprintf (f, "+");
15644 arm_print_value (f, XEXP (x, 1));
15645 return;
15646
15647 case PC:
15648 fprintf (f, "pc");
15649 return;
15650
15651 default:
15652 fprintf (f, "????");
15653 return;
15654 }
15655 }
15656 \f
15657 /* Routines for manipulation of the constant pool. */
15658
15659 /* Arm instructions cannot load a large constant directly into a
15660 register; they have to come from a pc relative load. The constant
15661 must therefore be placed in the addressable range of the pc
15662 relative load. Depending on the precise pc relative load
15663 instruction the range is somewhere between 256 bytes and 4k. This
15664 means that we often have to dump a constant inside a function, and
15665 generate code to branch around it.
15666
15667 It is important to minimize this, since the branches will slow
15668 things down and make the code larger.
15669
15670 Normally we can hide the table after an existing unconditional
15671 branch so that there is no interruption of the flow, but in the
15672 worst case the code looks like this:
15673
15674 ldr rn, L1
15675 ...
15676 b L2
15677 align
15678 L1: .long value
15679 L2:
15680 ...
15681
15682 ldr rn, L3
15683 ...
15684 b L4
15685 align
15686 L3: .long value
15687 L4:
15688 ...
15689
15690 We fix this by performing a scan after scheduling, which notices
15691 which instructions need to have their operands fetched from the
15692 constant table and builds the table.
15693
15694 The algorithm starts by building a table of all the constants that
15695 need fixing up and all the natural barriers in the function (places
15696 where a constant table can be dropped without breaking the flow).
15697 For each fixup we note how far the pc-relative replacement will be
15698 able to reach and the offset of the instruction into the function.
15699
15700 Having built the table we then group the fixes together to form
15701 tables that are as large as possible (subject to addressing
15702 constraints) and emit each table of constants after the last
15703 barrier that is within range of all the instructions in the group.
15704 If a group does not contain a barrier, then we forcibly create one
15705 by inserting a jump instruction into the flow. Once the table has
15706 been inserted, the insns are then modified to reference the
15707 relevant entry in the pool.
15708
15709 Possible enhancements to the algorithm (not implemented) are:
15710
15711 1) For some processors and object formats, there may be benefit in
15712 aligning the pools to the start of cache lines; this alignment
15713 would need to be taken into account when calculating addressability
15714 of a pool. */
15715
15716 /* These typedefs are located at the start of this file, so that
15717 they can be used in the prototypes there. This comment is to
15718 remind readers of that fact so that the following structures
15719 can be understood more easily.
15720
15721 typedef struct minipool_node Mnode;
15722 typedef struct minipool_fixup Mfix; */
15723
15724 struct minipool_node
15725 {
15726 /* Doubly linked chain of entries. */
15727 Mnode * next;
15728 Mnode * prev;
15729 /* The maximum offset into the code at which this entry can be placed. While
15730 pushing fixes for forward references, all entries are sorted in order
15731 of increasing max_address. */
15732 HOST_WIDE_INT max_address;
15733 /* Similarly for an entry inserted for a backwards ref. */
15734 HOST_WIDE_INT min_address;
15735 /* The number of fixes referencing this entry. This can become zero
15736 if we "unpush" an entry. In this case we ignore the entry when we
15737 come to emit the code. */
15738 int refcount;
15739 /* The offset from the start of the minipool. */
15740 HOST_WIDE_INT offset;
15741 /* The value in the table. */
15742 rtx value;
15743 /* The mode of value. */
15744 machine_mode mode;
15745 /* The size of the value. With iWMMXt enabled
15746 sizes > 4 also imply an alignment of 8 bytes. */
15747 int fix_size;
15748 };
15749
15750 struct minipool_fixup
15751 {
15752 Mfix * next;
15753 rtx_insn * insn;
15754 HOST_WIDE_INT address;
15755 rtx * loc;
15756 machine_mode mode;
15757 int fix_size;
15758 rtx value;
15759 Mnode * minipool;
15760 HOST_WIDE_INT forwards;
15761 HOST_WIDE_INT backwards;
15762 };
15763
15764 /* Fixes less than a word need padding out to a word boundary. */
15765 #define MINIPOOL_FIX_SIZE(mode) \
15766 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
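/* For example, a QImode or HImode fix still occupies 4 bytes in the pool,
   while a DImode or DFmode fix occupies 8.  */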
15767
15768 static Mnode * minipool_vector_head;
15769 static Mnode * minipool_vector_tail;
15770 static rtx_code_label *minipool_vector_label;
15771 static int minipool_pad;
15772
15773 /* The linked list of all minipool fixes required for this function. */
15774 Mfix * minipool_fix_head;
15775 Mfix * minipool_fix_tail;
15776 /* The fix entry for the current minipool, once it has been placed. */
15777 Mfix * minipool_barrier;
15778
15779 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15780 #define JUMP_TABLES_IN_TEXT_SECTION 0
15781 #endif
15782
15783 static HOST_WIDE_INT
15784 get_jump_table_size (rtx_jump_table_data *insn)
15785 {
15786 /* ADDR_VECs only take room if read-only data goes into the text
15787 section. */
15788 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15789 {
15790 rtx body = PATTERN (insn);
15791 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15792 HOST_WIDE_INT size;
15793 HOST_WIDE_INT modesize;
15794
15795 modesize = GET_MODE_SIZE (GET_MODE (body));
15796 size = modesize * XVECLEN (body, elt);
15797 switch (modesize)
15798 {
15799 case 1:
15800 /* Round up size of TBB table to a halfword boundary. */
15801 size = (size + 1) & ~HOST_WIDE_INT_1;
15802 break;
15803 case 2:
15804 /* No padding necessary for TBH. */
15805 break;
15806 case 4:
15807 /* Add two bytes for alignment on Thumb. */
15808 if (TARGET_THUMB)
15809 size += 2;
15810 break;
15811 default:
15812 gcc_unreachable ();
15813 }
15814 return size;
15815 }
15816
15817 return 0;
15818 }
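/* For example, a TBB dispatch table with five one-byte entries is padded
   to 6 bytes, while a five-entry 4-byte ADDR_VEC in Thumb code takes
   5 * 4 + 2 = 22 bytes including the alignment padding.  */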
15819
15820 /* Return the maximum amount of padding that will be inserted before
15821 label LABEL. */
15822
15823 static HOST_WIDE_INT
15824 get_label_padding (rtx label)
15825 {
15826 HOST_WIDE_INT align, min_insn_size;
15827
15828 align = 1 << label_to_alignment (label);
15829 min_insn_size = TARGET_THUMB ? 2 : 4;
15830 return align > min_insn_size ? align - min_insn_size : 0;
15831 }
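/* For example, a label aligned to an 8-byte boundary in Thumb code can be
   preceded by up to 8 - 2 = 6 bytes of padding, since the smallest Thumb
   instruction is two bytes.  */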
15832
15833 /* Move a minipool fix MP from its current location to before MAX_MP.
15834 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15835 constraints may need updating. */
15836 static Mnode *
15837 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15838 HOST_WIDE_INT max_address)
15839 {
15840 /* The code below assumes these are different. */
15841 gcc_assert (mp != max_mp);
15842
15843 if (max_mp == NULL)
15844 {
15845 if (max_address < mp->max_address)
15846 mp->max_address = max_address;
15847 }
15848 else
15849 {
15850 if (max_address > max_mp->max_address - mp->fix_size)
15851 mp->max_address = max_mp->max_address - mp->fix_size;
15852 else
15853 mp->max_address = max_address;
15854
15855 /* Unlink MP from its current position. Since max_mp is non-null,
15856 mp->prev must be non-null. */
15857 mp->prev->next = mp->next;
15858 if (mp->next != NULL)
15859 mp->next->prev = mp->prev;
15860 else
15861 minipool_vector_tail = mp->prev;
15862
15863 /* Re-insert it before MAX_MP. */
15864 mp->next = max_mp;
15865 mp->prev = max_mp->prev;
15866 max_mp->prev = mp;
15867
15868 if (mp->prev != NULL)
15869 mp->prev->next = mp;
15870 else
15871 minipool_vector_head = mp;
15872 }
15873
15874 /* Save the new entry. */
15875 max_mp = mp;
15876
15877 /* Scan over the preceding entries and adjust their addresses as
15878 required. */
15879 while (mp->prev != NULL
15880 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15881 {
15882 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15883 mp = mp->prev;
15884 }
15885
15886 return max_mp;
15887 }
15888
15889 /* Add a constant to the minipool for a forward reference. Returns the
15890 node added or NULL if the constant will not fit in this pool. */
15891 static Mnode *
15892 add_minipool_forward_ref (Mfix *fix)
15893 {
15894 /* If set, max_mp is the first pool_entry that has a lower
15895 constraint than the one we are trying to add. */
15896 Mnode * max_mp = NULL;
15897 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15898 Mnode * mp;
15899
15900 /* If the minipool starts before the end of FIX->INSN then this FIX
15901 cannot be placed in the current pool. Furthermore, adding the
15902 new constant pool entry may cause the pool to start FIX_SIZE bytes
15903 earlier. */
15904 if (minipool_vector_head &&
15905 (fix->address + get_attr_length (fix->insn)
15906 >= minipool_vector_head->max_address - fix->fix_size))
15907 return NULL;
15908
15909 /* Scan the pool to see if a constant with the same value has
15910 already been added. While we are doing this, also note the
15911 location where we must insert the constant if it doesn't already
15912 exist. */
15913 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15914 {
15915 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15916 && fix->mode == mp->mode
15917 && (!LABEL_P (fix->value)
15918 || (CODE_LABEL_NUMBER (fix->value)
15919 == CODE_LABEL_NUMBER (mp->value)))
15920 && rtx_equal_p (fix->value, mp->value))
15921 {
15922 /* More than one fix references this entry. */
15923 mp->refcount++;
15924 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15925 }
15926
15927 /* Note the insertion point if necessary. */
15928 if (max_mp == NULL
15929 && mp->max_address > max_address)
15930 max_mp = mp;
15931
15932 /* If we are inserting an 8-byte aligned quantity and
15933 we have not already found an insertion point, then
15934 make sure that all such 8-byte aligned quantities are
15935 placed at the start of the pool. */
15936 if (ARM_DOUBLEWORD_ALIGN
15937 && max_mp == NULL
15938 && fix->fix_size >= 8
15939 && mp->fix_size < 8)
15940 {
15941 max_mp = mp;
15942 max_address = mp->max_address;
15943 }
15944 }
15945
15946 /* The value is not currently in the minipool, so we need to create
15947 a new entry for it. If MAX_MP is NULL, the entry will be put on
15948 the end of the list since the placement is less constrained than
15949 any existing entry. Otherwise, we insert the new fix before
15950 MAX_MP and, if necessary, adjust the constraints on the other
15951 entries. */
15952 mp = XNEW (Mnode);
15953 mp->fix_size = fix->fix_size;
15954 mp->mode = fix->mode;
15955 mp->value = fix->value;
15956 mp->refcount = 1;
15957 /* Not yet required for a backwards ref. */
15958 mp->min_address = -65536;
15959
15960 if (max_mp == NULL)
15961 {
15962 mp->max_address = max_address;
15963 mp->next = NULL;
15964 mp->prev = minipool_vector_tail;
15965
15966 if (mp->prev == NULL)
15967 {
15968 minipool_vector_head = mp;
15969 minipool_vector_label = gen_label_rtx ();
15970 }
15971 else
15972 mp->prev->next = mp;
15973
15974 minipool_vector_tail = mp;
15975 }
15976 else
15977 {
15978 if (max_address > max_mp->max_address - mp->fix_size)
15979 mp->max_address = max_mp->max_address - mp->fix_size;
15980 else
15981 mp->max_address = max_address;
15982
15983 mp->next = max_mp;
15984 mp->prev = max_mp->prev;
15985 max_mp->prev = mp;
15986 if (mp->prev != NULL)
15987 mp->prev->next = mp;
15988 else
15989 minipool_vector_head = mp;
15990 }
15991
15992 /* Save the new entry. */
15993 max_mp = mp;
15994
15995 /* Scan over the preceding entries and adjust their addresses as
15996 required. */
15997 while (mp->prev != NULL
15998 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15999 {
16000 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16001 mp = mp->prev;
16002 }
16003
16004 return max_mp;
16005 }
16006
16007 static Mnode *
16008 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16009 HOST_WIDE_INT min_address)
16010 {
16011 HOST_WIDE_INT offset;
16012
16013 /* The code below assumes these are different. */
16014 gcc_assert (mp != min_mp);
16015
16016 if (min_mp == NULL)
16017 {
16018 if (min_address > mp->min_address)
16019 mp->min_address = min_address;
16020 }
16021 else
16022 {
16023 /* We will adjust this below if it is too loose. */
16024 mp->min_address = min_address;
16025
16026 /* Unlink MP from its current position. Since min_mp is non-null,
16027 mp->next must be non-null. */
16028 mp->next->prev = mp->prev;
16029 if (mp->prev != NULL)
16030 mp->prev->next = mp->next;
16031 else
16032 minipool_vector_head = mp->next;
16033
16034 /* Reinsert it after MIN_MP. */
16035 mp->prev = min_mp;
16036 mp->next = min_mp->next;
16037 min_mp->next = mp;
16038 if (mp->next != NULL)
16039 mp->next->prev = mp;
16040 else
16041 minipool_vector_tail = mp;
16042 }
16043
16044 min_mp = mp;
16045
16046 offset = 0;
16047 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16048 {
16049 mp->offset = offset;
16050 if (mp->refcount > 0)
16051 offset += mp->fix_size;
16052
16053 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16054 mp->next->min_address = mp->min_address + mp->fix_size;
16055 }
16056
16057 return min_mp;
16058 }
16059
16060 /* Add a constant to the minipool for a backward reference. Returns the
16061 node added or NULL if the constant will not fit in this pool.
16062
16063 Note that the code for inserting a backwards reference can be
16064 somewhat confusing because the calculated offsets for each fix do
16065 not take into account the size of the pool (which is still under
16066 construction). */
16067 static Mnode *
16068 add_minipool_backward_ref (Mfix *fix)
16069 {
16070 /* If set, min_mp is the last pool_entry that has a lower constraint
16071 than the one we are trying to add. */
16072 Mnode *min_mp = NULL;
16073 /* This can be negative, since it is only a constraint. */
16074 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16075 Mnode *mp;
16076
16077 /* If we can't reach the current pool from this insn, or if we can't
16078 insert this entry at the end of the pool without pushing other
16079 fixes out of range, then we don't try. This ensures that we
16080 can't fail later on. */
16081 if (min_address >= minipool_barrier->address
16082 || (minipool_vector_tail->min_address + fix->fix_size
16083 >= minipool_barrier->address))
16084 return NULL;
16085
16086 /* Scan the pool to see if a constant with the same value has
16087 already been added. While we are doing this, also note the
16088 location where we must insert the constant if it doesn't already
16089 exist. */
16090 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16091 {
16092 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16093 && fix->mode == mp->mode
16094 && (!LABEL_P (fix->value)
16095 || (CODE_LABEL_NUMBER (fix->value)
16096 == CODE_LABEL_NUMBER (mp->value)))
16097 && rtx_equal_p (fix->value, mp->value)
16098 /* Check that there is enough slack to move this entry to the
16099 end of the table (this is conservative). */
16100 && (mp->max_address
16101 > (minipool_barrier->address
16102 + minipool_vector_tail->offset
16103 + minipool_vector_tail->fix_size)))
16104 {
16105 mp->refcount++;
16106 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16107 }
16108
16109 if (min_mp != NULL)
16110 mp->min_address += fix->fix_size;
16111 else
16112 {
16113 /* Note the insertion point if necessary. */
16114 if (mp->min_address < min_address)
16115 {
16116 /* For now, we do not allow nodes that require 8-byte alignment
16117 to be inserted anywhere but at the start of the pool. */
16118 if (ARM_DOUBLEWORD_ALIGN
16119 && fix->fix_size >= 8 && mp->fix_size < 8)
16120 return NULL;
16121 else
16122 min_mp = mp;
16123 }
16124 else if (mp->max_address
16125 < minipool_barrier->address + mp->offset + fix->fix_size)
16126 {
16127 /* Inserting before this entry would push the fix beyond
16128 its maximum address (which can happen if we have
16129 re-located a forwards fix); force the new fix to come
16130 after it. */
16131 if (ARM_DOUBLEWORD_ALIGN
16132 && fix->fix_size >= 8 && mp->fix_size < 8)
16133 return NULL;
16134 else
16135 {
16136 min_mp = mp;
16137 min_address = mp->min_address + fix->fix_size;
16138 }
16139 }
16140 /* Do not insert a non-8-byte aligned quantity before 8-byte
16141 aligned quantities. */
16142 else if (ARM_DOUBLEWORD_ALIGN
16143 && fix->fix_size < 8
16144 && mp->fix_size >= 8)
16145 {
16146 min_mp = mp;
16147 min_address = mp->min_address + fix->fix_size;
16148 }
16149 }
16150 }
16151
16152 /* We need to create a new entry. */
16153 mp = XNEW (Mnode);
16154 mp->fix_size = fix->fix_size;
16155 mp->mode = fix->mode;
16156 mp->value = fix->value;
16157 mp->refcount = 1;
16158 mp->max_address = minipool_barrier->address + 65536;
16159
16160 mp->min_address = min_address;
16161
16162 if (min_mp == NULL)
16163 {
16164 mp->prev = NULL;
16165 mp->next = minipool_vector_head;
16166
16167 if (mp->next == NULL)
16168 {
16169 minipool_vector_tail = mp;
16170 minipool_vector_label = gen_label_rtx ();
16171 }
16172 else
16173 mp->next->prev = mp;
16174
16175 minipool_vector_head = mp;
16176 }
16177 else
16178 {
16179 mp->next = min_mp->next;
16180 mp->prev = min_mp;
16181 min_mp->next = mp;
16182
16183 if (mp->next != NULL)
16184 mp->next->prev = mp;
16185 else
16186 minipool_vector_tail = mp;
16187 }
16188
16189 /* Save the new entry. */
16190 min_mp = mp;
16191
16192 if (mp->prev)
16193 mp = mp->prev;
16194 else
16195 mp->offset = 0;
16196
16197 /* Scan over the following entries and adjust their offsets. */
16198 while (mp->next != NULL)
16199 {
16200 if (mp->next->min_address < mp->min_address + mp->fix_size)
16201 mp->next->min_address = mp->min_address + mp->fix_size;
16202
16203 if (mp->refcount)
16204 mp->next->offset = mp->offset + mp->fix_size;
16205 else
16206 mp->next->offset = mp->offset;
16207
16208 mp = mp->next;
16209 }
16210
16211 return min_mp;
16212 }
16213
16214 static void
16215 assign_minipool_offsets (Mfix *barrier)
16216 {
16217 HOST_WIDE_INT offset = 0;
16218 Mnode *mp;
16219
16220 minipool_barrier = barrier;
16221
16222 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16223 {
16224 mp->offset = offset;
16225
16226 if (mp->refcount > 0)
16227 offset += mp->fix_size;
16228 }
16229 }
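
/* Worked example (counts invented for illustration): with three SImode
   entries of fix_size 4 whose refcounts are 1, 1 and 0, the loop above
   assigns offsets 0, 4 and 8.  The unreferenced third entry still
   receives an offset but contributes no space, and dump_minipool below
   skips it, so the emitted pool occupies 8 bytes.  */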
16230
16231 /* Output the literal table. */
16232 static void
16233 dump_minipool (rtx_insn *scan)
16234 {
16235 Mnode * mp;
16236 Mnode * nmp;
16237 int align64 = 0;
16238
16239 if (ARM_DOUBLEWORD_ALIGN)
16240 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16241 if (mp->refcount > 0 && mp->fix_size >= 8)
16242 {
16243 align64 = 1;
16244 break;
16245 }
16246
16247 if (dump_file)
16248 fprintf (dump_file,
16249 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16250 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16251
16252 scan = emit_label_after (gen_label_rtx (), scan);
16253 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16254 scan = emit_label_after (minipool_vector_label, scan);
16255
16256 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16257 {
16258 if (mp->refcount > 0)
16259 {
16260 if (dump_file)
16261 {
16262 fprintf (dump_file,
16263 ";; Offset %u, min %ld, max %ld ",
16264 (unsigned) mp->offset, (unsigned long) mp->min_address,
16265 (unsigned long) mp->max_address);
16266 arm_print_value (dump_file, mp->value);
16267 fputc ('\n', dump_file);
16268 }
16269
16270 rtx val = copy_rtx (mp->value);
16271
16272 switch (GET_MODE_SIZE (mp->mode))
16273 {
16274 #ifdef HAVE_consttable_1
16275 case 1:
16276 scan = emit_insn_after (gen_consttable_1 (val), scan);
16277 break;
16278
16279 #endif
16280 #ifdef HAVE_consttable_2
16281 case 2:
16282 scan = emit_insn_after (gen_consttable_2 (val), scan);
16283 break;
16284
16285 #endif
16286 #ifdef HAVE_consttable_4
16287 case 4:
16288 scan = emit_insn_after (gen_consttable_4 (val), scan);
16289 break;
16290
16291 #endif
16292 #ifdef HAVE_consttable_8
16293 case 8:
16294 scan = emit_insn_after (gen_consttable_8 (val), scan);
16295 break;
16296
16297 #endif
16298 #ifdef HAVE_consttable_16
16299 case 16:
16300 scan = emit_insn_after (gen_consttable_16 (val), scan);
16301 break;
16302
16303 #endif
16304 default:
16305 gcc_unreachable ();
16306 }
16307 }
16308
16309 nmp = mp->next;
16310 free (mp);
16311 }
16312
16313 minipool_vector_head = minipool_vector_tail = NULL;
16314 scan = emit_insn_after (gen_consttable_end (), scan);
16315 scan = emit_barrier_after (scan);
16316 }
16317
16318 /* Return the cost of forcibly inserting a barrier after INSN. */
16319 static int
16320 arm_barrier_cost (rtx_insn *insn)
16321 {
16322 /* Basing the location of the pool on the loop depth is preferable,
16323 but at the moment, the basic block information seems to be
16324 corrupted by this stage of the compilation. */
16325 int base_cost = 50;
16326 rtx_insn *next = next_nonnote_insn (insn);
16327
16328 if (next != NULL && LABEL_P (next))
16329 base_cost -= 20;
16330
16331 switch (GET_CODE (insn))
16332 {
16333 case CODE_LABEL:
16334 /* It will always be better to place the table before the label, rather
16335 than after it. */
16336 return 50;
16337
16338 case INSN:
16339 case CALL_INSN:
16340 return base_cost;
16341
16342 case JUMP_INSN:
16343 return base_cost - 10;
16344
16345 default:
16346 return base_cost + 10;
16347 }
16348 }
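
/* Illustrative sketch of the costs above: a plain INSN or CALL_INSN
   returns 50, or 30 when the next non-note insn is a label; a JUMP_INSN
   returns 10 less than that; a CODE_LABEL always returns 50.  Lower is
   cheaper, so create_fix_barrier below tends to place the pool after a
   jump or just before a label rather than in the middle of
   straight-line code.  */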
16349
16350 /* Find the best place in the insn stream in the range
16351 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16352 Create the barrier by inserting a jump and add a new fix entry for
16353 it. */
16354 static Mfix *
16355 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16356 {
16357 HOST_WIDE_INT count = 0;
16358 rtx_barrier *barrier;
16359 rtx_insn *from = fix->insn;
16360 /* The instruction after which we will insert the jump. */
16361 rtx_insn *selected = NULL;
16362 int selected_cost;
16363 /* The address at which the jump instruction will be placed. */
16364 HOST_WIDE_INT selected_address;
16365 Mfix * new_fix;
16366 HOST_WIDE_INT max_count = max_address - fix->address;
16367 rtx_code_label *label = gen_label_rtx ();
16368
16369 selected_cost = arm_barrier_cost (from);
16370 selected_address = fix->address;
16371
16372 while (from && count < max_count)
16373 {
16374 rtx_jump_table_data *tmp;
16375 int new_cost;
16376
16377 /* This code shouldn't have been called if there was a natural barrier
16378 within range. */
16379 gcc_assert (!BARRIER_P (from));
16380
16381 /* Count the length of this insn. This must stay in sync with the
16382 code that pushes minipool fixes. */
16383 if (LABEL_P (from))
16384 count += get_label_padding (from);
16385 else
16386 count += get_attr_length (from);
16387
16388 /* If there is a jump table, add its length. */
16389 if (tablejump_p (from, NULL, &tmp))
16390 {
16391 count += get_jump_table_size (tmp);
16392
16393 /* Jump tables aren't in a basic block, so base the cost on
16394 the dispatch insn. If we select this location, we will
16395 still put the pool after the table. */
16396 new_cost = arm_barrier_cost (from);
16397
16398 if (count < max_count
16399 && (!selected || new_cost <= selected_cost))
16400 {
16401 selected = tmp;
16402 selected_cost = new_cost;
16403 selected_address = fix->address + count;
16404 }
16405
16406 /* Continue after the dispatch table. */
16407 from = NEXT_INSN (tmp);
16408 continue;
16409 }
16410
16411 new_cost = arm_barrier_cost (from);
16412
16413 if (count < max_count
16414 && (!selected || new_cost <= selected_cost))
16415 {
16416 selected = from;
16417 selected_cost = new_cost;
16418 selected_address = fix->address + count;
16419 }
16420
16421 from = NEXT_INSN (from);
16422 }
16423
16424 /* Make sure that we found a place to insert the jump. */
16425 gcc_assert (selected);
16426
16427 /* Make sure we do not split a call and its corresponding
16428 CALL_ARG_LOCATION note. */
16429 if (CALL_P (selected))
16430 {
16431 rtx_insn *next = NEXT_INSN (selected);
16432 if (next && NOTE_P (next)
16433 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16434 selected = next;
16435 }
16436
16437 /* Create a new JUMP_INSN that branches around a barrier. */
16438 from = emit_jump_insn_after (gen_jump (label), selected);
16439 JUMP_LABEL (from) = label;
16440 barrier = emit_barrier_after (from);
16441 emit_label_after (label, barrier);
16442
16443 /* Create a minipool barrier entry for the new barrier. */
16444 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16445 new_fix->insn = barrier;
16446 new_fix->address = selected_address;
16447 new_fix->next = fix->next;
16448 fix->next = new_fix;
16449
16450 return new_fix;
16451 }
16452
16453 /* Record that there is a natural barrier in the insn stream at
16454 ADDRESS. */
16455 static void
16456 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16457 {
16458 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16459
16460 fix->insn = insn;
16461 fix->address = address;
16462
16463 fix->next = NULL;
16464 if (minipool_fix_head != NULL)
16465 minipool_fix_tail->next = fix;
16466 else
16467 minipool_fix_head = fix;
16468
16469 minipool_fix_tail = fix;
16470 }
16471
16472 /* Record INSN, which will need fixing up to load a value from the
16473 minipool. ADDRESS is the offset of the insn since the start of the
16474 function; LOC is a pointer to the part of the insn which requires
16475 fixing; VALUE is the constant that must be loaded, which is of type
16476 MODE. */
16477 static void
16478 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16479 machine_mode mode, rtx value)
16480 {
16481 gcc_assert (!arm_disable_literal_pool);
16482 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16483
16484 fix->insn = insn;
16485 fix->address = address;
16486 fix->loc = loc;
16487 fix->mode = mode;
16488 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16489 fix->value = value;
16490 fix->forwards = get_attr_pool_range (insn);
16491 fix->backwards = get_attr_neg_pool_range (insn);
16492 fix->minipool = NULL;
16493
16494 /* If an insn doesn't have a range defined for it, then it isn't
16495 expecting to be reworked by this code. Better to stop now than
16496 to generate duff assembly code. */
16497 gcc_assert (fix->forwards || fix->backwards);
16498
16499 /* If an entry requires 8-byte alignment then assume all constant pools
16500 require 4 bytes of padding. Trying to do this later on a per-pool
16501 basis is awkward because existing pool entries have to be modified. */
16502 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16503 minipool_pad = 4;
16504
16505 if (dump_file)
16506 {
16507 fprintf (dump_file,
16508 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16509 GET_MODE_NAME (mode),
16510 INSN_UID (insn), (unsigned long) address,
16511 -1 * (long)fix->backwards, (long)fix->forwards);
16512 arm_print_value (dump_file, fix->value);
16513 fprintf (dump_file, "\n");
16514 }
16515
16516 /* Add it to the chain of fixes. */
16517 fix->next = NULL;
16518
16519 if (minipool_fix_head != NULL)
16520 minipool_fix_tail->next = fix;
16521 else
16522 minipool_fix_head = fix;
16523
16524 minipool_fix_tail = fix;
16525 }
16526
16527 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16528 Returns the number of insns needed, or 99 if we always want to synthesize
16529 the value. */
16530 int
16531 arm_max_const_double_inline_cost ()
16532 {
16533 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16534 }
16535
16536 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16537 Returns the number of insns needed, or 99 if we don't know how to
16538 do it. */
16539 int
16540 arm_const_double_inline_cost (rtx val)
16541 {
16542 rtx lowpart, highpart;
16543 machine_mode mode;
16544
16545 mode = GET_MODE (val);
16546
16547 if (mode == VOIDmode)
16548 mode = DImode;
16549
16550 gcc_assert (GET_MODE_SIZE (mode) == 8);
16551
16552 lowpart = gen_lowpart (SImode, val);
16553 highpart = gen_highpart_mode (SImode, mode, val);
16554
16555 gcc_assert (CONST_INT_P (lowpart));
16556 gcc_assert (CONST_INT_P (highpart));
16557
16558 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16559 NULL_RTX, NULL_RTX, 0, 0)
16560 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16561 NULL_RTX, NULL_RTX, 0, 0));
16562 }
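
/* Illustrative sketch (constant invented for the example): for the
   DImode constant 0x0000ff0000000001 the low part 0x1 and the high part
   0xff00 are both valid ARM data-processing immediates, so each half
   costs one insn and the function above returns 2.  Being below
   arm_max_const_double_inline_cost (3 or 4), such a constant would
   normally be synthesized with two moves rather than loaded from the
   literal pool.  */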
16563
16564 /* Cost of loading a SImode constant. */
16565 static inline int
16566 arm_const_inline_cost (enum rtx_code code, rtx val)
16567 {
16568 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16569 NULL_RTX, NULL_RTX, 1, 0);
16570 }
16571
16572 /* Return true if it is worthwhile to split a 64-bit constant into two
16573 32-bit operations. This is the case if optimizing for size, or
16574 if we have load delay slots, or if one 32-bit part can be done with
16575 a single data operation. */
16576 bool
16577 arm_const_double_by_parts (rtx val)
16578 {
16579 machine_mode mode = GET_MODE (val);
16580 rtx part;
16581
16582 if (optimize_size || arm_ld_sched)
16583 return true;
16584
16585 if (mode == VOIDmode)
16586 mode = DImode;
16587
16588 part = gen_highpart_mode (SImode, mode, val);
16589
16590 gcc_assert (CONST_INT_P (part));
16591
16592 if (const_ok_for_arm (INTVAL (part))
16593 || const_ok_for_arm (~INTVAL (part)))
16594 return true;
16595
16596 part = gen_lowpart (SImode, val);
16597
16598 gcc_assert (CONST_INT_P (part));
16599
16600 if (const_ok_for_arm (INTVAL (part))
16601 || const_ok_for_arm (~INTVAL (part)))
16602 return true;
16603
16604 return false;
16605 }
16606
16607 /* Return true if it is possible to inline both the high and low parts
16608 of a 64-bit constant into 32-bit data processing instructions. */
16609 bool
16610 arm_const_double_by_immediates (rtx val)
16611 {
16612 machine_mode mode = GET_MODE (val);
16613 rtx part;
16614
16615 if (mode == VOIDmode)
16616 mode = DImode;
16617
16618 part = gen_highpart_mode (SImode, mode, val);
16619
16620 gcc_assert (CONST_INT_P (part));
16621
16622 if (!const_ok_for_arm (INTVAL (part)))
16623 return false;
16624
16625 part = gen_lowpart (SImode, val);
16626
16627 gcc_assert (CONST_INT_P (part));
16628
16629 if (!const_ok_for_arm (INTVAL (part)))
16630 return false;
16631
16632 return true;
16633 }
16634
16635 /* Scan INSN and note any of its operands that need fixing.
16636 If DO_PUSHES is false we do not actually push any of the fixups
16637 needed. */
16638 static void
16639 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16640 {
16641 int opno;
16642
16643 extract_constrain_insn (insn);
16644
16645 if (recog_data.n_alternatives == 0)
16646 return;
16647
16648 /* Fill in recog_op_alt with information about the constraints of
16649 this insn. */
16650 preprocess_constraints (insn);
16651
16652 const operand_alternative *op_alt = which_op_alt ();
16653 for (opno = 0; opno < recog_data.n_operands; opno++)
16654 {
16655 /* Things we need to fix can only occur in inputs. */
16656 if (recog_data.operand_type[opno] != OP_IN)
16657 continue;
16658
16659 /* If this alternative is a memory reference, then any mention
16660 of constants in this alternative is really to fool reload
16661 into allowing us to accept one there. We need to fix them up
16662 now so that we output the right code. */
16663 if (op_alt[opno].memory_ok)
16664 {
16665 rtx op = recog_data.operand[opno];
16666
16667 if (CONSTANT_P (op))
16668 {
16669 if (do_pushes)
16670 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16671 recog_data.operand_mode[opno], op);
16672 }
16673 else if (MEM_P (op)
16674 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16675 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16676 {
16677 if (do_pushes)
16678 {
16679 rtx cop = avoid_constant_pool_reference (op);
16680
16681 /* Casting the address of something to a mode narrower
16682 than a word can cause avoid_constant_pool_reference()
16683 to return the pool reference itself. That's no good to
16684 us here. Let's just hope that we can use the
16685 constant pool value directly. */
16686 if (op == cop)
16687 cop = get_pool_constant (XEXP (op, 0));
16688
16689 push_minipool_fix (insn, address,
16690 recog_data.operand_loc[opno],
16691 recog_data.operand_mode[opno], cop);
16692 }
16693
16694 }
16695 }
16696 }
16697
16698 return;
16699 }
16700
16701 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16702 and unions in the context of ARMv8-M Security Extensions. It is used as a
16703 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16704 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16705 or four masks, depending on whether it is being computed for a
16706 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16707 respectively. The tree for the type of the argument or a field within an
16708 argument is passed in ARG_TYPE, the current register this argument or field
16709 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16710 argument or field starts at is passed in STARTING_BIT and the last used bit
16711 is kept in LAST_USED_BIT which is also updated accordingly. */
16712
16713 static unsigned HOST_WIDE_INT
16714 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16715 uint32_t * padding_bits_to_clear,
16716 unsigned starting_bit, int * last_used_bit)
16717
16718 {
16719 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16720
16721 if (TREE_CODE (arg_type) == RECORD_TYPE)
16722 {
16723 unsigned current_bit = starting_bit;
16724 tree field;
16725 long int offset, size;
16726
16727
16728 field = TYPE_FIELDS (arg_type);
16729 while (field)
16730 {
16731 /* The offset within a structure is always an offset from
16732 the start of that structure. Make sure we take that into account
16733 in the calculation of the register-based offset we use here. */
16734 offset = starting_bit;
16735 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16736 offset %= 32;
16737
16738 /* This is the actual size of the field, for bitfields this is the
16739 bitfield width and not the container size. */
16740 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16741
16742 if (*last_used_bit != offset)
16743 {
16744 if (offset < *last_used_bit)
16745 {
16746 /* This field's offset is before the 'last_used_bit', which
16747 means this field goes in the next register. So we need to
16748 pad the rest of the current register and increase the
16749 register number. */
16750 uint32_t mask;
16751 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16752 mask++;
16753
16754 padding_bits_to_clear[*regno] |= mask;
16755 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16756 (*regno)++;
16757 }
16758 else
16759 {
16760 /* Otherwise we pad the bits between the last field's end and
16761 the start of the new field. */
16762 uint32_t mask;
16763
16764 mask = ((uint32_t)-1) >> (32 - offset);
16765 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16766 padding_bits_to_clear[*regno] |= mask;
16767 }
16768 current_bit = offset;
16769 }
16770
16771 /* Calculate further padding bits for inner structs/unions too. */
16772 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16773 {
16774 *last_used_bit = current_bit;
16775 not_to_clear_reg_mask
16776 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16777 padding_bits_to_clear, offset,
16778 last_used_bit);
16779 }
16780 else
16781 {
16782 /* Update 'current_bit' with this field's size. If the
16783 'current_bit' lies in a subsequent register, update 'regno' and
16784 reset 'current_bit' to point to the current bit in that new
16785 register. */
16786 current_bit += size;
16787 while (current_bit >= 32)
16788 {
16789 current_bit -= 32;
16790 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16791 (*regno)++;
16792 }
16793 *last_used_bit = current_bit;
16794 }
16795
16796 field = TREE_CHAIN (field);
16797 }
16798 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16799 }
16800 else if (TREE_CODE (arg_type) == UNION_TYPE)
16801 {
16802 tree field, field_t;
16803 int i, regno_t, field_size;
16804 int max_reg = -1;
16805 int max_bit = -1;
16806 uint32_t mask;
16807 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16808 = {-1, -1, -1, -1};
16809
16810 /* To compute the padding bits in a union we only consider bits as
16811 padding bits if they are always either a padding bit or fall outside a
16812 field's size for all fields in the union. */
16813 field = TYPE_FIELDS (arg_type);
16814 while (field)
16815 {
16816 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16817 = {0U, 0U, 0U, 0U};
16818 int last_used_bit_t = *last_used_bit;
16819 regno_t = *regno;
16820 field_t = TREE_TYPE (field);
16821
16822 /* If the field's type is either a record or a union make sure to
16823 compute their padding bits too. */
16824 if (RECORD_OR_UNION_TYPE_P (field_t))
16825 not_to_clear_reg_mask
16826 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16827 &padding_bits_to_clear_t[0],
16828 starting_bit, &last_used_bit_t);
16829 else
16830 {
16831 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16832 regno_t = (field_size / 32) + *regno;
16833 last_used_bit_t = (starting_bit + field_size) % 32;
16834 }
16835
16836 for (i = *regno; i < regno_t; i++)
16837 {
16838 /* For all but the last register used by this field only keep the
16839 padding bits that were padding bits in this field. */
16840 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16841 }
16842
16843 /* For the last register, keep all padding bits that were padding
16844 bits in this field and any padding bits that are still valid
16845 as padding bits but fall outside of this field's size. */
16846 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16847 padding_bits_to_clear_res[regno_t]
16848 &= padding_bits_to_clear_t[regno_t] | mask;
16849
16850 /* Update the maximum size of the fields in terms of registers used
16851 ('max_reg') and the 'last_used_bit' in said register. */
16852 if (max_reg < regno_t)
16853 {
16854 max_reg = regno_t;
16855 max_bit = last_used_bit_t;
16856 }
16857 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16858 max_bit = last_used_bit_t;
16859
16860 field = TREE_CHAIN (field);
16861 }
16862
16863 /* Update the current padding_bits_to_clear using the intersection of the
16864 padding bits of all the fields. */
16865 for (i = *regno; i < max_reg; i++)
16866 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16867
16868 /* Do not keep trailing padding bits; we do not know yet whether this
16869 is the end of the argument. */
16870 mask = ((uint32_t) 1 << max_bit) - 1;
16871 padding_bits_to_clear[max_reg]
16872 |= padding_bits_to_clear_res[max_reg] & mask;
16873
16874 *regno = max_reg;
16875 *last_used_bit = max_bit;
16876 }
16877 else
16878 /* This function should only be used for structs and unions. */
16879 gcc_unreachable ();
16880
16881 return not_to_clear_reg_mask;
16882 }
16883
16884 /* In the context of ARMv8-M Security Extensions, this function is used for both
16885 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16886 registers are used when returning or passing arguments, which is then
16887 returned as a mask. It will also compute a mask to indicate padding/unused
16888 bits for each of these registers, and passes this through the
16889 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16890 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16891 the starting register used to pass this argument or return value is passed
16892 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16893 for struct and union types. */
16894
16895 static unsigned HOST_WIDE_INT
16896 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16897 uint32_t * padding_bits_to_clear)
16898
16899 {
16900 int last_used_bit = 0;
16901 unsigned HOST_WIDE_INT not_to_clear_mask;
16902
16903 if (RECORD_OR_UNION_TYPE_P (arg_type))
16904 {
16905 not_to_clear_mask
16906 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16907 padding_bits_to_clear, 0,
16908 &last_used_bit);
16909
16910
16911 /* If the 'last_used_bit' is not zero, that means we are still using a
16912 part of the last 'regno'. In such cases we must clear the trailing
16913 bits. Otherwise we are not using regno and we should mark it as to
16914 be cleared. */
16915 if (last_used_bit != 0)
16916 padding_bits_to_clear[regno]
16917 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16918 else
16919 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16920 }
16921 else
16922 {
16923 not_to_clear_mask = 0;
16924 /* We are not dealing with structs or unions, so these arguments may
16925 also be passed in floating-point registers. In some cases a BLKmode is
16926 used when returning or passing arguments in multiple VFP registers. */
16927 if (GET_MODE (arg_rtx) == BLKmode)
16928 {
16929 int i, arg_regs;
16930 rtx reg;
16931
16932 /* This should really only occur when dealing with the hard-float
16933 ABI. */
16934 gcc_assert (TARGET_HARD_FLOAT_ABI);
16935
16936 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16937 {
16938 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16939 gcc_assert (REG_P (reg));
16940
16941 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16942
16943 /* If we are dealing with DF mode, make sure we don't
16944 clear either of the registers it addresses. */
16945 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16946 if (arg_regs > 1)
16947 {
16948 unsigned HOST_WIDE_INT mask;
16949 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16950 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16951 not_to_clear_mask |= mask;
16952 }
16953 }
16954 }
16955 else
16956 {
16957 /* Otherwise we can rely on the MODE to determine how many registers
16958 are being used by this argument. */
16959 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16960 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16961 if (arg_regs > 1)
16962 {
16963 unsigned HOST_WIDE_INT
16964 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16965 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16966 not_to_clear_mask |= mask;
16967 }
16968 }
16969 }
16970
16971 return not_to_clear_mask;
16972 }
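
/* Illustrative trace of the function above for a hypothetical argument
   of type struct { uint8_t a; uint32_t b; } starting in r0, assuming
   the usual AAPCS layout (a in byte 0, three bytes of padding, b in
   bytes 4-7).  The struct occupies r0 and r1, so bits 0 and 1 of the
   returned mask are set; the three padding bytes give
   padding_bits_to_clear[0] == 0xffffff00, while
   padding_bits_to_clear[1] stays 0 because r1 is fully used.  */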
16973
16974 /* Clears caller saved registers not used to pass arguments before a
16975 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16976 registers is done in __gnu_cmse_nonsecure_call libcall.
16977 See libgcc/config/arm/cmse_nonsecure_call.S. */
16978
16979 static void
16980 cmse_nonsecure_call_clear_caller_saved (void)
16981 {
16982 basic_block bb;
16983
16984 FOR_EACH_BB_FN (bb, cfun)
16985 {
16986 rtx_insn *insn;
16987
16988 FOR_BB_INSNS (bb, insn)
16989 {
16990 uint64_t to_clear_mask, float_mask;
16991 rtx_insn *seq;
16992 rtx pat, call, unspec, reg, cleared_reg, tmp;
16993 unsigned int regno, maxregno;
16994 rtx address;
16995 CUMULATIVE_ARGS args_so_far_v;
16996 cumulative_args_t args_so_far;
16997 tree arg_type, fntype;
16998 bool using_r4, first_param = true;
16999 function_args_iterator args_iter;
17000 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17001 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
17002
17003 if (!NONDEBUG_INSN_P (insn))
17004 continue;
17005
17006 if (!CALL_P (insn))
17007 continue;
17008
17009 pat = PATTERN (insn);
17010 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17011 call = XVECEXP (pat, 0, 0);
17012
17013 /* Get the real call RTX if the insn sets a value, ie. returns. */
17014 if (GET_CODE (call) == SET)
17015 call = SET_SRC (call);
17016
17017 /* Check if it is a cmse_nonsecure_call. */
17018 unspec = XEXP (call, 0);
17019 if (GET_CODE (unspec) != UNSPEC
17020 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17021 continue;
17022
17023 /* Determine the caller-saved registers we need to clear. */
17024 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17025 maxregno = NUM_ARG_REGS - 1;
17026 /* Only look at the caller-saved floating point registers in case of
17027 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17028 lazy store and loads which clear both caller- and callee-saved
17029 registers. */
17030 if (TARGET_HARD_FLOAT_ABI)
17031 {
17032 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17033 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17034 to_clear_mask |= float_mask;
17035 maxregno = D7_VFP_REGNUM;
17036 }
17037
17038 /* Make sure the register used to hold the function address is not
17039 cleared. */
17040 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17041 gcc_assert (MEM_P (address));
17042 gcc_assert (REG_P (XEXP (address, 0)));
17043 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17044
17045 /* Set basic block of call insn so that df rescan is performed on
17046 insns inserted here. */
17047 set_block_for_insn (insn, bb);
17048 df_set_flags (DF_DEFER_INSN_RESCAN);
17049 start_sequence ();
17050
17051 /* Make sure the scheduler doesn't schedule other insns beyond
17052 here. */
17053 emit_insn (gen_blockage ());
17054
17055 /* Walk through all arguments and clear registers
17056 appropriately. */
17057 fntype = TREE_TYPE (MEM_EXPR (address));
17058 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17059 NULL_TREE);
17060 args_so_far = pack_cumulative_args (&args_so_far_v);
17061 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17062 {
17063 rtx arg_rtx;
17064 machine_mode arg_mode = TYPE_MODE (arg_type);
17065
17066 if (VOID_TYPE_P (arg_type))
17067 continue;
17068
17069 if (!first_param)
17070 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17071 true);
17072
17073 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17074 true);
17075 gcc_assert (REG_P (arg_rtx));
17076 to_clear_mask
17077 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17078 REGNO (arg_rtx),
17079 padding_bits_to_clear_ptr);
17080
17081 first_param = false;
17082 }
17083
17084 /* Clear padding bits where needed. */
17085 cleared_reg = XEXP (address, 0);
17086 reg = gen_rtx_REG (SImode, IP_REGNUM);
17087 using_r4 = false;
17088 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17089 {
17090 if (padding_bits_to_clear[regno] == 0)
17091 continue;
17092
17093 /* If this is a Thumb-1 target, copy the address of the function
17094 we are calling from 'r4' into 'ip' so that we can use r4 to
17095 clear the unused bits in the arguments. */
17096 if (TARGET_THUMB1 && !using_r4)
17097 {
17098 using_r4 = true;
17099 reg = cleared_reg;
17100 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17101 reg);
17102 }
17103
17104 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17105 emit_move_insn (reg, tmp);
17106 /* Also fill the top half of the negated
17107 padding_bits_to_clear. */
17108 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17109 {
17110 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17111 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17112 GEN_INT (16),
17113 GEN_INT (16)),
17114 tmp));
17115 }
17116
17117 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17118 gen_rtx_REG (SImode, regno),
17119 reg));
17120
17121 }
17122 if (using_r4)
17123 emit_move_insn (cleared_reg,
17124 gen_rtx_REG (SImode, IP_REGNUM));
17125
17126 /* We use right shift and left shift to clear the LSB of the address
17127 we jump to instead of using bic, to avoid having to use an extra
17128 register on Thumb-1. */
17129 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17130 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17131 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17132 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17133
17134 /* Clear all registers that could leak information before doing a
17135 non-secure call. */
17136 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17137 {
17138 if (!(to_clear_mask & (1LL << regno)))
17139 continue;
17140
17141 /* If regno is an even vfp register and its successor is also to
17142 be cleared, use vmov. */
17143 if (IS_VFP_REGNUM (regno))
17144 {
17145 if (TARGET_VFP_DOUBLE
17146 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17147 && to_clear_mask & (1LL << (regno + 1)))
17148 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17149 CONST0_RTX (DFmode));
17150 else
17151 emit_move_insn (gen_rtx_REG (SFmode, regno),
17152 CONST0_RTX (SFmode));
17153 }
17154 else
17155 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17156 }
17157
17158 seq = get_insns ();
17159 end_sequence ();
17160 emit_insn_before (seq, insn);
17161
17162 }
17163 }
17164 }
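
/* Illustrative sketch, continuing the hypothetical
   struct { uint8_t a; uint32_t b; } example given above: the
   padding-bit loop in this function would load ~0xffffff00 == 0xff
   into the scratch register (the upper 16 bits of that value are zero,
   so no second move is needed) and then AND it into r0, zeroing the
   caller's three padding bytes so that they cannot leak to the
   non-secure callee.  */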
17165
17166 /* Rewrite move insn into subtract of 0 if the condition codes will
17167 be useful in next conditional jump insn. */
17168
17169 static void
17170 thumb1_reorg (void)
17171 {
17172 basic_block bb;
17173
17174 FOR_EACH_BB_FN (bb, cfun)
17175 {
17176 rtx dest, src;
17177 rtx cmp, op0, op1, set = NULL;
17178 rtx_insn *prev, *insn = BB_END (bb);
17179 bool insn_clobbered = false;
17180
17181 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17182 insn = PREV_INSN (insn);
17183
17184 /* Find the last cbranchsi4_insn in basic block BB. */
17185 if (insn == BB_HEAD (bb)
17186 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17187 continue;
17188
17189 /* Get the register with which we are comparing. */
17190 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17191 op0 = XEXP (cmp, 0);
17192 op1 = XEXP (cmp, 1);
17193
17194 /* Check that comparison is against ZERO. */
17195 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17196 continue;
17197
17198 /* Find the first flag setting insn before INSN in basic block BB. */
17199 gcc_assert (insn != BB_HEAD (bb));
17200 for (prev = PREV_INSN (insn);
17201 (!insn_clobbered
17202 && prev != BB_HEAD (bb)
17203 && (NOTE_P (prev)
17204 || DEBUG_INSN_P (prev)
17205 || ((set = single_set (prev)) != NULL
17206 && get_attr_conds (prev) == CONDS_NOCOND)));
17207 prev = PREV_INSN (prev))
17208 {
17209 if (reg_set_p (op0, prev))
17210 insn_clobbered = true;
17211 }
17212
17213 /* Skip if op0 is clobbered by insn other than prev. */
17214 if (insn_clobbered)
17215 continue;
17216
17217 if (!set)
17218 continue;
17219
17220 dest = SET_DEST (set);
17221 src = SET_SRC (set);
17222 if (!low_register_operand (dest, SImode)
17223 || !low_register_operand (src, SImode))
17224 continue;
17225
17226 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17227 in INSN. Both src and dest of the move insn are checked. */
17228 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17229 {
17230 dest = copy_rtx (dest);
17231 src = copy_rtx (src);
17232 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17233 PATTERN (prev) = gen_rtx_SET (dest, src);
17234 INSN_CODE (prev) = -1;
17235 /* Set test register in INSN to dest. */
17236 XEXP (cmp, 0) = copy_rtx (dest);
17237 INSN_CODE (insn) = -1;
17238 }
17239 }
17240 }
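
/* Illustrative sketch (register numbers and label invented): the
   rewrite above turns a sequence such as

       mov   r1, r2
       ...               @ no intervening flag-setting insns
       cmp   r2, #0
       bne   .L1

   into

       subs  r1, r2, #0
       ...
       @ compare r1 with zero, then bne .L1

   Since the SUBS already leaves the condition codes describing the
   compared value, the explicit compare can usually be omitted when the
   branch is output.  */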
17241
17242 /* Convert instructions to their cc-clobbering variant if possible, since
17243 that allows us to use smaller encodings. */
17244
17245 static void
17246 thumb2_reorg (void)
17247 {
17248 basic_block bb;
17249 regset_head live;
17250
17251 INIT_REG_SET (&live);
17252
17253 /* We are freeing block_for_insn in the toplev to keep compatibility
17254 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17255 compute_bb_for_insn ();
17256 df_analyze ();
17257
17258 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17259
17260 FOR_EACH_BB_FN (bb, cfun)
17261 {
17262 if ((current_tune->disparage_flag_setting_t16_encodings
17263 == tune_params::DISPARAGE_FLAGS_ALL)
17264 && optimize_bb_for_speed_p (bb))
17265 continue;
17266
17267 rtx_insn *insn;
17268 Convert_Action action = SKIP;
17269 Convert_Action action_for_partial_flag_setting
17270 = ((current_tune->disparage_flag_setting_t16_encodings
17271 != tune_params::DISPARAGE_FLAGS_NEITHER)
17272 && optimize_bb_for_speed_p (bb))
17273 ? SKIP : CONV;
17274
17275 COPY_REG_SET (&live, DF_LR_OUT (bb));
17276 df_simulate_initialize_backwards (bb, &live);
17277 FOR_BB_INSNS_REVERSE (bb, insn)
17278 {
17279 if (NONJUMP_INSN_P (insn)
17280 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17281 && GET_CODE (PATTERN (insn)) == SET)
17282 {
17283 action = SKIP;
17284 rtx pat = PATTERN (insn);
17285 rtx dst = XEXP (pat, 0);
17286 rtx src = XEXP (pat, 1);
17287 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17288
17289 if (UNARY_P (src) || BINARY_P (src))
17290 op0 = XEXP (src, 0);
17291
17292 if (BINARY_P (src))
17293 op1 = XEXP (src, 1);
17294
17295 if (low_register_operand (dst, SImode))
17296 {
17297 switch (GET_CODE (src))
17298 {
17299 case PLUS:
17300 /* Adding two registers and storing the result
17301 in the first source is already a 16-bit
17302 operation. */
17303 if (rtx_equal_p (dst, op0)
17304 && register_operand (op1, SImode))
17305 break;
17306
17307 if (low_register_operand (op0, SImode))
17308 {
17309 /* ADDS <Rd>,<Rn>,<Rm> */
17310 if (low_register_operand (op1, SImode))
17311 action = CONV;
17312 /* ADDS <Rdn>,#<imm8> */
17313 /* SUBS <Rdn>,#<imm8> */
17314 else if (rtx_equal_p (dst, op0)
17315 && CONST_INT_P (op1)
17316 && IN_RANGE (INTVAL (op1), -255, 255))
17317 action = CONV;
17318 /* ADDS <Rd>,<Rn>,#<imm3> */
17319 /* SUBS <Rd>,<Rn>,#<imm3> */
17320 else if (CONST_INT_P (op1)
17321 && IN_RANGE (INTVAL (op1), -7, 7))
17322 action = CONV;
17323 }
17324 /* ADCS <Rd>, <Rn> */
17325 else if (GET_CODE (XEXP (src, 0)) == PLUS
17326 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17327 && low_register_operand (XEXP (XEXP (src, 0), 1),
17328 SImode)
17329 && COMPARISON_P (op1)
17330 && cc_register (XEXP (op1, 0), VOIDmode)
17331 && maybe_get_arm_condition_code (op1) == ARM_CS
17332 && XEXP (op1, 1) == const0_rtx)
17333 action = CONV;
17334 break;
17335
17336 case MINUS:
17337 /* RSBS <Rd>,<Rn>,#0
17338 Not handled here: see NEG below. */
17339 /* SUBS <Rd>,<Rn>,#<imm3>
17340 SUBS <Rdn>,#<imm8>
17341 Not handled here: see PLUS above. */
17342 /* SUBS <Rd>,<Rn>,<Rm> */
17343 if (low_register_operand (op0, SImode)
17344 && low_register_operand (op1, SImode))
17345 action = CONV;
17346 break;
17347
17348 case MULT:
17349 /* MULS <Rdm>,<Rn>,<Rdm>
17350 As an exception to the rule, this is only used
17351 when optimizing for size since MULS is slow on all
17352 known implementations. We do not even want to use
17353 MULS in cold code, if optimizing for speed, so we
17354 test the global flag here. */
17355 if (!optimize_size)
17356 break;
17357 /* Fall through. */
17358 case AND:
17359 case IOR:
17360 case XOR:
17361 /* ANDS <Rdn>,<Rm> */
17362 if (rtx_equal_p (dst, op0)
17363 && low_register_operand (op1, SImode))
17364 action = action_for_partial_flag_setting;
17365 else if (rtx_equal_p (dst, op1)
17366 && low_register_operand (op0, SImode))
17367 action = action_for_partial_flag_setting == SKIP
17368 ? SKIP : SWAP_CONV;
17369 break;
17370
17371 case ASHIFTRT:
17372 case ASHIFT:
17373 case LSHIFTRT:
17374 /* ASRS <Rdn>,<Rm> */
17375 /* LSRS <Rdn>,<Rm> */
17376 /* LSLS <Rdn>,<Rm> */
17377 if (rtx_equal_p (dst, op0)
17378 && low_register_operand (op1, SImode))
17379 action = action_for_partial_flag_setting;
17380 /* ASRS <Rd>,<Rm>,#<imm5> */
17381 /* LSRS <Rd>,<Rm>,#<imm5> */
17382 /* LSLS <Rd>,<Rm>,#<imm5> */
17383 else if (low_register_operand (op0, SImode)
17384 && CONST_INT_P (op1)
17385 && IN_RANGE (INTVAL (op1), 0, 31))
17386 action = action_for_partial_flag_setting;
17387 break;
17388
17389 case ROTATERT:
17390 /* RORS <Rdn>,<Rm> */
17391 if (rtx_equal_p (dst, op0)
17392 && low_register_operand (op1, SImode))
17393 action = action_for_partial_flag_setting;
17394 break;
17395
17396 case NOT:
17397 /* MVNS <Rd>,<Rm> */
17398 if (low_register_operand (op0, SImode))
17399 action = action_for_partial_flag_setting;
17400 break;
17401
17402 case NEG:
17403 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17404 if (low_register_operand (op0, SImode))
17405 action = CONV;
17406 break;
17407
17408 case CONST_INT:
17409 /* MOVS <Rd>,#<imm8> */
17410 if (CONST_INT_P (src)
17411 && IN_RANGE (INTVAL (src), 0, 255))
17412 action = action_for_partial_flag_setting;
17413 break;
17414
17415 case REG:
17416 /* MOVS and MOV<c> with registers have different
17417 encodings, so are not relevant here. */
17418 break;
17419
17420 default:
17421 break;
17422 }
17423 }
17424
17425 if (action != SKIP)
17426 {
17427 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17428 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17429 rtvec vec;
17430
17431 if (action == SWAP_CONV)
17432 {
17433 src = copy_rtx (src);
17434 XEXP (src, 0) = op1;
17435 XEXP (src, 1) = op0;
17436 pat = gen_rtx_SET (dst, src);
17437 vec = gen_rtvec (2, pat, clobber);
17438 }
17439 else /* action == CONV */
17440 vec = gen_rtvec (2, pat, clobber);
17441
17442 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17443 INSN_CODE (insn) = -1;
17444 }
17445 }
17446
17447 if (NONDEBUG_INSN_P (insn))
17448 df_simulate_one_insn_backwards (bb, insn, &live);
17449 }
17450 }
17451
17452 CLEAR_REG_SET (&live);
17453 }
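
/* Illustrative sketch: when the condition codes are dead, the pass
   above rewrites, for example,
       (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
   as a PARALLEL of the same SET with a (clobber (reg:CC CC_REGNUM)),
   which matches the flag-setting patterns and so allows the 16-bit
   ADDS encoding to be used instead of the 32-bit ADD.W form.  */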
17454
17455 /* GCC puts the pool in the wrong place for ARM, since we can only
17456 load addresses a limited distance around the pc. We do some
17457 special munging to move the constant pool values to the correct
17458 point in the code. */
17459 static void
17460 arm_reorg (void)
17461 {
17462 rtx_insn *insn;
17463 HOST_WIDE_INT address = 0;
17464 Mfix * fix;
17465
17466 if (use_cmse)
17467 cmse_nonsecure_call_clear_caller_saved ();
17468 if (TARGET_THUMB1)
17469 thumb1_reorg ();
17470 else if (TARGET_THUMB2)
17471 thumb2_reorg ();
17472
17473 /* Ensure all insns that must be split have been split at this point.
17474 Otherwise, the pool placement code below may compute incorrect
17475 insn lengths. Note that when optimizing, all insns have already
17476 been split at this point. */
17477 if (!optimize)
17478 split_all_insns_noflow ();
17479
17480 /* Make sure we do not attempt to create a literal pool even though it should
17481 no longer be necessary to create any. */
17482 if (arm_disable_literal_pool)
17483 return;
17484
17485 minipool_fix_head = minipool_fix_tail = NULL;
17486
17487 /* The first insn must always be a note, or the code below won't
17488 scan it properly. */
17489 insn = get_insns ();
17490 gcc_assert (NOTE_P (insn));
17491 minipool_pad = 0;
17492
17493 /* Scan all the insns and record the operands that will need fixing. */
17494 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17495 {
17496 if (BARRIER_P (insn))
17497 push_minipool_barrier (insn, address);
17498 else if (INSN_P (insn))
17499 {
17500 rtx_jump_table_data *table;
17501
17502 note_invalid_constants (insn, address, true);
17503 address += get_attr_length (insn);
17504
17505 /* If the insn is a vector jump, add the size of the table
17506 and skip the table. */
17507 if (tablejump_p (insn, NULL, &table))
17508 {
17509 address += get_jump_table_size (table);
17510 insn = table;
17511 }
17512 }
17513 else if (LABEL_P (insn))
17514 /* Add the worst-case padding due to alignment. We don't add
17515 the _current_ padding because the minipool insertions
17516 themselves might change it. */
17517 address += get_label_padding (insn);
17518 }
17519
17520 fix = minipool_fix_head;
17521
17522 /* Now scan the fixups and perform the required changes. */
17523 while (fix)
17524 {
17525 Mfix * ftmp;
17526 Mfix * fdel;
17527 Mfix * last_added_fix;
17528 Mfix * last_barrier = NULL;
17529 Mfix * this_fix;
17530
17531 /* Skip any further barriers before the next fix. */
17532 while (fix && BARRIER_P (fix->insn))
17533 fix = fix->next;
17534
17535 /* No more fixes. */
17536 if (fix == NULL)
17537 break;
17538
17539 last_added_fix = NULL;
17540
17541 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17542 {
17543 if (BARRIER_P (ftmp->insn))
17544 {
17545 if (ftmp->address >= minipool_vector_head->max_address)
17546 break;
17547
17548 last_barrier = ftmp;
17549 }
17550 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17551 break;
17552
17553 last_added_fix = ftmp; /* Keep track of the last fix added. */
17554 }
17555
17556 /* If we found a barrier, drop back to that; any fixes that we
17557 could have reached but come after the barrier will now go in
17558 the next mini-pool. */
17559 if (last_barrier != NULL)
17560 {
17561 /* Reduce the refcount for those fixes that won't go into this
17562 pool after all. */
17563 for (fdel = last_barrier->next;
17564 fdel && fdel != ftmp;
17565 fdel = fdel->next)
17566 {
17567 fdel->minipool->refcount--;
17568 fdel->minipool = NULL;
17569 }
17570
17571 ftmp = last_barrier;
17572 }
17573 else
17574 {
17575 /* ftmp is the first fix that we can't fit into this pool and
17576 there are no natural barriers that we could use. Insert a
17577 new barrier in the code somewhere between the previous
17578 fix and this one, and arrange to jump around it. */
17579 HOST_WIDE_INT max_address;
17580
17581 /* The last item on the list of fixes must be a barrier, so
17582 we can never run off the end of the list of fixes without
17583 last_barrier being set. */
17584 gcc_assert (ftmp);
17585
17586 max_address = minipool_vector_head->max_address;
17587 /* Check that there isn't another fix that is in range that
17588 we couldn't fit into this pool because the pool was
17589 already too large: we need to put the pool before such an
17590 instruction. The pool itself may come just after the
17591 fix because create_fix_barrier also allows space for a
17592 jump instruction. */
17593 if (ftmp->address < max_address)
17594 max_address = ftmp->address + 1;
17595
17596 last_barrier = create_fix_barrier (last_added_fix, max_address);
17597 }
17598
17599 assign_minipool_offsets (last_barrier);
17600
17601 while (ftmp)
17602 {
17603 if (!BARRIER_P (ftmp->insn)
17604 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17605 == NULL))
17606 break;
17607
17608 ftmp = ftmp->next;
17609 }
17610
17611 /* Scan over the fixes we have identified for this pool, fixing them
17612 up and adding the constants to the pool itself. */
17613 for (this_fix = fix; this_fix && ftmp != this_fix;
17614 this_fix = this_fix->next)
17615 if (!BARRIER_P (this_fix->insn))
17616 {
17617 rtx addr
17618 = plus_constant (Pmode,
17619 gen_rtx_LABEL_REF (VOIDmode,
17620 minipool_vector_label),
17621 this_fix->minipool->offset);
17622 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17623 }
17624
17625 dump_minipool (last_barrier->insn);
17626 fix = ftmp;
17627 }
17628
17629 /* From now on we must synthesize any constants that we can't handle
17630 directly. This can happen if the RTL gets split during final
17631 instruction generation. */
17632 cfun->machine->after_arm_reorg = 1;
17633
17634 /* Free the minipool memory. */
17635 obstack_free (&minipool_obstack, minipool_startobj);
17636 }
17637 \f
17638 /* Routines to output assembly language. */
17639
17640 /* Return the string representation of the real value passed in. */
17641 static const char *
17642 fp_const_from_val (REAL_VALUE_TYPE *r)
17643 {
17644 if (!fp_consts_inited)
17645 init_fp_table ();
17646
17647 gcc_assert (real_equal (r, &value_fp0));
17648 return "0";
17649 }
17650
17651 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17652 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17653 insn is in the list, and UPDATE is true iff the list contains an
17654 explicit update of the base register. */
17655 void
17656 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17657 bool update)
17658 {
17659 int i;
17660 char pattern[100];
17661 int offset;
17662 const char *conditional;
17663 int num_saves = XVECLEN (operands[0], 0);
17664 unsigned int regno;
17665 unsigned int regno_base = REGNO (operands[1]);
17666 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17667
17668 offset = 0;
17669 offset += update ? 1 : 0;
17670 offset += return_pc ? 1 : 0;
17671
17672 /* Is the base register in the list? */
17673 for (i = offset; i < num_saves; i++)
17674 {
17675 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17676 /* If SP is in the list, then the base register must be SP. */
17677 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17678 /* If base register is in the list, there must be no explicit update. */
17679 if (regno == regno_base)
17680 gcc_assert (!update);
17681 }
17682
17683 conditional = reverse ? "%?%D0" : "%?%d0";
17684 /* Can't use POP if returning from an interrupt. */
17685 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17686 sprintf (pattern, "pop%s\t{", conditional);
17687 else
17688 {
17689 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17690 It's just a convention; their semantics are identical. */
17691 if (regno_base == SP_REGNUM)
17692 sprintf (pattern, "ldmfd%s\t", conditional);
17693 else if (update)
17694 sprintf (pattern, "ldmia%s\t", conditional);
17695 else
17696 sprintf (pattern, "ldm%s\t", conditional);
17697
17698 strcat (pattern, reg_names[regno_base]);
17699 if (update)
17700 strcat (pattern, "!, {");
17701 else
17702 strcat (pattern, ", {");
17703 }
17704
17705 /* Output the first destination register. */
17706 strcat (pattern,
17707 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17708
17709 /* Output the rest of the destination registers. */
17710 for (i = offset + 1; i < num_saves; i++)
17711 {
17712 strcat (pattern, ", ");
17713 strcat (pattern,
17714 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17715 }
17716
17717 strcat (pattern, "}");
17718
17719 if (interrupt_p && return_pc)
17720 strcat (pattern, "^");
17721
17722 output_asm_insn (pattern, &cond);
17723 }
17724
17725
17726 /* Output the assembly for a store multiple. */
17727
17728 const char *
17729 vfp_output_vstmd (rtx * operands)
17730 {
17731 char pattern[100];
17732 int p;
17733 int base;
17734 int i;
17735 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17736 ? XEXP (operands[0], 0)
17737 : XEXP (XEXP (operands[0], 0), 0);
17738 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17739
17740 if (push_p)
17741 strcpy (pattern, "vpush%?.64\t{%P1");
17742 else
17743 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17744
17745 p = strlen (pattern);
17746
17747 gcc_assert (REG_P (operands[1]));
17748
17749 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17750 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17751 {
17752 p += sprintf (&pattern[p], ", d%d", base + i);
17753 }
17754 strcpy (&pattern[p], "}");
17755
17756 output_asm_insn (pattern, operands);
17757 return "";
17758 }
17759
17760
17761 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17762 number of bytes pushed. */
17763
17764 static int
17765 vfp_emit_fstmd (int base_reg, int count)
17766 {
17767 rtx par;
17768 rtx dwarf;
17769 rtx tmp, reg;
17770 int i;
17771
17772 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17773 register pairs are stored by a store multiple insn. We avoid this
17774 by pushing an extra pair. */
17775 if (count == 2 && !arm_arch6)
17776 {
17777 if (base_reg == LAST_VFP_REGNUM - 3)
17778 base_reg -= 2;
17779 count++;
17780 }
17781
17782 /* FSTMD may not store more than 16 doubleword registers at once. Split
17783 larger stores into multiple parts (up to a maximum of two, in
17784 practice). */
17785 if (count > 16)
17786 {
17787 int saved;
17788 /* NOTE: base_reg is an internal register number, so each D register
17789 counts as 2. */
17790 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17791 saved += vfp_emit_fstmd (base_reg, 16);
17792 return saved;
17793 }
17794
17795 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17796 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17797
17798 reg = gen_rtx_REG (DFmode, base_reg);
17799 base_reg += 2;
17800
17801 XVECEXP (par, 0, 0)
17802 = gen_rtx_SET (gen_frame_mem
17803 (BLKmode,
17804 gen_rtx_PRE_MODIFY (Pmode,
17805 stack_pointer_rtx,
17806 plus_constant
17807 (Pmode, stack_pointer_rtx,
17808 - (count * 8)))
17809 ),
17810 gen_rtx_UNSPEC (BLKmode,
17811 gen_rtvec (1, reg),
17812 UNSPEC_PUSH_MULT));
17813
17814 tmp = gen_rtx_SET (stack_pointer_rtx,
17815 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17816 RTX_FRAME_RELATED_P (tmp) = 1;
17817 XVECEXP (dwarf, 0, 0) = tmp;
17818
17819 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17820 RTX_FRAME_RELATED_P (tmp) = 1;
17821 XVECEXP (dwarf, 0, 1) = tmp;
17822
17823 for (i = 1; i < count; i++)
17824 {
17825 reg = gen_rtx_REG (DFmode, base_reg);
17826 base_reg += 2;
17827 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17828
17829 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17830 plus_constant (Pmode,
17831 stack_pointer_rtx,
17832 i * 8)),
17833 reg);
17834 RTX_FRAME_RELATED_P (tmp) = 1;
17835 XVECEXP (dwarf, 0, i + 1) = tmp;
17836 }
17837
17838 par = emit_insn (par);
17839 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17840 RTX_FRAME_RELATED_P (par) = 1;
17841
17842 return count * 8;
17843 }
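
/* For example, when arm_arch6 is not set (so the ARM10 VFPr1 workaround
   applies), a request to push the pair d8-d9 (COUNT == 2) is widened to
   d8-d10 and the function returns 24; with arm_arch6 set the same call
   stores just d8-d9 and returns 16.  */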
17844
17845 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
17846 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17847
17848 bool
17849 detect_cmse_nonsecure_call (tree addr)
17850 {
17851 if (!addr)
17852 return FALSE;
17853
17854 tree fntype = TREE_TYPE (addr);
17855 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17856 TYPE_ATTRIBUTES (fntype)))
17857 return TRUE;
17858 return FALSE;
17859 }
17860
17861
17862 /* Emit a call instruction with pattern PAT. ADDR is the address of
17863 the call target. */
17864
17865 void
17866 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17867 {
17868 rtx insn;
17869
17870 insn = emit_call_insn (pat);
17871
17872 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17873 If the call might use such an entry, add a use of the PIC register
17874 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17875 if (TARGET_VXWORKS_RTP
17876 && flag_pic
17877 && !sibcall
17878 && GET_CODE (addr) == SYMBOL_REF
17879 && (SYMBOL_REF_DECL (addr)
17880 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17881 : !SYMBOL_REF_LOCAL_P (addr)))
17882 {
17883 require_pic_register ();
17884 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17885 }
17886
17887 if (TARGET_AAPCS_BASED)
17888 {
17889 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17890 linker. We need to add an IP clobber to allow setting
17891 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17892 is not needed since it's a fixed register. */
17893 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17894 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17895 }
17896 }
17897
17898 /* Output a 'call' insn. */
17899 const char *
17900 output_call (rtx *operands)
17901 {
17902 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17903
17904 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17905 if (REGNO (operands[0]) == LR_REGNUM)
17906 {
17907 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17908 output_asm_insn ("mov%?\t%0, %|lr", operands);
17909 }
17910
17911 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17912
17913 if (TARGET_INTERWORK || arm_arch4t)
17914 output_asm_insn ("bx%?\t%0", operands);
17915 else
17916 output_asm_insn ("mov%?\t%|pc, %0", operands);
17917
17918 return "";
17919 }
17920
17921 /* Output a move from arm registers to arm registers of a long double
17922 OPERANDS[0] is the destination.
17923 OPERANDS[1] is the source. */
17924 const char *
17925 output_mov_long_double_arm_from_arm (rtx *operands)
17926 {
17927 /* We have to be careful here because the two might overlap. */
17928 int dest_start = REGNO (operands[0]);
17929 int src_start = REGNO (operands[1]);
17930 rtx ops[2];
17931 int i;
17932
17933 if (dest_start < src_start)
17934 {
17935 for (i = 0; i < 3; i++)
17936 {
17937 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17938 ops[1] = gen_rtx_REG (SImode, src_start + i);
17939 output_asm_insn ("mov%?\t%0, %1", ops);
17940 }
17941 }
17942 else
17943 {
17944 for (i = 2; i >= 0; i--)
17945 {
17946 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17947 ops[1] = gen_rtx_REG (SImode, src_start + i);
17948 output_asm_insn ("mov%?\t%0, %1", ops);
17949 }
17950 }
17951
17952 return "";
17953 }
17954
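/* Set DEST to SRC using at most two SETs, suitable for a movw/movt pair.
   For a CONST_INT the low 16 bits are emitted first and, if the upper half
   is nonzero, a ZERO_EXTRACT then sets the top 16 bits; for a symbolic SRC
   a HIGH/LO_SUM pair is emitted instead.  */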
17955 void
17956 arm_emit_movpair (rtx dest, rtx src)
17957 {
17958 /* If the src is an immediate, simplify it. */
17959 if (CONST_INT_P (src))
17960 {
17961 HOST_WIDE_INT val = INTVAL (src);
17962 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17963 if ((val >> 16) & 0x0000ffff)
17964 {
17965 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17966 GEN_INT (16)),
17967 GEN_INT ((val >> 16) & 0x0000ffff));
17968 rtx_insn *insn = get_last_insn ();
17969 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17970 }
17971 return;
17972 }
17973 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17974 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17975 rtx_insn *insn = get_last_insn ();
17976 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17977 }
17978
17979 /* Output a move between double words. It must be REG<-MEM
17980 or MEM<-REG. */
17981 const char *
17982 output_move_double (rtx *operands, bool emit, int *count)
17983 {
17984 enum rtx_code code0 = GET_CODE (operands[0]);
17985 enum rtx_code code1 = GET_CODE (operands[1]);
17986 rtx otherops[3];
17987 if (count)
17988 *count = 1;
17989
17990 /* The only case when this might happen is when
17991 you are looking at the length of a DImode instruction
17992 that has an invalid constant in it. */
17993 if (code0 == REG && code1 != MEM)
17994 {
17995 gcc_assert (!emit);
17996 *count = 2;
17997 return "";
17998 }
17999
18000 if (code0 == REG)
18001 {
18002 unsigned int reg0 = REGNO (operands[0]);
18003
18004 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18005
18006 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18007
18008 switch (GET_CODE (XEXP (operands[1], 0)))
18009 {
18010 case REG:
18011
18012 if (emit)
18013 {
18014 if (TARGET_LDRD
18015 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18016 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18017 else
18018 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18019 }
18020 break;
18021
18022 case PRE_INC:
18023 gcc_assert (TARGET_LDRD);
18024 if (emit)
18025 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18026 break;
18027
18028 case PRE_DEC:
18029 if (emit)
18030 {
18031 if (TARGET_LDRD)
18032 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18033 else
18034 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18035 }
18036 break;
18037
18038 case POST_INC:
18039 if (emit)
18040 {
18041 if (TARGET_LDRD)
18042 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18043 else
18044 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18045 }
18046 break;
18047
18048 case POST_DEC:
18049 gcc_assert (TARGET_LDRD);
18050 if (emit)
18051 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18052 break;
18053
18054 case PRE_MODIFY:
18055 case POST_MODIFY:
18056 /* Auto-increment addressing modes should never have overlapping
18057 base and destination registers, and overlapping index registers
18058 are already prohibited, so this doesn't need to worry about
18059 fix_cm3_ldrd. */
18060 otherops[0] = operands[0];
18061 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18062 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18063
18064 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18065 {
18066 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18067 {
18068 /* Registers overlap so split out the increment. */
18069 if (emit)
18070 {
18071 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18072 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18073 }
18074 if (count)
18075 *count = 2;
18076 }
18077 else
18078 {
18079 /* Use a single insn if we can.
18080 FIXME: IWMMXT allows offsets larger than ldrd can
18081 handle, fix these up with a pair of ldr. */
18082 if (TARGET_THUMB2
18083 || !CONST_INT_P (otherops[2])
18084 || (INTVAL (otherops[2]) > -256
18085 && INTVAL (otherops[2]) < 256))
18086 {
18087 if (emit)
18088 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18089 }
18090 else
18091 {
18092 if (emit)
18093 {
18094 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18096 }
18097 if (count)
18098 *count = 2;
18099
18100 }
18101 }
18102 }
18103 else
18104 {
18105 /* Use a single insn if we can.
18106 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18107 fix these up with a pair of ldr. */
18108 if (TARGET_THUMB2
18109 || !CONST_INT_P (otherops[2])
18110 || (INTVAL (otherops[2]) > -256
18111 && INTVAL (otherops[2]) < 256))
18112 {
18113 if (emit)
18114 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18115 }
18116 else
18117 {
18118 if (emit)
18119 {
18120 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18121 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18122 }
18123 if (count)
18124 *count = 2;
18125 }
18126 }
18127 break;
18128
18129 case LABEL_REF:
18130 case CONST:
18131 /* We might be able to use ldrd %0, %1 here. However the range is
18132 different to ldr/adr, and it is broken on some ARMv7-M
18133 implementations. */
18134 /* Use the second register of the pair to avoid problematic
18135 overlap. */
18136 otherops[1] = operands[1];
18137 if (emit)
18138 output_asm_insn ("adr%?\t%0, %1", otherops);
18139 operands[1] = otherops[0];
18140 if (emit)
18141 {
18142 if (TARGET_LDRD)
18143 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18144 else
18145 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18146 }
18147
18148 if (count)
18149 *count = 2;
18150 break;
18151
18152 /* ??? This needs checking for thumb2. */
18153 default:
18154 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18155 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18156 {
18157 otherops[0] = operands[0];
18158 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18159 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18160
18161 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18162 {
18163 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18164 {
18165 switch ((int) INTVAL (otherops[2]))
18166 {
18167 case -8:
18168 if (emit)
18169 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18170 return "";
18171 case -4:
18172 if (TARGET_THUMB2)
18173 break;
18174 if (emit)
18175 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18176 return "";
18177 case 4:
18178 if (TARGET_THUMB2)
18179 break;
18180 if (emit)
18181 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18182 return "";
18183 }
18184 }
18185 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18186 operands[1] = otherops[0];
18187 if (TARGET_LDRD
18188 && (REG_P (otherops[2])
18189 || TARGET_THUMB2
18190 || (CONST_INT_P (otherops[2])
18191 && INTVAL (otherops[2]) > -256
18192 && INTVAL (otherops[2]) < 256)))
18193 {
18194 if (reg_overlap_mentioned_p (operands[0],
18195 otherops[2]))
18196 {
18197 /* Swap base and index registers over to
18198 avoid a conflict. */
18199 std::swap (otherops[1], otherops[2]);
18200 }
18201 /* If both registers conflict, it will usually
18202 have been fixed by a splitter. */
18203 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18204 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18205 {
18206 if (emit)
18207 {
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18209 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18210 }
18211 if (count)
18212 *count = 2;
18213 }
18214 else
18215 {
18216 otherops[0] = operands[0];
18217 if (emit)
18218 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18219 }
18220 return "";
18221 }
18222
18223 if (CONST_INT_P (otherops[2]))
18224 {
18225 if (emit)
18226 {
18227 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18228 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18229 else
18230 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18231 }
18232 }
18233 else
18234 {
18235 if (emit)
18236 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18237 }
18238 }
18239 else
18240 {
18241 if (emit)
18242 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18243 }
18244
18245 if (count)
18246 *count = 2;
18247
18248 if (TARGET_LDRD)
18249 return "ldrd%?\t%0, [%1]";
18250
18251 return "ldmia%?\t%1, %M0";
18252 }
18253 else
18254 {
18255 otherops[1] = adjust_address (operands[1], SImode, 4);
18256 /* Take care of overlapping base/data reg. */
18257 if (reg_mentioned_p (operands[0], operands[1]))
18258 {
18259 if (emit)
18260 {
18261 output_asm_insn ("ldr%?\t%0, %1", otherops);
18262 output_asm_insn ("ldr%?\t%0, %1", operands);
18263 }
18264 if (count)
18265 *count = 2;
18266
18267 }
18268 else
18269 {
18270 if (emit)
18271 {
18272 output_asm_insn ("ldr%?\t%0, %1", operands);
18273 output_asm_insn ("ldr%?\t%0, %1", otherops);
18274 }
18275 if (count)
18276 *count = 2;
18277 }
18278 }
18279 }
18280 }
18281 else
18282 {
18283 /* Constraints should ensure this. */
18284 gcc_assert (code0 == MEM && code1 == REG);
18285 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18286 || (TARGET_ARM && TARGET_LDRD));
18287
18288 switch (GET_CODE (XEXP (operands[0], 0)))
18289 {
18290 case REG:
18291 if (emit)
18292 {
18293 if (TARGET_LDRD)
18294 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18295 else
18296 output_asm_insn ("stm%?\t%m0, %M1", operands);
18297 }
18298 break;
18299
18300 case PRE_INC:
18301 gcc_assert (TARGET_LDRD);
18302 if (emit)
18303 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18304 break;
18305
18306 case PRE_DEC:
18307 if (emit)
18308 {
18309 if (TARGET_LDRD)
18310 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18311 else
18312 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18313 }
18314 break;
18315
18316 case POST_INC:
18317 if (emit)
18318 {
18319 if (TARGET_LDRD)
18320 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18321 else
18322 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18323 }
18324 break;
18325
18326 case POST_DEC:
18327 gcc_assert (TARGET_LDRD);
18328 if (emit)
18329 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18330 break;
18331
18332 case PRE_MODIFY:
18333 case POST_MODIFY:
18334 otherops[0] = operands[1];
18335 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18336 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18337
18338 /* IWMMXT allows offsets larger than ldrd can handle,
18339 fix these up with a pair of ldr. */
18340 if (!TARGET_THUMB2
18341 && CONST_INT_P (otherops[2])
18342 && (INTVAL(otherops[2]) <= -256
18343 || INTVAL(otherops[2]) >= 256))
18344 {
18345 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18346 {
18347 if (emit)
18348 {
18349 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18350 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18351 }
18352 if (count)
18353 *count = 2;
18354 }
18355 else
18356 {
18357 if (emit)
18358 {
18359 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18360 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18361 }
18362 if (count)
18363 *count = 2;
18364 }
18365 }
18366 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18367 {
18368 if (emit)
18369 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18370 }
18371 else
18372 {
18373 if (emit)
18374 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18375 }
18376 break;
18377
18378 case PLUS:
18379 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18380 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18381 {
18382 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18383 {
18384 case -8:
18385 if (emit)
18386 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18387 return "";
18388
18389 case -4:
18390 if (TARGET_THUMB2)
18391 break;
18392 if (emit)
18393 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18394 return "";
18395
18396 case 4:
18397 if (TARGET_THUMB2)
18398 break;
18399 if (emit)
18400 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18401 return "";
18402 }
18403 }
18404 if (TARGET_LDRD
18405 && (REG_P (otherops[2])
18406 || TARGET_THUMB2
18407 || (CONST_INT_P (otherops[2])
18408 && INTVAL (otherops[2]) > -256
18409 && INTVAL (otherops[2]) < 256)))
18410 {
18411 otherops[0] = operands[1];
18412 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18413 if (emit)
18414 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18415 return "";
18416 }
18417 /* Fall through */
18418
18419 default:
18420 otherops[0] = adjust_address (operands[0], SImode, 4);
18421 otherops[1] = operands[1];
18422 if (emit)
18423 {
18424 output_asm_insn ("str%?\t%1, %0", operands);
18425 output_asm_insn ("str%?\t%H1, %0", otherops);
18426 }
18427 if (count)
18428 *count = 2;
18429 }
18430 }
18431
18432 return "";
18433 }
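
/* For illustration: a DImode load of r0:r1 from an address held in r2 comes
   out as "ldrd r0, [r2]" when TARGET_LDRD is set (and the Cortex-M3 ldrd
   workaround does not force a split), or as "ldmia r2, {r0, r1}" otherwise;
   the corresponding stores use strd or stm.  The more involved cases above
   exist to keep ldrd/strd usable when the addressing mode or a register
   overlap would otherwise rule it out.  */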
18434
18435 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18436 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18437
18438 const char *
18439 output_move_quad (rtx *operands)
18440 {
18441 if (REG_P (operands[0]))
18442 {
18443 /* Load, or reg->reg move. */
18444
18445 if (MEM_P (operands[1]))
18446 {
18447 switch (GET_CODE (XEXP (operands[1], 0)))
18448 {
18449 case REG:
18450 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18451 break;
18452
18453 case LABEL_REF:
18454 case CONST:
18455 output_asm_insn ("adr%?\t%0, %1", operands);
18456 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18457 break;
18458
18459 default:
18460 gcc_unreachable ();
18461 }
18462 }
18463 else
18464 {
18465 rtx ops[2];
18466 int dest, src, i;
18467
18468 gcc_assert (REG_P (operands[1]));
18469
18470 dest = REGNO (operands[0]);
18471 src = REGNO (operands[1]);
18472
18473 /* This seems pretty dumb, but hopefully GCC won't try to do it
18474 very often. */
18475 if (dest < src)
18476 for (i = 0; i < 4; i++)
18477 {
18478 ops[0] = gen_rtx_REG (SImode, dest + i);
18479 ops[1] = gen_rtx_REG (SImode, src + i);
18480 output_asm_insn ("mov%?\t%0, %1", ops);
18481 }
18482 else
18483 for (i = 3; i >= 0; i--)
18484 {
18485 ops[0] = gen_rtx_REG (SImode, dest + i);
18486 ops[1] = gen_rtx_REG (SImode, src + i);
18487 output_asm_insn ("mov%?\t%0, %1", ops);
18488 }
18489 }
18490 }
18491 else
18492 {
18493 gcc_assert (MEM_P (operands[0]));
18494 gcc_assert (REG_P (operands[1]));
18495 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18496
18497 switch (GET_CODE (XEXP (operands[0], 0)))
18498 {
18499 case REG:
18500 output_asm_insn ("stm%?\t%m0, %M1", operands);
18501 break;
18502
18503 default:
18504 gcc_unreachable ();
18505 }
18506 }
18507
18508 return "";
18509 }
18510
18511 /* Output a VFP load or store instruction. */
18512
18513 const char *
18514 output_move_vfp (rtx *operands)
18515 {
18516 rtx reg, mem, addr, ops[2];
18517 int load = REG_P (operands[0]);
18518 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18519 int sp = (!TARGET_VFP_FP16INST
18520 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18521 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18522 const char *templ;
18523 char buff[50];
18524 machine_mode mode;
18525
18526 reg = operands[!load];
18527 mem = operands[load];
18528
18529 mode = GET_MODE (reg);
18530
18531 gcc_assert (REG_P (reg));
18532 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18533 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18534 || mode == SFmode
18535 || mode == DFmode
18536 || mode == HImode
18537 || mode == SImode
18538 || mode == DImode
18539 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18540 gcc_assert (MEM_P (mem));
18541
18542 addr = XEXP (mem, 0);
18543
18544 switch (GET_CODE (addr))
18545 {
18546 case PRE_DEC:
18547 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18548 ops[0] = XEXP (addr, 0);
18549 ops[1] = reg;
18550 break;
18551
18552 case POST_INC:
18553 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18554 ops[0] = XEXP (addr, 0);
18555 ops[1] = reg;
18556 break;
18557
18558 default:
18559 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18560 ops[0] = reg;
18561 ops[1] = mem;
18562 break;
18563 }
18564
18565 sprintf (buff, templ,
18566 load ? "ld" : "st",
18567 dp ? "64" : sp ? "32" : "16",
18568 dp ? "P" : "",
18569 integer_p ? "\t%@ int" : "");
18570 output_asm_insn (buff, ops);
18571
18572 return "";
18573 }
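
/* For illustration: a DFmode load of d8 from the address in r0 is printed as
   "vldr.64 d8, [r0]", an SFmode store of s1 to [r3, #8] as
   "vstr.32 s1, [r3, #8]", and the pre-decrement and post-increment forms use
   vstmdb/vldmia with writeback on the base register instead.  */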
18574
18575 /* Output a Neon double-word or quad-word load or store, or a load
18576 or store for larger structure modes.
18577
18578 WARNING: The ordering of elements is weird in big-endian mode,
18579 because the EABI requires that vectors stored in memory appear
18580 as though they were stored by a VSTM instruction.
18581 GCC RTL defines element ordering based on in-memory order.
18582 This can be different from the architectural ordering of elements
18583 within a NEON register. The intrinsics defined in arm_neon.h use the
18584 NEON register element ordering, not the GCC RTL element ordering.
18585
18586 For example, the in-memory ordering of a big-endian quadword
18587 vector with 16-bit elements when stored from register pair {d0,d1}
18588 will be (lowest address first, d0[N] is NEON register element N):
18589
18590 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18591
18592 When necessary, quadword registers (dN, dN+1) are moved to ARM
18593 registers from rN in the order:
18594
18595 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18596
18597 So that STM/LDM can be used on vectors in ARM registers, and the
18598 same memory layout will result as if VSTM/VLDM were used.
18599
18600 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18601 possible, which allows use of appropriate alignment tags.
18602 Note that the choice of "64" is independent of the actual vector
18603 element size; this size simply ensures that the behavior is
18604 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18605
18606 Due to limitations of those instructions, use of VST1.64/VLD1.64
18607 is not possible if:
18608 - the address contains PRE_DEC, or
18609 - the mode refers to more than 4 double-word registers
18610
18611 In those cases, it would be possible to replace VSTM/VLDM by a
18612 sequence of instructions; this is not currently implemented since
18613 this is not certain to actually improve performance. */
18614
18615 const char *
18616 output_move_neon (rtx *operands)
18617 {
18618 rtx reg, mem, addr, ops[2];
18619 int regno, nregs, load = REG_P (operands[0]);
18620 const char *templ;
18621 char buff[50];
18622 machine_mode mode;
18623
18624 reg = operands[!load];
18625 mem = operands[load];
18626
18627 mode = GET_MODE (reg);
18628
18629 gcc_assert (REG_P (reg));
18630 regno = REGNO (reg);
18631 nregs = REG_NREGS (reg) / 2;
18632 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18633 || NEON_REGNO_OK_FOR_QUAD (regno));
18634 gcc_assert (VALID_NEON_DREG_MODE (mode)
18635 || VALID_NEON_QREG_MODE (mode)
18636 || VALID_NEON_STRUCT_MODE (mode));
18637 gcc_assert (MEM_P (mem));
18638
18639 addr = XEXP (mem, 0);
18640
18641 /* Strip off const from addresses like (const (plus (...))). */
18642 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18643 addr = XEXP (addr, 0);
18644
18645 switch (GET_CODE (addr))
18646 {
18647 case POST_INC:
18648 /* We have to use vldm / vstm for too-large modes. */
18649 if (nregs > 4)
18650 {
18651 templ = "v%smia%%?\t%%0!, %%h1";
18652 ops[0] = XEXP (addr, 0);
18653 }
18654 else
18655 {
18656 templ = "v%s1.64\t%%h1, %%A0";
18657 ops[0] = mem;
18658 }
18659 ops[1] = reg;
18660 break;
18661
18662 case PRE_DEC:
18663 /* We have to use vldm / vstm in this case, since there is no
18664 pre-decrement form of the vld1 / vst1 instructions. */
18665 templ = "v%smdb%%?\t%%0!, %%h1";
18666 ops[0] = XEXP (addr, 0);
18667 ops[1] = reg;
18668 break;
18669
18670 case POST_MODIFY:
18671 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18672 gcc_unreachable ();
18673
18674 case REG:
18675 /* We have to use vldm / vstm for too-large modes. */
18676 if (nregs > 1)
18677 {
18678 if (nregs > 4)
18679 templ = "v%smia%%?\t%%m0, %%h1";
18680 else
18681 templ = "v%s1.64\t%%h1, %%A0";
18682
18683 ops[0] = mem;
18684 ops[1] = reg;
18685 break;
18686 }
18687 /* Fall through. */
18688 case LABEL_REF:
18689 case PLUS:
18690 {
18691 int i;
18692 int overlap = -1;
18693 for (i = 0; i < nregs; i++)
18694 {
18695 /* We're only using DImode here because it's a convenient size. */
18696 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18697 ops[1] = adjust_address (mem, DImode, 8 * i);
18698 if (reg_overlap_mentioned_p (ops[0], mem))
18699 {
18700 gcc_assert (overlap == -1);
18701 overlap = i;
18702 }
18703 else
18704 {
18705 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18706 output_asm_insn (buff, ops);
18707 }
18708 }
18709 if (overlap != -1)
18710 {
18711 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18712 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18713 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18714 output_asm_insn (buff, ops);
18715 }
18716
18717 return "";
18718 }
18719
18720 default:
18721 gcc_unreachable ();
18722 }
18723
18724 sprintf (buff, templ, load ? "ld" : "st");
18725 output_asm_insn (buff, ops);
18726
18727 return "";
18728 }
18729
18730 /* Compute and return the length of neon_mov<mode>, where <mode> is
18731 one of VSTRUCT modes: EI, OI, CI or XI. */
18732 int
18733 arm_attr_length_move_neon (rtx_insn *insn)
18734 {
18735 rtx reg, mem, addr;
18736 int load;
18737 machine_mode mode;
18738
18739 extract_insn_cached (insn);
18740
18741 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18742 {
18743 mode = GET_MODE (recog_data.operand[0]);
18744 switch (mode)
18745 {
18746 case E_EImode:
18747 case E_OImode:
18748 return 8;
18749 case E_CImode:
18750 return 12;
18751 case E_XImode:
18752 return 16;
18753 default:
18754 gcc_unreachable ();
18755 }
18756 }
18757
18758 load = REG_P (recog_data.operand[0]);
18759 reg = recog_data.operand[!load];
18760 mem = recog_data.operand[load];
18761
18762 gcc_assert (MEM_P (mem));
18763
18764 addr = XEXP (mem, 0);
18765
18766 /* Strip off const from addresses like (const (plus (...))). */
18767 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18768 addr = XEXP (addr, 0);
18769
18770 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18771 {
18772 int insns = REG_NREGS (reg) / 2;
18773 return insns * 4;
18774 }
18775 else
18776 return 4;
18777 }
18778
18779 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18780 return zero. */
18781
18782 int
18783 arm_address_offset_is_imm (rtx_insn *insn)
18784 {
18785 rtx mem, addr;
18786
18787 extract_insn_cached (insn);
18788
18789 if (REG_P (recog_data.operand[0]))
18790 return 0;
18791
18792 mem = recog_data.operand[0];
18793
18794 gcc_assert (MEM_P (mem));
18795
18796 addr = XEXP (mem, 0);
18797
18798 if (REG_P (addr)
18799 || (GET_CODE (addr) == PLUS
18800 && REG_P (XEXP (addr, 0))
18801 && CONST_INT_P (XEXP (addr, 1))))
18802 return 1;
18803 else
18804 return 0;
18805 }
18806
18807 /* Output an ADD r, s, #n where n may be too big for one instruction.
18808 If adding zero to one register, output nothing. */
18809 const char *
18810 output_add_immediate (rtx *operands)
18811 {
18812 HOST_WIDE_INT n = INTVAL (operands[2]);
18813
18814 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18815 {
18816 if (n < 0)
18817 output_multi_immediate (operands,
18818 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18819 -n);
18820 else
18821 output_multi_immediate (operands,
18822 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18823 n);
18824 }
18825
18826 return "";
18827 }
18828
18829 /* Output a multiple immediate operation.
18830 OPERANDS is the vector of operands referred to in the output patterns.
18831 INSTR1 is the output pattern to use for the first constant.
18832 INSTR2 is the output pattern to use for subsequent constants.
18833 IMMED_OP is the index of the constant slot in OPERANDS.
18834 N is the constant value. */
18835 static const char *
18836 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18837 int immed_op, HOST_WIDE_INT n)
18838 {
18839 #if HOST_BITS_PER_WIDE_INT > 32
18840 n &= 0xffffffff;
18841 #endif
18842
18843 if (n == 0)
18844 {
18845 /* Quick and easy output. */
18846 operands[immed_op] = const0_rtx;
18847 output_asm_insn (instr1, operands);
18848 }
18849 else
18850 {
18851 int i;
18852 const char * instr = instr1;
18853
18854 /* Note that n is never zero here (which would give no output). */
18855 for (i = 0; i < 32; i += 2)
18856 {
18857 if (n & (3 << i))
18858 {
18859 operands[immed_op] = GEN_INT (n & (255 << i));
18860 output_asm_insn (instr, operands);
18861 instr = instr2;
18862 i += 6;
18863 }
18864 }
18865 }
18866
18867 return "";
18868 }
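
/* As an example of the splitting above, output_add_immediate with an addend
   of 0x10004 produces two instructions,
       add     rD, rS, #4
       add     rD, rD, #65536
   because each ARM data-processing immediate is an 8-bit value rotated by an
   even amount.  */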
18869
18870 /* Return the name of a shifter operation. */
18871 static const char *
18872 arm_shift_nmem(enum rtx_code code)
18873 {
18874 switch (code)
18875 {
18876 case ASHIFT:
18877 return ARM_LSL_NAME;
18878
18879 case ASHIFTRT:
18880 return "asr";
18881
18882 case LSHIFTRT:
18883 return "lsr";
18884
18885 case ROTATERT:
18886 return "ror";
18887
18888 default:
18889 abort();
18890 }
18891 }
18892
18893 /* Return the appropriate ARM instruction for the operation code.
18894 The returned result should not be overwritten. OP is the rtx of the
18895 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18896 was shifted. */
18897 const char *
18898 arithmetic_instr (rtx op, int shift_first_arg)
18899 {
18900 switch (GET_CODE (op))
18901 {
18902 case PLUS:
18903 return "add";
18904
18905 case MINUS:
18906 return shift_first_arg ? "rsb" : "sub";
18907
18908 case IOR:
18909 return "orr";
18910
18911 case XOR:
18912 return "eor";
18913
18914 case AND:
18915 return "and";
18916
18917 case ASHIFT:
18918 case ASHIFTRT:
18919 case LSHIFTRT:
18920 case ROTATERT:
18921 return arm_shift_nmem(GET_CODE(op));
18922
18923 default:
18924 gcc_unreachable ();
18925 }
18926 }
18927
18928 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18929 for the operation code. The returned result should not be overwritten.
18930 OP is the rtx code of the shift.
18931 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18932 shift. */
18933 static const char *
18934 shift_op (rtx op, HOST_WIDE_INT *amountp)
18935 {
18936 const char * mnem;
18937 enum rtx_code code = GET_CODE (op);
18938
18939 switch (code)
18940 {
18941 case ROTATE:
18942 if (!CONST_INT_P (XEXP (op, 1)))
18943 {
18944 output_operand_lossage ("invalid shift operand");
18945 return NULL;
18946 }
18947
18948 code = ROTATERT;
18949 *amountp = 32 - INTVAL (XEXP (op, 1));
18950 mnem = "ror";
18951 break;
18952
18953 case ASHIFT:
18954 case ASHIFTRT:
18955 case LSHIFTRT:
18956 case ROTATERT:
18957 mnem = arm_shift_nmem(code);
18958 if (CONST_INT_P (XEXP (op, 1)))
18959 {
18960 *amountp = INTVAL (XEXP (op, 1));
18961 }
18962 else if (REG_P (XEXP (op, 1)))
18963 {
18964 *amountp = -1;
18965 return mnem;
18966 }
18967 else
18968 {
18969 output_operand_lossage ("invalid shift operand");
18970 return NULL;
18971 }
18972 break;
18973
18974 case MULT:
18975 /* We never have to worry about the amount being other than a
18976 power of 2, since this case can never be reloaded from a reg. */
18977 if (!CONST_INT_P (XEXP (op, 1)))
18978 {
18979 output_operand_lossage ("invalid shift operand");
18980 return NULL;
18981 }
18982
18983 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18984
18985 /* Amount must be a power of two. */
18986 if (*amountp & (*amountp - 1))
18987 {
18988 output_operand_lossage ("invalid shift operand");
18989 return NULL;
18990 }
18991
18992 *amountp = exact_log2 (*amountp);
18993 gcc_assert (IN_RANGE (*amountp, 0, 31));
18994 return ARM_LSL_NAME;
18995
18996 default:
18997 output_operand_lossage ("invalid shift operand");
18998 return NULL;
18999 }
19000
19001 /* This is not 100% correct, but follows from the desire to merge
19002 multiplication by a power of 2 with the recognizer for a
19003 shift. >=32 is not a valid shift for "lsl", so we must try to
19004 output a shift that produces the correct arithmetical result.
19005 Using lsr #32 is identical except for the fact that the carry bit
19006 is not set correctly if we set the flags; but we never use the
19007 carry bit from such an operation, so we can ignore that. */
19008 if (code == ROTATERT)
19009 /* Rotate is just modulo 32. */
19010 *amountp &= 31;
19011 else if (*amountp != (*amountp & 31))
19012 {
19013 if (code == ASHIFT)
19014 mnem = "lsr";
19015 *amountp = 32;
19016 }
19017
19018 /* Shifts of 0 are no-ops. */
19019 if (*amountp == 0)
19020 return NULL;
19021
19022 return mnem;
19023 }
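
/* For example, (mult x 8) is returned as ARM_LSL_NAME with *AMOUNTP set to 3,
   (rotate x 27) becomes "ror" with *AMOUNTP set to 5, and a shift by a
   register returns its mnemonic with *AMOUNTP set to -1.  */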
19024
19025 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19026 because /bin/as is horribly restrictive. The judgement about
19027 whether or not each character is 'printable' (and can be output as
19028 is) or not (and must be printed with an octal escape) must be made
19029 with reference to the *host* character set -- the situation is
19030 similar to that discussed in the comments above pp_c_char in
19031 c-pretty-print.c. */
19032
19033 #define MAX_ASCII_LEN 51
19034
19035 void
19036 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19037 {
19038 int i;
19039 int len_so_far = 0;
19040
19041 fputs ("\t.ascii\t\"", stream);
19042
19043 for (i = 0; i < len; i++)
19044 {
19045 int c = p[i];
19046
19047 if (len_so_far >= MAX_ASCII_LEN)
19048 {
19049 fputs ("\"\n\t.ascii\t\"", stream);
19050 len_so_far = 0;
19051 }
19052
19053 if (ISPRINT (c))
19054 {
19055 if (c == '\\' || c == '\"')
19056 {
19057 putc ('\\', stream);
19058 len_so_far++;
19059 }
19060 putc (c, stream);
19061 len_so_far++;
19062 }
19063 else
19064 {
19065 fprintf (stream, "\\%03o", c);
19066 len_so_far += 4;
19067 }
19068 }
19069
19070 fputs ("\"\n", stream);
19071 }
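
/* For example, the input bytes 'a', '"' and '\n' are emitted as
       .ascii  "a\"\012"
   with the quote escaped and the non-printable newline written as an octal
   escape; a fresh .ascii directive is started whenever the current string
   reaches MAX_ASCII_LEN characters.  */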
19072 \f
19073 /* Whether a register is callee saved or not. This is necessary because high
19074 registers are marked as caller saved when optimizing for size on Thumb-1
19075 targets, despite being callee saved, in order to avoid using them. */
19076 #define callee_saved_reg_p(reg) \
19077 (!call_used_regs[reg] \
19078 || (TARGET_THUMB1 && optimize_size \
19079 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19080
19081 /* Compute the register save mask for registers 0 through 12
19082 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19083
19084 static unsigned long
19085 arm_compute_save_reg0_reg12_mask (void)
19086 {
19087 unsigned long func_type = arm_current_func_type ();
19088 unsigned long save_reg_mask = 0;
19089 unsigned int reg;
19090
19091 if (IS_INTERRUPT (func_type))
19092 {
19093 unsigned int max_reg;
19094 /* Interrupt functions must not corrupt any registers,
19095 even call clobbered ones. If this is a leaf function
19096 we can just examine the registers used by the RTL, but
19097 otherwise we have to assume that whatever function is
19098 called might clobber anything, and so we have to save
19099 all the call-clobbered registers as well. */
19100 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19101 /* FIQ handlers have registers r8 - r12 banked, so
19102 we only need to check r0 - r7. Normal ISRs only
19103 bank r14 and r15, so we must check up to r12.
19104 r13 is the stack pointer which is always preserved,
19105 so we do not need to consider it here. */
19106 max_reg = 7;
19107 else
19108 max_reg = 12;
19109
19110 for (reg = 0; reg <= max_reg; reg++)
19111 if (df_regs_ever_live_p (reg)
19112 || (! crtl->is_leaf && call_used_regs[reg]))
19113 save_reg_mask |= (1 << reg);
19114
19115 /* Also save the pic base register if necessary. */
19116 if (flag_pic
19117 && !TARGET_SINGLE_PIC_BASE
19118 && arm_pic_register != INVALID_REGNUM
19119 && crtl->uses_pic_offset_table)
19120 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19121 }
19122 else if (IS_VOLATILE(func_type))
19123 {
19124 /* For noreturn functions we historically omitted register saves
19125 altogether. However this really messes up debugging. As a
19126 compromise save just the frame pointers. Combined with the link
19127 register saved elsewhere this should be sufficient to get
19128 a backtrace. */
19129 if (frame_pointer_needed)
19130 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19131 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19132 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19133 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19134 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19135 }
19136 else
19137 {
19138 /* In the normal case we only need to save those registers
19139 which are call saved and which are used by this function. */
19140 for (reg = 0; reg <= 11; reg++)
19141 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19142 save_reg_mask |= (1 << reg);
19143
19144 /* Handle the frame pointer as a special case. */
19145 if (frame_pointer_needed)
19146 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19147
19148 /* If we aren't loading the PIC register,
19149 don't stack it even though it may be live. */
19150 if (flag_pic
19151 && !TARGET_SINGLE_PIC_BASE
19152 && arm_pic_register != INVALID_REGNUM
19153 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19154 || crtl->uses_pic_offset_table))
19155 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19156
19157 /* The prologue will copy SP into R0, so save it. */
19158 if (IS_STACKALIGN (func_type))
19159 save_reg_mask |= 1;
19160 }
19161
19162 /* Save registers so the exception handler can modify them. */
19163 if (crtl->calls_eh_return)
19164 {
19165 unsigned int i;
19166
19167 for (i = 0; ; i++)
19168 {
19169 reg = EH_RETURN_DATA_REGNO (i);
19170 if (reg == INVALID_REGNUM)
19171 break;
19172 save_reg_mask |= 1 << reg;
19173 }
19174 }
19175
19176 return save_reg_mask;
19177 }
19178
19179 /* Return true if r3 is live at the start of the function. */
19180
19181 static bool
19182 arm_r3_live_at_start_p (void)
19183 {
19184 /* Just look at cfg info, which is still close enough to correct at this
19185 point. This gives false positives for broken functions that might use
19186 uninitialized data that happens to be allocated in r3, but who cares? */
19187 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19188 }
19189
19190 /* Compute the number of bytes used to store the static chain register on the
19191 stack, above the stack frame. We need to know this accurately to get the
19192 alignment of the rest of the stack frame correct. */
19193
19194 static int
19195 arm_compute_static_chain_stack_bytes (void)
19196 {
19197 /* See the defining assertion in arm_expand_prologue. */
19198 if (IS_NESTED (arm_current_func_type ())
19199 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19200 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19201 || flag_stack_clash_protection)
19202 && !df_regs_ever_live_p (LR_REGNUM)))
19203 && arm_r3_live_at_start_p ()
19204 && crtl->args.pretend_args_size == 0)
19205 return 4;
19206
19207 return 0;
19208 }
19209
19210 /* Compute a bit mask of which core registers need to be
19211 saved on the stack for the current function.
19212 This is used by arm_compute_frame_layout, which may add extra registers. */
19213
19214 static unsigned long
19215 arm_compute_save_core_reg_mask (void)
19216 {
19217 unsigned int save_reg_mask = 0;
19218 unsigned long func_type = arm_current_func_type ();
19219 unsigned int reg;
19220
19221 if (IS_NAKED (func_type))
19222 /* This should never really happen. */
19223 return 0;
19224
19225 /* If we are creating a stack frame, then we must save the frame pointer,
19226 IP (which will hold the old stack pointer), LR and the PC. */
19227 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19228 save_reg_mask |=
19229 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19230 | (1 << IP_REGNUM)
19231 | (1 << LR_REGNUM)
19232 | (1 << PC_REGNUM);
19233
19234 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19235
19236 /* Decide if we need to save the link register.
19237 Interrupt routines have their own banked link register,
19238 so they never need to save it.
19239 Otherwise if we do not use the link register we do not need to save
19240 it. If we are pushing other registers onto the stack however, we
19241 can save an instruction in the epilogue by pushing the link register
19242 now and then popping it back into the PC. This incurs extra memory
19243 accesses though, so we only do it when optimizing for size, and only
19244 if we know that we will not need a fancy return sequence. */
19245 if (df_regs_ever_live_p (LR_REGNUM)
19246 || (save_reg_mask
19247 && optimize_size
19248 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19249 && !crtl->tail_call_emit
19250 && !crtl->calls_eh_return))
19251 save_reg_mask |= 1 << LR_REGNUM;
19252
19253 if (cfun->machine->lr_save_eliminated)
19254 save_reg_mask &= ~ (1 << LR_REGNUM);
19255
19256 if (TARGET_REALLY_IWMMXT
19257 && ((bit_count (save_reg_mask)
19258 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19259 arm_compute_static_chain_stack_bytes())
19260 ) % 2) != 0)
19261 {
19262 /* The total number of registers that are going to be pushed
19263 onto the stack is odd. We need to ensure that the stack
19264 is 64-bit aligned before we start to save iWMMXt registers,
19265 and also before we start to create locals. (A local variable
19266 might be a double or long long which we will load/store using
19267 an iWMMXt instruction). Therefore we need to push another
19268 ARM register, so that the stack will be 64-bit aligned. We
19269 try to avoid using the arg registers (r0 -r3) as they might be
19270 used to pass values in a tail call. */
19271 for (reg = 4; reg <= 12; reg++)
19272 if ((save_reg_mask & (1 << reg)) == 0)
19273 break;
19274
19275 if (reg <= 12)
19276 save_reg_mask |= (1 << reg);
19277 else
19278 {
19279 cfun->machine->sibcall_blocked = 1;
19280 save_reg_mask |= (1 << 3);
19281 }
19282 }
19283
19284 /* We may need to push an additional register for use initializing the
19285 PIC base register. */
19286 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19287 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19288 {
19289 reg = thumb_find_work_register (1 << 4);
19290 if (!call_used_regs[reg])
19291 save_reg_mask |= (1 << reg);
19292 }
19293
19294 return save_reg_mask;
19295 }
19296
19297 /* Compute a bit mask of which core registers need to be
19298 saved on the stack for the current function. */
19299 static unsigned long
19300 thumb1_compute_save_core_reg_mask (void)
19301 {
19302 unsigned long mask;
19303 unsigned reg;
19304
19305 mask = 0;
19306 for (reg = 0; reg < 12; reg ++)
19307 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19308 mask |= 1 << reg;
19309
19310 /* Handle the frame pointer as a special case. */
19311 if (frame_pointer_needed)
19312 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19313
19314 if (flag_pic
19315 && !TARGET_SINGLE_PIC_BASE
19316 && arm_pic_register != INVALID_REGNUM
19317 && crtl->uses_pic_offset_table)
19318 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19319
19320 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19321 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19322 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19323
19324 /* LR will also be pushed if any lo regs are pushed. */
19325 if (mask & 0xff || thumb_force_lr_save ())
19326 mask |= (1 << LR_REGNUM);
19327
19328 /* Make sure we have a low work register if we need one.
19329 We will need one if we are going to push a high register,
19330 but we are not currently intending to push a low register. */
19331 if ((mask & 0xff) == 0
19332 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19333 {
19334 /* Use thumb_find_work_register to choose which register
19335 we will use. If the register is live then we will
19336 have to push it. Use LAST_LO_REGNUM as our fallback
19337 choice for the register to select. */
19338 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19339 /* Make sure the register returned by thumb_find_work_register is
19340 not part of the return value. */
19341 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19342 reg = LAST_LO_REGNUM;
19343
19344 if (callee_saved_reg_p (reg))
19345 mask |= 1 << reg;
19346 }
19347
19348 /* The 504 below is 8 bytes less than 512 because there are two possible
19349 alignment words. We can't tell here if they will be present or not so we
19350 have to play it safe and assume that they are. */
19351 if ((CALLER_INTERWORKING_SLOT_SIZE +
19352 ROUND_UP_WORD (get_frame_size ()) +
19353 crtl->outgoing_args_size) >= 504)
19354 {
19355 /* This is the same as the code in thumb1_expand_prologue() which
19356 determines which register to use for stack decrement. */
19357 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19358 if (mask & (1 << reg))
19359 break;
19360
19361 if (reg > LAST_LO_REGNUM)
19362 {
19363 /* Make sure we have a register available for stack decrement. */
19364 mask |= 1 << LAST_LO_REGNUM;
19365 }
19366 }
19367
19368 return mask;
19369 }
19370
19371
19372 /* Return the number of bytes required to save VFP registers. */
19373 static int
19374 arm_get_vfp_saved_size (void)
19375 {
19376 unsigned int regno;
19377 int count;
19378 int saved;
19379
19380 saved = 0;
19381 /* Space for saved VFP registers. */
19382 if (TARGET_HARD_FLOAT)
19383 {
19384 count = 0;
19385 for (regno = FIRST_VFP_REGNUM;
19386 regno < LAST_VFP_REGNUM;
19387 regno += 2)
19388 {
19389 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19390 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19391 {
19392 if (count > 0)
19393 {
19394 /* Work around the ARM10 VFPr1 bug. */
19395 if (count == 2 && !arm_arch6)
19396 count++;
19397 saved += count * 8;
19398 }
19399 count = 0;
19400 }
19401 else
19402 count++;
19403 }
19404 if (count > 0)
19405 {
19406 if (count == 2 && !arm_arch6)
19407 count++;
19408 saved += count * 8;
19409 }
19410 }
19411 return saved;
19412 }
19413
19414
19415 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19416 everything bar the final return instruction. If SIMPLE_RETURN is true,
19417 then do not output the epilogue, because it has already been emitted in RTL. */
19418 const char *
19419 output_return_instruction (rtx operand, bool really_return, bool reverse,
19420 bool simple_return)
19421 {
19422 char conditional[10];
19423 char instr[100];
19424 unsigned reg;
19425 unsigned long live_regs_mask;
19426 unsigned long func_type;
19427 arm_stack_offsets *offsets;
19428
19429 func_type = arm_current_func_type ();
19430
19431 if (IS_NAKED (func_type))
19432 return "";
19433
19434 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19435 {
19436 /* If this function was declared non-returning, and we have
19437 found a tail call, then we have to trust that the called
19438 function won't return. */
19439 if (really_return)
19440 {
19441 rtx ops[2];
19442
19443 /* Otherwise, trap an attempted return by aborting. */
19444 ops[0] = operand;
19445 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19446 : "abort");
19447 assemble_external_libcall (ops[1]);
19448 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19449 }
19450
19451 return "";
19452 }
19453
19454 gcc_assert (!cfun->calls_alloca || really_return);
19455
19456 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19457
19458 cfun->machine->return_used_this_function = 1;
19459
19460 offsets = arm_get_frame_offsets ();
19461 live_regs_mask = offsets->saved_regs_mask;
19462
19463 if (!simple_return && live_regs_mask)
19464 {
19465 const char * return_reg;
19466
19467 /* If we do not have any special requirements for function exit
19468 (e.g. interworking) then we can load the return address
19469 directly into the PC. Otherwise we must load it into LR. */
19470 if (really_return
19471 && !IS_CMSE_ENTRY (func_type)
19472 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19473 return_reg = reg_names[PC_REGNUM];
19474 else
19475 return_reg = reg_names[LR_REGNUM];
19476
19477 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19478 {
19479 /* There are three possible reasons for the IP register
19480 being saved: 1) a stack frame was created, in which case
19481 IP contains the old stack pointer; 2) an ISR routine
19482 corrupted it; or 3) it was saved to align the stack on
19483 iWMMXt. In case 1, restore IP into SP, otherwise just
19484 restore IP. */
19485 if (frame_pointer_needed)
19486 {
19487 live_regs_mask &= ~ (1 << IP_REGNUM);
19488 live_regs_mask |= (1 << SP_REGNUM);
19489 }
19490 else
19491 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19492 }
19493
19494 /* On some ARM architectures it is faster to use LDR rather than
19495 LDM to load a single register. On other architectures, the
19496 cost is the same. In 26 bit mode, or for exception handlers,
19497 we have to use LDM to load the PC so that the CPSR is also
19498 restored. */
19499 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19500 if (live_regs_mask == (1U << reg))
19501 break;
19502
19503 if (reg <= LAST_ARM_REGNUM
19504 && (reg != LR_REGNUM
19505 || ! really_return
19506 || ! IS_INTERRUPT (func_type)))
19507 {
19508 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19509 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19510 }
19511 else
19512 {
19513 char *p;
19514 int first = 1;
19515
19516 /* Generate the load multiple instruction to restore the
19517 registers. Note we can get here, even if
19518 frame_pointer_needed is true, but only if sp already
19519 points to the base of the saved core registers. */
19520 if (live_regs_mask & (1 << SP_REGNUM))
19521 {
19522 unsigned HOST_WIDE_INT stack_adjust;
19523
19524 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19525 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19526
19527 if (stack_adjust && arm_arch5 && TARGET_ARM)
19528 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19529 else
19530 {
19531 /* If we can't use ldmib (SA110 bug),
19532 then try to pop r3 instead. */
19533 if (stack_adjust)
19534 live_regs_mask |= 1 << 3;
19535
19536 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19537 }
19538 }
19539 /* For interrupt returns we have to use an LDM rather than
19540 a POP so that we can use the exception return variant. */
19541 else if (IS_INTERRUPT (func_type))
19542 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19543 else
19544 sprintf (instr, "pop%s\t{", conditional);
19545
19546 p = instr + strlen (instr);
19547
19548 for (reg = 0; reg <= SP_REGNUM; reg++)
19549 if (live_regs_mask & (1 << reg))
19550 {
19551 int l = strlen (reg_names[reg]);
19552
19553 if (first)
19554 first = 0;
19555 else
19556 {
19557 memcpy (p, ", ", 2);
19558 p += 2;
19559 }
19560
19561 memcpy (p, "%|", 2);
19562 memcpy (p + 2, reg_names[reg], l);
19563 p += l + 2;
19564 }
19565
19566 if (live_regs_mask & (1 << LR_REGNUM))
19567 {
19568 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19569 /* If returning from an interrupt, restore the CPSR. */
19570 if (IS_INTERRUPT (func_type))
19571 strcat (p, "^");
19572 }
19573 else
19574 strcpy (p, "}");
19575 }
19576
19577 output_asm_insn (instr, & operand);
19578
19579 /* See if we need to generate an extra instruction to
19580 perform the actual function return. */
19581 if (really_return
19582 && func_type != ARM_FT_INTERWORKED
19583 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19584 {
19585 /* The return has already been handled
19586 by loading the LR into the PC. */
19587 return "";
19588 }
19589 }
19590
19591 if (really_return)
19592 {
19593 switch ((int) ARM_FUNC_TYPE (func_type))
19594 {
19595 case ARM_FT_ISR:
19596 case ARM_FT_FIQ:
19597 /* ??? This is wrong for unified assembly syntax. */
19598 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19599 break;
19600
19601 case ARM_FT_INTERWORKED:
19602 gcc_assert (arm_arch5 || arm_arch4t);
19603 sprintf (instr, "bx%s\t%%|lr", conditional);
19604 break;
19605
19606 case ARM_FT_EXCEPTION:
19607 /* ??? This is wrong for unified assembly syntax. */
19608 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19609 break;
19610
19611 default:
19612 if (IS_CMSE_ENTRY (func_type))
19613 {
19614 /* Check if we have to clear the 'GE bits', which are only used if
19615 parallel add and subtraction instructions are available. */
19616 if (TARGET_INT_SIMD)
19617 snprintf (instr, sizeof (instr),
19618 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19619 else
19620 snprintf (instr, sizeof (instr),
19621 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19622
19623 output_asm_insn (instr, & operand);
19624 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19625 {
19626 /* Clear the cumulative exception-status bits (0-4,7) and the
19627 condition code bits (28-31) of the FPSCR. We need to
19628 remember to clear the first scratch register used (IP) and
19629 save and restore the second (r4). */
19630 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19631 output_asm_insn (instr, & operand);
19632 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19633 output_asm_insn (instr, & operand);
19634 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19635 output_asm_insn (instr, & operand);
19636 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19637 output_asm_insn (instr, & operand);
19638 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19639 output_asm_insn (instr, & operand);
19640 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19641 output_asm_insn (instr, & operand);
19642 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19643 output_asm_insn (instr, & operand);
19644 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19645 output_asm_insn (instr, & operand);
19646 }
19647 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19648 }
19649 /* Use bx if it's available. */
19650 else if (arm_arch5 || arm_arch4t)
19651 sprintf (instr, "bx%s\t%%|lr", conditional);
19652 else
19653 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19654 break;
19655 }
19656
19657 output_asm_insn (instr, & operand);
19658 }
19659
19660 return "";
19661 }
19662
19663 /* Output in FILE asm statements needed to declare the NAME of the function
19664 defined by its DECL node. */
19665
19666 void
19667 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19668 {
19669 size_t cmse_name_len;
19670 char *cmse_name = 0;
19671 char cmse_prefix[] = "__acle_se_";
19672
19673 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19674 extra function label for each function with the 'cmse_nonsecure_entry'
19675 attribute. This extra function label should be prepended with
19676 '__acle_se_', telling the linker that it needs to create secure gateway
19677 veneers for this function. */
19678 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19679 DECL_ATTRIBUTES (decl)))
19680 {
19681 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19682 cmse_name = XALLOCAVEC (char, cmse_name_len);
19683 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19684 targetm.asm_out.globalize_label (file, cmse_name);
19685
19686 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19687 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19688 }
19689
19690 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19691 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19692 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19693 ASM_OUTPUT_LABEL (file, name);
19694
19695 if (cmse_name)
19696 ASM_OUTPUT_LABEL (file, cmse_name);
19697
19698 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19699 }
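
/* For a function "foo" carrying the cmse_nonsecure_entry attribute this
   emits, in addition to the usual declaration and .type directives, a
   globalized "__acle_se_foo" label at the same address as "foo"; the linker
   uses that label to create the secure gateway veneer.  */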
19700
19701 /* Write the function name into the code section, directly preceding
19702 the function prologue.
19703
19704 Code will be output similar to this:
19705 t0
19706 .ascii "arm_poke_function_name", 0
19707 .align
19708 t1
19709 .word 0xff000000 + (t1 - t0)
19710 arm_poke_function_name
19711 mov ip, sp
19712 stmfd sp!, {fp, ip, lr, pc}
19713 sub fp, ip, #4
19714
19715 When performing a stack backtrace, code can inspect the value
19716 of 'pc' stored at 'fp' + 0. If the trace function then looks
19717 at location pc - 12 and the top 8 bits are set, then we know
19718 that there is a function name embedded immediately preceding this
19719 location, and that its length is ((pc[-3]) & ~0xff000000).
19720
19721 We assume that pc is declared as a pointer to an unsigned long.
19722
19723 It is of no benefit to output the function name if we are assembling
19724 a leaf function. These function types will not contain a stack
19725 backtrace structure, therefore it is not possible to determine the
19726 function name. */
19727 void
19728 arm_poke_function_name (FILE *stream, const char *name)
19729 {
19730 unsigned long alignlength;
19731 unsigned long length;
19732 rtx x;
19733
19734 length = strlen (name) + 1;
19735 alignlength = ROUND_UP_WORD (length);
19736
19737 ASM_OUTPUT_ASCII (stream, name, length);
19738 ASM_OUTPUT_ALIGN (stream, 2);
19739 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19740 assemble_aligned_integer (UNITS_PER_WORD, x);
19741 }
19742
19743 /* Place some comments into the assembler stream
19744 describing the current function. */
19745 static void
19746 arm_output_function_prologue (FILE *f)
19747 {
19748 unsigned long func_type;
19749
19750 /* Sanity check. */
19751 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19752
19753 func_type = arm_current_func_type ();
19754
19755 switch ((int) ARM_FUNC_TYPE (func_type))
19756 {
19757 default:
19758 case ARM_FT_NORMAL:
19759 break;
19760 case ARM_FT_INTERWORKED:
19761 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19762 break;
19763 case ARM_FT_ISR:
19764 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19765 break;
19766 case ARM_FT_FIQ:
19767 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19768 break;
19769 case ARM_FT_EXCEPTION:
19770 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19771 break;
19772 }
19773
19774 if (IS_NAKED (func_type))
19775 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19776
19777 if (IS_VOLATILE (func_type))
19778 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19779
19780 if (IS_NESTED (func_type))
19781 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19782 if (IS_STACKALIGN (func_type))
19783 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19784 if (IS_CMSE_ENTRY (func_type))
19785 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19786
19787 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19788 crtl->args.size,
19789 crtl->args.pretend_args_size,
19790 (HOST_WIDE_INT) get_frame_size ());
19791
19792 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19793 frame_pointer_needed,
19794 cfun->machine->uses_anonymous_args);
19795
19796 if (cfun->machine->lr_save_eliminated)
19797 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19798
19799 if (crtl->calls_eh_return)
19800 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19801
19802 }
19803
19804 static void
19805 arm_output_function_epilogue (FILE *)
19806 {
19807 arm_stack_offsets *offsets;
19808
19809 if (TARGET_THUMB1)
19810 {
19811 int regno;
19812
19813 /* Emit any call-via-reg trampolines that are needed for v4t support
19814 of call_reg and call_value_reg type insns. */
19815 for (regno = 0; regno < LR_REGNUM; regno++)
19816 {
19817 rtx label = cfun->machine->call_via[regno];
19818
19819 if (label != NULL)
19820 {
19821 switch_to_section (function_section (current_function_decl));
19822 targetm.asm_out.internal_label (asm_out_file, "L",
19823 CODE_LABEL_NUMBER (label));
19824 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19825 }
19826 }
19827
19828 /* ??? Probably not safe to set this here, since it assumes that a
19829 function will be emitted as assembly immediately after we generate
19830 RTL for it. This does not happen for inline functions. */
19831 cfun->machine->return_used_this_function = 0;
19832 }
19833 else /* TARGET_32BIT */
19834 {
19835 /* We need to take into account any stack-frame rounding. */
19836 offsets = arm_get_frame_offsets ();
19837
19838 gcc_assert (!use_return_insn (FALSE, NULL)
19839 || (cfun->machine->return_used_this_function != 0)
19840 || offsets->saved_regs == offsets->outgoing_args
19841 || frame_pointer_needed);
19842 }
19843 }
19844
19845 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19846 STR and STRD. If an even number of registers are being pushed, one
19847 or more STRD patterns are created for each register pair. If an
19848 odd number of registers are pushed, emit an initial STR followed by
19849 as many STRD instructions as are needed. This works best when the
19850 stack is initially 64-bit aligned (the normal case), since it
19851 ensures that each STRD is also 64-bit aligned. */
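/* For example (illustrative only), pushing the odd-sized set {r4, r5, r6}
   emits a single store with writeback followed by one STRD:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   which leaves the stack laid out exactly as "push {r4, r5, r6}" would,
   while keeping the STRD 64-bit aligned whenever SP was 64-bit aligned on
   entry.  */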
19852 static void
19853 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19854 {
19855 int num_regs = 0;
19856 int i;
19857 int regno;
19858 rtx par = NULL_RTX;
19859 rtx dwarf = NULL_RTX;
19860 rtx tmp;
19861 bool first = true;
19862
19863 num_regs = bit_count (saved_regs_mask);
19864
19865 /* Must be at least one register to save, and can't save SP or PC. */
19866 gcc_assert (num_regs > 0 && num_regs <= 14);
19867 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19868 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19869
19870 /* Create sequence for DWARF info. All the frame-related data for
19871 debugging is held in this wrapper. */
19872 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19873
19874 /* Describe the stack adjustment. */
19875 tmp = gen_rtx_SET (stack_pointer_rtx,
19876 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19877 RTX_FRAME_RELATED_P (tmp) = 1;
19878 XVECEXP (dwarf, 0, 0) = tmp;
19879
19880 /* Find the first register. */
19881 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19882 ;
19883
19884 i = 0;
19885
19886 /* If there's an odd number of registers to push, start off by
19887 pushing a single register. This ensures that subsequent strd
19888 operations are dword aligned (assuming that SP was originally
19889 64-bit aligned). */
19890 if ((num_regs & 1) != 0)
19891 {
19892 rtx reg, mem, insn;
19893
19894 reg = gen_rtx_REG (SImode, regno);
19895 if (num_regs == 1)
19896 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19897 stack_pointer_rtx));
19898 else
19899 mem = gen_frame_mem (Pmode,
19900 gen_rtx_PRE_MODIFY
19901 (Pmode, stack_pointer_rtx,
19902 plus_constant (Pmode, stack_pointer_rtx,
19903 -4 * num_regs)));
19904
19905 tmp = gen_rtx_SET (mem, reg);
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 insn = emit_insn (tmp);
19908 RTX_FRAME_RELATED_P (insn) = 1;
19909 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19910 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19911 RTX_FRAME_RELATED_P (tmp) = 1;
19912 i++;
19913 regno++;
19914 XVECEXP (dwarf, 0, i) = tmp;
19915 first = false;
19916 }
19917
19918 while (i < num_regs)
19919 if (saved_regs_mask & (1 << regno))
19920 {
19921 rtx reg1, reg2, mem1, mem2;
19922 rtx tmp0, tmp1, tmp2;
19923 int regno2;
19924
19925 /* Find the register to pair with this one. */
19926 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19927 regno2++)
19928 ;
19929
19930 reg1 = gen_rtx_REG (SImode, regno);
19931 reg2 = gen_rtx_REG (SImode, regno2);
19932
19933 if (first)
19934 {
19935 rtx insn;
19936
19937 first = false;
19938 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19939 stack_pointer_rtx,
19940 -4 * num_regs));
19941 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 -4 * (num_regs - 1)));
19944 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19945 plus_constant (Pmode, stack_pointer_rtx,
19946 -4 * (num_regs)));
19947 tmp1 = gen_rtx_SET (mem1, reg1);
19948 tmp2 = gen_rtx_SET (mem2, reg2);
19949 RTX_FRAME_RELATED_P (tmp0) = 1;
19950 RTX_FRAME_RELATED_P (tmp1) = 1;
19951 RTX_FRAME_RELATED_P (tmp2) = 1;
19952 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19953 XVECEXP (par, 0, 0) = tmp0;
19954 XVECEXP (par, 0, 1) = tmp1;
19955 XVECEXP (par, 0, 2) = tmp2;
19956 insn = emit_insn (par);
19957 RTX_FRAME_RELATED_P (insn) = 1;
19958 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19959 }
19960 else
19961 {
19962 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19963 stack_pointer_rtx,
19964 4 * i));
19965 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19966 stack_pointer_rtx,
19967 4 * (i + 1)));
19968 tmp1 = gen_rtx_SET (mem1, reg1);
19969 tmp2 = gen_rtx_SET (mem2, reg2);
19970 RTX_FRAME_RELATED_P (tmp1) = 1;
19971 RTX_FRAME_RELATED_P (tmp2) = 1;
19972 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19973 XVECEXP (par, 0, 0) = tmp1;
19974 XVECEXP (par, 0, 1) = tmp2;
19975 emit_insn (par);
19976 }
19977
19978 /* Create unwind information. This is an approximation. */
19979 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19980 plus_constant (Pmode,
19981 stack_pointer_rtx,
19982 4 * i)),
19983 reg1);
19984 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19985 plus_constant (Pmode,
19986 stack_pointer_rtx,
19987 4 * (i + 1))),
19988 reg2);
19989
19990 RTX_FRAME_RELATED_P (tmp1) = 1;
19991 RTX_FRAME_RELATED_P (tmp2) = 1;
19992 XVECEXP (dwarf, 0, i + 1) = tmp1;
19993 XVECEXP (dwarf, 0, i + 2) = tmp2;
19994 i += 2;
19995 regno = regno2 + 1;
19996 }
19997 else
19998 regno++;
19999
20000 return;
20001 }
20002
20003 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20004 whenever possible, otherwise it emits single-word stores. The first store
20005 also allocates stack space for all saved registers, using writeback with
20006 pre-indexed addressing. All other stores use offset addressing. If no STRD
20007 can be emitted, this function emits a sequence of single-word stores,
20008 and not an STM as before, because single-word stores provide more freedom
20009 for scheduling and can be turned into an STM by peephole optimizations. */
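/* For example (an illustrative sketch), for a SAVED_REGS_MASK covering
   {r4, r5, r7} this emits:

	strd	r4, r5, [sp, #-12]!	@ first store allocates all 12 bytes
	str	r7, [sp, #8]

   r7 is odd-numbered and so cannot start an STRD pair; it falls back to a
   single-word store.  */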
20010 static void
20011 arm_emit_strd_push (unsigned long saved_regs_mask)
20012 {
20013 int num_regs = 0;
20014 int i, j, dwarf_index = 0;
20015 int offset = 0;
20016 rtx dwarf = NULL_RTX;
20017 rtx insn = NULL_RTX;
20018 rtx tmp, mem;
20019
20020 /* TODO: More efficient code can be emitted by changing the
20021 layout, e.g., by first pushing all pairs that can use STRD to keep the
20022 stack aligned, and then pushing all other registers. */
20023 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20024 if (saved_regs_mask & (1 << i))
20025 num_regs++;
20026
20027 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20028 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20029 gcc_assert (num_regs > 0);
20030
20031 /* Create sequence for DWARF info. */
20032 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20033
20034 /* For dwarf info, we generate explicit stack update. */
20035 tmp = gen_rtx_SET (stack_pointer_rtx,
20036 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20039
20040 /* Save registers. */
20041 offset = -4 * num_regs;
20042 j = 0;
20043 while (j <= LAST_ARM_REGNUM)
20044 if (saved_regs_mask & (1 << j))
20045 {
20046 if ((j % 2 == 0)
20047 && (saved_regs_mask & (1 << (j + 1))))
20048 {
20049 /* Current register and the next register form a register pair for
20050 which STRD can be generated. */
20051 if (offset < 0)
20052 {
20053 /* Allocate stack space for all saved registers. */
20054 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20055 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20056 mem = gen_frame_mem (DImode, tmp);
20057 offset = 0;
20058 }
20059 else if (offset > 0)
20060 mem = gen_frame_mem (DImode,
20061 plus_constant (Pmode,
20062 stack_pointer_rtx,
20063 offset));
20064 else
20065 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20066
20067 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20068 RTX_FRAME_RELATED_P (tmp) = 1;
20069 tmp = emit_insn (tmp);
20070
20071 /* Record the first store insn. */
20072 if (dwarf_index == 1)
20073 insn = tmp;
20074
20075 /* Generate dwarf info. */
20076 mem = gen_frame_mem (SImode,
20077 plus_constant (Pmode,
20078 stack_pointer_rtx,
20079 offset));
20080 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20081 RTX_FRAME_RELATED_P (tmp) = 1;
20082 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20083
20084 mem = gen_frame_mem (SImode,
20085 plus_constant (Pmode,
20086 stack_pointer_rtx,
20087 offset + 4));
20088 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20091
20092 offset += 8;
20093 j += 2;
20094 }
20095 else
20096 {
20097 /* Emit a single word store. */
20098 if (offset < 0)
20099 {
20100 /* Allocate stack space for all saved registers. */
20101 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20102 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20103 mem = gen_frame_mem (SImode, tmp);
20104 offset = 0;
20105 }
20106 else if (offset > 0)
20107 mem = gen_frame_mem (SImode,
20108 plus_constant (Pmode,
20109 stack_pointer_rtx,
20110 offset));
20111 else
20112 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20113
20114 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20115 RTX_FRAME_RELATED_P (tmp) = 1;
20116 tmp = emit_insn (tmp);
20117
20118 /* Record the first store insn. */
20119 if (dwarf_index == 1)
20120 insn = tmp;
20121
20122 /* Generate dwarf info. */
20123 mem = gen_frame_mem (SImode,
20124 plus_constant (Pmode,
20125 stack_pointer_rtx,
20126 offset));
20127 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20128 RTX_FRAME_RELATED_P (tmp) = 1;
20129 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20130
20131 offset += 4;
20132 j += 1;
20133 }
20134 }
20135 else
20136 j++;
20137
20138 /* Attach dwarf info to the first insn we generate. */
20139 gcc_assert (insn != NULL_RTX);
20140 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20141 RTX_FRAME_RELATED_P (insn) = 1;
20142 }
20143
20144 /* Generate and emit an insn that we will recognize as a push_multi.
20145 Unfortunately, since this insn does not reflect very well the actual
20146 semantics of the operation, we need to annotate the insn for the benefit
20147 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20148 MASK for registers that should be annotated for DWARF2 frame unwind
20149 information. */
20150 static rtx
20151 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20152 {
20153 int num_regs = 0;
20154 int num_dwarf_regs = 0;
20155 int i, j;
20156 rtx par;
20157 rtx dwarf;
20158 int dwarf_par_index;
20159 rtx tmp, reg;
20160
20161 /* We don't record the PC in the dwarf frame information. */
20162 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20163
20164 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20165 {
20166 if (mask & (1 << i))
20167 num_regs++;
20168 if (dwarf_regs_mask & (1 << i))
20169 num_dwarf_regs++;
20170 }
20171
20172 gcc_assert (num_regs && num_regs <= 16);
20173 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20174
20175 /* For the body of the insn we are going to generate an UNSPEC in
20176 parallel with several USEs. This allows the insn to be recognized
20177 by the push_multi pattern in the arm.md file.
20178
20179 The body of the insn looks something like this:
20180
20181 (parallel [
20182 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20183 (const_int:SI <num>)))
20184 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20185 (use (reg:SI XX))
20186 (use (reg:SI YY))
20187 ...
20188 ])
20189
20190 For the frame note however, we try to be more explicit and actually
20191 show each register being stored into the stack frame, plus a (single)
20192 decrement of the stack pointer. We do it this way in order to be
20193 friendly to the stack unwinding code, which only wants to see a single
20194 stack decrement per instruction. The RTL we generate for the note looks
20195 something like this:
20196
20197 (sequence [
20198 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20199 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20200 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20201 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20202 ...
20203 ])
20204
20205 FIXME: In an ideal world the PRE_MODIFY would not exist and
20206 instead we'd have a parallel expression detailing all
20207 the stores to the various memory addresses so that debug
20208 information is more up-to-date. Remember however while writing
20209 this to take care of the constraints with the push instruction.
20210
20211 Note also that this has to be taken care of for the VFP registers.
20212
20213 For more see PR43399. */
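 /* As a concrete (illustrative) instance, for MASK == DWARF_REGS_MASK
    covering {r4, r5, lr} the insn is intended to be recognized by the
    push_multi pattern (i.e. a single "push {r4, r5, lr}"), while the
    attached note describes:

    (sequence [
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
	(set (mem:SI (reg:SI sp)) (reg:SI r4))
	(set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
	(set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))
    ])  */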
20214
20215 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20216 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20217 dwarf_par_index = 1;
20218
20219 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20220 {
20221 if (mask & (1 << i))
20222 {
20223 reg = gen_rtx_REG (SImode, i);
20224
20225 XVECEXP (par, 0, 0)
20226 = gen_rtx_SET (gen_frame_mem
20227 (BLKmode,
20228 gen_rtx_PRE_MODIFY (Pmode,
20229 stack_pointer_rtx,
20230 plus_constant
20231 (Pmode, stack_pointer_rtx,
20232 -4 * num_regs))
20233 ),
20234 gen_rtx_UNSPEC (BLKmode,
20235 gen_rtvec (1, reg),
20236 UNSPEC_PUSH_MULT));
20237
20238 if (dwarf_regs_mask & (1 << i))
20239 {
20240 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20241 reg);
20242 RTX_FRAME_RELATED_P (tmp) = 1;
20243 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20244 }
20245
20246 break;
20247 }
20248 }
20249
20250 for (j = 1, i++; j < num_regs; i++)
20251 {
20252 if (mask & (1 << i))
20253 {
20254 reg = gen_rtx_REG (SImode, i);
20255
20256 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20257
20258 if (dwarf_regs_mask & (1 << i))
20259 {
20260 tmp
20261 = gen_rtx_SET (gen_frame_mem
20262 (SImode,
20263 plus_constant (Pmode, stack_pointer_rtx,
20264 4 * j)),
20265 reg);
20266 RTX_FRAME_RELATED_P (tmp) = 1;
20267 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20268 }
20269
20270 j++;
20271 }
20272 }
20273
20274 par = emit_insn (par);
20275
20276 tmp = gen_rtx_SET (stack_pointer_rtx,
20277 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20278 RTX_FRAME_RELATED_P (tmp) = 1;
20279 XVECEXP (dwarf, 0, 0) = tmp;
20280
20281 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20282
20283 return par;
20284 }
20285
20286 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20287 SIZE is the offset to be adjusted.
20288 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20289 static void
20290 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20291 {
20292 rtx dwarf;
20293
20294 RTX_FRAME_RELATED_P (insn) = 1;
20295 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20296 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20297 }
20298
20299 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20300 SAVED_REGS_MASK shows which registers need to be restored.
20301
20302 Unfortunately, since this insn does not reflect very well the actual
20303 semantics of the operation, we need to annotate the insn for the benefit
20304 of DWARF2 frame unwind information. */
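/* For example (illustrative), for a SAVED_REGS_MASK covering {r4, r5, pc}
   the emitted parallel holds a return, the SP increment by 12 and three
   loads from [sp], [sp, #4] and [sp, #8]; it is intended to match as a
   single "pop {r4, r5, pc}".  */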
20305 static void
20306 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20307 {
20308 int num_regs = 0;
20309 int i, j;
20310 rtx par;
20311 rtx dwarf = NULL_RTX;
20312 rtx tmp, reg;
20313 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20314 int offset_adj;
20315 int emit_update;
20316
20317 offset_adj = return_in_pc ? 1 : 0;
20318 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20319 if (saved_regs_mask & (1 << i))
20320 num_regs++;
20321
20322 gcc_assert (num_regs && num_regs <= 16);
20323
20324 /* If SP is in the reglist, then we don't emit an SP update insn. */
20325 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20326
20327 /* The parallel needs to hold num_regs SETs
20328 and one SET for the stack update. */
20329 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20330
20331 if (return_in_pc)
20332 XVECEXP (par, 0, 0) = ret_rtx;
20333
20334 if (emit_update)
20335 {
20336 /* Increment the stack pointer, based on there being
20337 num_regs 4-byte registers to restore. */
20338 tmp = gen_rtx_SET (stack_pointer_rtx,
20339 plus_constant (Pmode,
20340 stack_pointer_rtx,
20341 4 * num_regs));
20342 RTX_FRAME_RELATED_P (tmp) = 1;
20343 XVECEXP (par, 0, offset_adj) = tmp;
20344 }
20345
20346 /* Now restore every reg, which may include PC. */
20347 for (j = 0, i = 0; j < num_regs; i++)
20348 if (saved_regs_mask & (1 << i))
20349 {
20350 reg = gen_rtx_REG (SImode, i);
20351 if ((num_regs == 1) && emit_update && !return_in_pc)
20352 {
20353 /* Emit single load with writeback. */
20354 tmp = gen_frame_mem (SImode,
20355 gen_rtx_POST_INC (Pmode,
20356 stack_pointer_rtx));
20357 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20358 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20359 return;
20360 }
20361
20362 tmp = gen_rtx_SET (reg,
20363 gen_frame_mem
20364 (SImode,
20365 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20366 RTX_FRAME_RELATED_P (tmp) = 1;
20367 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20368
20369 /* We need to maintain a sequence for DWARF info too. As dwarf info
20370 should not have PC, skip PC. */
20371 if (i != PC_REGNUM)
20372 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20373
20374 j++;
20375 }
20376
20377 if (return_in_pc)
20378 par = emit_jump_insn (par);
20379 else
20380 par = emit_insn (par);
20381
20382 REG_NOTES (par) = dwarf;
20383 if (!return_in_pc)
20384 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20385 stack_pointer_rtx, stack_pointer_rtx);
20386 }
20387
20388 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20389 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20390
20391 Unfortunately, since this insn does not reflect very well the actual
20392 semantics of the operation, we need to annotate the insn for the benefit
20393 of DWARF2 frame unwind information. */
20394 static void
20395 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20396 {
20397 int i, j;
20398 rtx par;
20399 rtx dwarf = NULL_RTX;
20400 rtx tmp, reg;
20401
20402 gcc_assert (num_regs && num_regs <= 32);
20403
20404 /* Workaround ARM10 VFPr1 bug. */
20405 if (num_regs == 2 && !arm_arch6)
20406 {
20407 if (first_reg == 15)
20408 first_reg--;
20409
20410 num_regs++;
20411 }
20412
20413 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20414 there could be up to 32 D-registers to restore.
20415 If there are more than 16 D-registers, make two recursive calls,
20416 each of which emits one pop_multi instruction. */
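  /* For example (illustrative), a request for 20 D-registers is handled as
     one pop_multi of the first 16 followed by a second pop_multi of the
     remaining 4, with BASE_REG having been advanced by the first pop.  */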
20417 if (num_regs > 16)
20418 {
20419 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20420 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20421 return;
20422 }
20423
20424 /* The parallel needs to hold num_regs SETs
20425 and one SET for the stack update. */
20426 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20427
20428 /* Increment the stack pointer, based on there being
20429 num_regs 8-byte registers to restore. */
20430 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20431 RTX_FRAME_RELATED_P (tmp) = 1;
20432 XVECEXP (par, 0, 0) = tmp;
20433
20434 /* Now show every reg that will be restored, using a SET for each. */
20435 for (j = 0, i = first_reg; j < num_regs; i += 2)
20436 {
20437 reg = gen_rtx_REG (DFmode, i);
20438
20439 tmp = gen_rtx_SET (reg,
20440 gen_frame_mem
20441 (DFmode,
20442 plus_constant (Pmode, base_reg, 8 * j)));
20443 RTX_FRAME_RELATED_P (tmp) = 1;
20444 XVECEXP (par, 0, j + 1) = tmp;
20445
20446 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20447
20448 j++;
20449 }
20450
20451 par = emit_insn (par);
20452 REG_NOTES (par) = dwarf;
20453
20454 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20455 if (REGNO (base_reg) == IP_REGNUM)
20456 {
20457 RTX_FRAME_RELATED_P (par) = 1;
20458 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20459 }
20460 else
20461 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20462 base_reg, base_reg);
20463 }
20464
20465 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20466 even number of registers is being popped, multiple LDRD patterns are created for
20467 all register pairs. If an odd number of registers is popped, the last register
20468 is loaded using an LDR pattern. */
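/* For example (illustrative only), popping {r4, r5, r6, r7} produces two
   LDRD patterns followed by a single stack adjustment:

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   whereas popping {r4, r5, r6, pc} uses one LDRD for r4/r5, adjusts SP by
   8, and then pops {r6, pc} through arm_emit_multi_reg_pop.  */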
20469 static void
20470 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20471 {
20472 int num_regs = 0;
20473 int i, j;
20474 rtx par = NULL_RTX;
20475 rtx dwarf = NULL_RTX;
20476 rtx tmp, reg, tmp1;
20477 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20478
20479 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20480 if (saved_regs_mask & (1 << i))
20481 num_regs++;
20482
20483 gcc_assert (num_regs && num_regs <= 16);
20484
20485 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20486 popped. If num_regs was even it now becomes odd, and a pop with PC
20487 can be generated; if it was odd it becomes even, and an LDR with
20488 return can be generated for PC. */
20489 if (return_in_pc)
20490 num_regs--;
20491
20492 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20493
20494 /* Var j iterates over all the registers to find those set in
20495 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20496 A PARALLEL RTX of register-pair is created here, so that pattern for
20497 LDRD can be matched. As PC is always last register to be popped, and
20498 we have already decremented num_regs if PC, we don't have to worry
20499 about PC in this loop. */
20500 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20501 if (saved_regs_mask & (1 << j))
20502 {
20503 /* Create RTX for memory load. */
20504 reg = gen_rtx_REG (SImode, j);
20505 tmp = gen_rtx_SET (reg,
20506 gen_frame_mem (SImode,
20507 plus_constant (Pmode,
20508 stack_pointer_rtx, 4 * i)));
20509 RTX_FRAME_RELATED_P (tmp) = 1;
20510
20511 if (i % 2 == 0)
20512 {
20513 /* When saved-register index (i) is even, the RTX to be emitted is
20514 yet to be created. Hence create it first. The LDRD pattern we
20515 are generating is :
20516 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20517 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20518 where target registers need not be consecutive. */
20519 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20520 dwarf = NULL_RTX;
20521 }
20522
20523 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20524 added as 0th element and if i is odd, reg_i is added as 1st element
20525 of LDRD pattern shown above. */
20526 XVECEXP (par, 0, (i % 2)) = tmp;
20527 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20528
20529 if ((i % 2) == 1)
20530 {
20531 /* When saved-register index (i) is odd, RTXs for both the registers
20532 to be loaded are generated in above given LDRD pattern, and the
20533 pattern can be emitted now. */
20534 par = emit_insn (par);
20535 REG_NOTES (par) = dwarf;
20536 RTX_FRAME_RELATED_P (par) = 1;
20537 }
20538
20539 i++;
20540 }
20541
20542 /* If the number of registers pushed is odd and return_in_pc is false, or
20543 the number of registers is even and return_in_pc is true, the last
20544 register is popped using LDR. It can be PC as well. Hence, adjust the
20545 stack first and then use LDR with post-increment. */
20546
20547 /* Increment the stack pointer, based on there being
20548 num_regs 4-byte registers to restore. */
20549 tmp = gen_rtx_SET (stack_pointer_rtx,
20550 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20551 RTX_FRAME_RELATED_P (tmp) = 1;
20552 tmp = emit_insn (tmp);
20553 if (!return_in_pc)
20554 {
20555 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20556 stack_pointer_rtx, stack_pointer_rtx);
20557 }
20558
20559 dwarf = NULL_RTX;
20560
20561 if (((num_regs % 2) == 1 && !return_in_pc)
20562 || ((num_regs % 2) == 0 && return_in_pc))
20563 {
20564 /* Scan for the single register to be popped. Skip until the saved
20565 register is found. */
20566 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20567
20568 /* Gen LDR with post increment here. */
20569 tmp1 = gen_rtx_MEM (SImode,
20570 gen_rtx_POST_INC (SImode,
20571 stack_pointer_rtx));
20572 set_mem_alias_set (tmp1, get_frame_alias_set ());
20573
20574 reg = gen_rtx_REG (SImode, j);
20575 tmp = gen_rtx_SET (reg, tmp1);
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20577 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20578
20579 if (return_in_pc)
20580 {
20581 /* If return_in_pc, j must be PC_REGNUM. */
20582 gcc_assert (j == PC_REGNUM);
20583 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20584 XVECEXP (par, 0, 0) = ret_rtx;
20585 XVECEXP (par, 0, 1) = tmp;
20586 par = emit_jump_insn (par);
20587 }
20588 else
20589 {
20590 par = emit_insn (tmp);
20591 REG_NOTES (par) = dwarf;
20592 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20593 stack_pointer_rtx, stack_pointer_rtx);
20594 }
20595
20596 }
20597 else if ((num_regs % 2) == 1 && return_in_pc)
20598 {
20599 /* There are 2 registers to be popped. So, generate the pattern
20600 pop_multiple_with_stack_update_and_return to pop in PC. */
20601 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20602 }
20603
20604 return;
20605 }
20606
20607 /* LDRD in ARM mode needs consecutive registers as operands. This function
20608 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20609 offset addressing and then generates one separate stack update. This provides
20610 more scheduling freedom, compared to writeback on every load. However,
20611 if the function returns using load into PC directly
20612 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20613 before the last load. TODO: Add a peephole optimization to recognize
20614 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20615 peephole optimization to merge the load at stack-offset zero
20616 with the stack update instruction using load with writeback
20617 in post-index addressing mode. */
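/* For example (an illustrative sketch), for a SAVED_REGS_MASK covering
   {r4, r5, r6} this emits:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   and for {r4, r5, pc} it emits the LDRD, adjusts SP by 8 and then
   returns with a final "ldr pc, [sp], #4".  */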
20618 static void
20619 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20620 {
20621 int j = 0;
20622 int offset = 0;
20623 rtx par = NULL_RTX;
20624 rtx dwarf = NULL_RTX;
20625 rtx tmp, mem;
20626
20627 /* Restore saved registers. */
20628 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20629 j = 0;
20630 while (j <= LAST_ARM_REGNUM)
20631 if (saved_regs_mask & (1 << j))
20632 {
20633 if ((j % 2) == 0
20634 && (saved_regs_mask & (1 << (j + 1)))
20635 && (j + 1) != PC_REGNUM)
20636 {
20637 /* Current register and next register form register pair for which
20638 LDRD can be generated. PC is always the last register popped, and
20639 we handle it separately. */
20640 if (offset > 0)
20641 mem = gen_frame_mem (DImode,
20642 plus_constant (Pmode,
20643 stack_pointer_rtx,
20644 offset));
20645 else
20646 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20647
20648 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20649 tmp = emit_insn (tmp);
20650 RTX_FRAME_RELATED_P (tmp) = 1;
20651
20652 /* Generate dwarf info. */
20653
20654 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20655 gen_rtx_REG (SImode, j),
20656 NULL_RTX);
20657 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20658 gen_rtx_REG (SImode, j + 1),
20659 dwarf);
20660
20661 REG_NOTES (tmp) = dwarf;
20662
20663 offset += 8;
20664 j += 2;
20665 }
20666 else if (j != PC_REGNUM)
20667 {
20668 /* Emit a single word load. */
20669 if (offset > 0)
20670 mem = gen_frame_mem (SImode,
20671 plus_constant (Pmode,
20672 stack_pointer_rtx,
20673 offset));
20674 else
20675 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20676
20677 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20678 tmp = emit_insn (tmp);
20679 RTX_FRAME_RELATED_P (tmp) = 1;
20680
20681 /* Generate dwarf info. */
20682 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20683 gen_rtx_REG (SImode, j),
20684 NULL_RTX);
20685
20686 offset += 4;
20687 j += 1;
20688 }
20689 else /* j == PC_REGNUM */
20690 j++;
20691 }
20692 else
20693 j++;
20694
20695 /* Update the stack. */
20696 if (offset > 0)
20697 {
20698 tmp = gen_rtx_SET (stack_pointer_rtx,
20699 plus_constant (Pmode,
20700 stack_pointer_rtx,
20701 offset));
20702 tmp = emit_insn (tmp);
20703 arm_add_cfa_adjust_cfa_note (tmp, offset,
20704 stack_pointer_rtx, stack_pointer_rtx);
20705 offset = 0;
20706 }
20707
20708 if (saved_regs_mask & (1 << PC_REGNUM))
20709 {
20710 /* Only PC is to be popped. */
20711 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20712 XVECEXP (par, 0, 0) = ret_rtx;
20713 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20714 gen_frame_mem (SImode,
20715 gen_rtx_POST_INC (SImode,
20716 stack_pointer_rtx)));
20717 RTX_FRAME_RELATED_P (tmp) = 1;
20718 XVECEXP (par, 0, 1) = tmp;
20719 par = emit_jump_insn (par);
20720
20721 /* Generate dwarf info. */
20722 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20723 gen_rtx_REG (SImode, PC_REGNUM),
20724 NULL_RTX);
20725 REG_NOTES (par) = dwarf;
20726 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20727 stack_pointer_rtx, stack_pointer_rtx);
20728 }
20729 }
20730
20731 /* Calculate the size of the return value that is passed in registers. */
20732 static unsigned
20733 arm_size_return_regs (void)
20734 {
20735 machine_mode mode;
20736
20737 if (crtl->return_rtx != 0)
20738 mode = GET_MODE (crtl->return_rtx);
20739 else
20740 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20741
20742 return GET_MODE_SIZE (mode);
20743 }
20744
20745 /* Return true if the current function needs to save/restore LR. */
20746 static bool
20747 thumb_force_lr_save (void)
20748 {
20749 return !cfun->machine->lr_save_eliminated
20750 && (!crtl->is_leaf
20751 || thumb_far_jump_used_p ()
20752 || df_regs_ever_live_p (LR_REGNUM));
20753 }
20754
20755 /* We do not know whether r3 will be available, because
20756 there is an indirect tailcall happening in this
20757 particular case. */
20758 static bool
20759 is_indirect_tailcall_p (rtx call)
20760 {
20761 rtx pat = PATTERN (call);
20762
20763 /* Indirect tail call. */
20764 pat = XVECEXP (pat, 0, 0);
20765 if (GET_CODE (pat) == SET)
20766 pat = SET_SRC (pat);
20767
20768 pat = XEXP (XEXP (pat, 0), 0);
20769 return REG_P (pat);
20770 }
20771
20772 /* Return true if r3 is used by any of the tail call insns in the
20773 current function. */
20774 static bool
20775 any_sibcall_could_use_r3 (void)
20776 {
20777 edge_iterator ei;
20778 edge e;
20779
20780 if (!crtl->tail_call_emit)
20781 return false;
20782 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20783 if (e->flags & EDGE_SIBCALL)
20784 {
20785 rtx_insn *call = BB_END (e->src);
20786 if (!CALL_P (call))
20787 call = prev_nonnote_nondebug_insn (call);
20788 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20789 if (find_regno_fusage (call, USE, 3)
20790 || is_indirect_tailcall_p (call))
20791 return true;
20792 }
20793 return false;
20794 }
20795
20796
20797 /* Compute the distance from register FROM to register TO.
20798 These can be the arg pointer (26), the soft frame pointer (25),
20799 the stack pointer (13) or the hard frame pointer (11).
20800 In thumb mode r7 is used as the soft frame pointer, if needed.
20801 Typical stack layout looks like this:
20802
20803 old stack pointer -> | |
20804 ----
20805 | | \
20806 | | saved arguments for
20807 | | vararg functions
20808 | | /
20809 --
20810 hard FP & arg pointer -> | | \
20811 | | stack
20812 | | frame
20813 | | /
20814 --
20815 | | \
20816 | | call saved
20817 | | registers
20818 soft frame pointer -> | | /
20819 --
20820 | | \
20821 | | local
20822 | | variables
20823 locals base pointer -> | | /
20824 --
20825 | | \
20826 | | outgoing
20827 | | arguments
20828 current stack pointer -> | | /
20829 --
20830
20831 For a given function some or all of these stack components
20832 may not be needed, giving rise to the possibility of
20833 eliminating some of the registers.
20834
20835 The values returned by this function must reflect the behavior
20836 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20837
20838 The sign of the number returned reflects the direction of stack
20839 growth, so the values are positive for all eliminations except
20840 from the soft frame pointer to the hard frame pointer.
20841
20842 SFP may point just inside the local variables block to ensure correct
20843 alignment. */
20844
20845
20846 /* Return cached stack offsets. */
20847
20848 static arm_stack_offsets *
20849 arm_get_frame_offsets (void)
20850 {
20851 struct arm_stack_offsets *offsets;
20852
20853 offsets = &cfun->machine->stack_offsets;
20854
20855 return offsets;
20856 }
20857
20858
20859 /* Calculate stack offsets. These are used to calculate register elimination
20860 offsets and in prologue/epilogue code. Also calculates which registers
20861 should be saved. */
20862
20863 static void
20864 arm_compute_frame_layout (void)
20865 {
20866 struct arm_stack_offsets *offsets;
20867 unsigned long func_type;
20868 int saved;
20869 int core_saved;
20870 HOST_WIDE_INT frame_size;
20871 int i;
20872
20873 offsets = &cfun->machine->stack_offsets;
20874
20875 /* Initially this is the size of the local variables. It will be translated
20876 into an offset once we have determined the size of preceding data. */
20877 frame_size = ROUND_UP_WORD (get_frame_size ());
20878
20879 /* Space for variadic functions. */
20880 offsets->saved_args = crtl->args.pretend_args_size;
20881
20882 /* In Thumb mode this is incorrect, but never used. */
20883 offsets->frame
20884 = (offsets->saved_args
20885 + arm_compute_static_chain_stack_bytes ()
20886 + (frame_pointer_needed ? 4 : 0));
20887
20888 if (TARGET_32BIT)
20889 {
20890 unsigned int regno;
20891
20892 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20893 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20894 saved = core_saved;
20895
20896 /* We know that SP will be doubleword aligned on entry, and we must
20897 preserve that condition at any subroutine call. We also require the
20898 soft frame pointer to be doubleword aligned. */
20899
20900 if (TARGET_REALLY_IWMMXT)
20901 {
20902 /* Check for the call-saved iWMMXt registers. */
20903 for (regno = FIRST_IWMMXT_REGNUM;
20904 regno <= LAST_IWMMXT_REGNUM;
20905 regno++)
20906 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20907 saved += 8;
20908 }
20909
20910 func_type = arm_current_func_type ();
20911 /* Space for saved VFP registers. */
20912 if (! IS_VOLATILE (func_type)
20913 && TARGET_HARD_FLOAT)
20914 saved += arm_get_vfp_saved_size ();
20915 }
20916 else /* TARGET_THUMB1 */
20917 {
20918 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20919 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20920 saved = core_saved;
20921 if (TARGET_BACKTRACE)
20922 saved += 16;
20923 }
20924
20925 /* Saved registers include the stack frame. */
20926 offsets->saved_regs
20927 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20928 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20929
20930 /* A leaf function does not need any stack alignment if it has nothing
20931 on the stack. */
20932 if (crtl->is_leaf && frame_size == 0
20933 /* However if it calls alloca(), we have a dynamically allocated
20934 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20935 && ! cfun->calls_alloca)
20936 {
20937 offsets->outgoing_args = offsets->soft_frame;
20938 offsets->locals_base = offsets->soft_frame;
20939 return;
20940 }
20941
20942 /* Ensure SFP has the correct alignment. */
20943 if (ARM_DOUBLEWORD_ALIGN
20944 && (offsets->soft_frame & 7))
20945 {
20946 offsets->soft_frame += 4;
20947 /* Try to align stack by pushing an extra reg. Don't bother doing this
20948 when there is a stack frame as the alignment will be rolled into
20949 the normal stack adjustment. */
20950 if (frame_size + crtl->outgoing_args_size == 0)
20951 {
20952 int reg = -1;
20953
20954 /* Register r3 is caller-saved. Normally it does not need to be
20955 saved on entry by the prologue. However if we choose to save
20956 it for padding then we may confuse the compiler into thinking
20957 a prologue sequence is required when in fact it is not. This
20958 will occur when shrink-wrapping if r3 is used as a scratch
20959 register and there are no other callee-saved writes.
20960
20961 This situation can be avoided when other callee-saved registers
20962 are available and r3 is not mandatory if we choose a callee-saved
20963 register for padding. */
20964 bool prefer_callee_reg_p = false;
20965
20966 /* If it is safe to use r3, then do so. This sometimes
20967 generates better code on Thumb-2 by avoiding the need to
20968 use 32-bit push/pop instructions. */
20969 if (! any_sibcall_could_use_r3 ()
20970 && arm_size_return_regs () <= 12
20971 && (offsets->saved_regs_mask & (1 << 3)) == 0
20972 && (TARGET_THUMB2
20973 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20974 {
20975 reg = 3;
20976 if (!TARGET_THUMB2)
20977 prefer_callee_reg_p = true;
20978 }
20979 if (reg == -1
20980 || prefer_callee_reg_p)
20981 {
20982 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20983 {
20984 /* Avoid fixed registers; they may be changed at
20985 arbitrary times so it's unsafe to restore them
20986 during the epilogue. */
20987 if (!fixed_regs[i]
20988 && (offsets->saved_regs_mask & (1 << i)) == 0)
20989 {
20990 reg = i;
20991 break;
20992 }
20993 }
20994 }
20995
20996 if (reg != -1)
20997 {
20998 offsets->saved_regs += 4;
20999 offsets->saved_regs_mask |= (1 << reg);
21000 }
21001 }
21002 }
21003
21004 offsets->locals_base = offsets->soft_frame + frame_size;
21005 offsets->outgoing_args = (offsets->locals_base
21006 + crtl->outgoing_args_size);
21007
21008 if (ARM_DOUBLEWORD_ALIGN)
21009 {
21010 /* Ensure SP remains doubleword aligned. */
21011 if (offsets->outgoing_args & 7)
21012 offsets->outgoing_args += 4;
21013 gcc_assert (!(offsets->outgoing_args & 7));
21014 }
21015 }
21016
21017
21018 /* Calculate the relative offsets for the different stack pointers. Positive
21019 offsets are in the direction of stack growth. */
21020
21021 HOST_WIDE_INT
21022 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21023 {
21024 arm_stack_offsets *offsets;
21025
21026 offsets = arm_get_frame_offsets ();
21027
21028 /* OK, now we have enough information to compute the distances.
21029 There must be an entry in these switch tables for each pair
21030 of registers in ELIMINABLE_REGS, even if some of the entries
21031 seem to be redundant or useless. */
21032 switch (from)
21033 {
21034 case ARG_POINTER_REGNUM:
21035 switch (to)
21036 {
21037 case THUMB_HARD_FRAME_POINTER_REGNUM:
21038 return 0;
21039
21040 case FRAME_POINTER_REGNUM:
21041 /* This is the reverse of the soft frame pointer
21042 to hard frame pointer elimination below. */
21043 return offsets->soft_frame - offsets->saved_args;
21044
21045 case ARM_HARD_FRAME_POINTER_REGNUM:
21046 /* This is only non-zero in the case where the static chain register
21047 is stored above the frame. */
21048 return offsets->frame - offsets->saved_args - 4;
21049
21050 case STACK_POINTER_REGNUM:
21051 /* If nothing has been pushed on the stack at all
21052 then this will return -4. This *is* correct! */
21053 return offsets->outgoing_args - (offsets->saved_args + 4);
21054
21055 default:
21056 gcc_unreachable ();
21057 }
21058 gcc_unreachable ();
21059
21060 case FRAME_POINTER_REGNUM:
21061 switch (to)
21062 {
21063 case THUMB_HARD_FRAME_POINTER_REGNUM:
21064 return 0;
21065
21066 case ARM_HARD_FRAME_POINTER_REGNUM:
21067 /* The hard frame pointer points to the top entry in the
21068 stack frame. The soft frame pointer to the bottom entry
21069 in the stack frame. If there is no stack frame at all,
21070 then they are identical. */
21071
21072 return offsets->frame - offsets->soft_frame;
21073
21074 case STACK_POINTER_REGNUM:
21075 return offsets->outgoing_args - offsets->soft_frame;
21076
21077 default:
21078 gcc_unreachable ();
21079 }
21080 gcc_unreachable ();
21081
21082 default:
21083 /* You cannot eliminate from the stack pointer.
21084 In theory you could eliminate from the hard frame
21085 pointer to the stack pointer, but this will never
21086 happen, since if a stack frame is not needed the
21087 hard frame pointer will never be used. */
21088 gcc_unreachable ();
21089 }
21090 }
21091
21092 /* Given FROM and TO register numbers, say whether this elimination is
21093 allowed. Frame pointer elimination is automatically handled.
21094
21095 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21096 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21097 pointer, we must eliminate FRAME_POINTER_REGNUM into
21098 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21099 ARG_POINTER_REGNUM. */
21100
21101 bool
21102 arm_can_eliminate (const int from, const int to)
21103 {
21104 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21105 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21106 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21107 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21108 true);
21109 }
21110
21111 /* Emit RTL to save coprocessor registers on function entry. Returns the
21112 number of bytes pushed. */
21113
21114 static int
21115 arm_save_coproc_regs (void)
21116 {
21117 int saved_size = 0;
21118 unsigned reg;
21119 unsigned start_reg;
21120 rtx insn;
21121
21122 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21123 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21124 {
21125 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21126 insn = gen_rtx_MEM (V2SImode, insn);
21127 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21128 RTX_FRAME_RELATED_P (insn) = 1;
21129 saved_size += 8;
21130 }
21131
21132 if (TARGET_HARD_FLOAT)
21133 {
21134 start_reg = FIRST_VFP_REGNUM;
21135
21136 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21137 {
21138 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21139 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21140 {
21141 if (start_reg != reg)
21142 saved_size += vfp_emit_fstmd (start_reg,
21143 (reg - start_reg) / 2);
21144 start_reg = reg + 2;
21145 }
21146 }
21147 if (start_reg != reg)
21148 saved_size += vfp_emit_fstmd (start_reg,
21149 (reg - start_reg) / 2);
21150 }
21151 return saved_size;
21152 }
21153
21154
21155 /* Set the Thumb frame pointer from the stack pointer. */
21156
21157 static void
21158 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21159 {
21160 HOST_WIDE_INT amount;
21161 rtx insn, dwarf;
21162
21163 amount = offsets->outgoing_args - offsets->locals_base;
21164 if (amount < 1024)
21165 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21166 stack_pointer_rtx, GEN_INT (amount)));
21167 else
21168 {
21169 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21170 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21171 expects the first two operands to be the same. */
21172 if (TARGET_THUMB2)
21173 {
21174 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21175 stack_pointer_rtx,
21176 hard_frame_pointer_rtx));
21177 }
21178 else
21179 {
21180 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21181 hard_frame_pointer_rtx,
21182 stack_pointer_rtx));
21183 }
21184 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21185 plus_constant (Pmode, stack_pointer_rtx, amount));
21186 RTX_FRAME_RELATED_P (dwarf) = 1;
21187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21188 }
21189
21190 RTX_FRAME_RELATED_P (insn) = 1;
21191 }
21192
21193 struct scratch_reg {
21194 rtx reg;
21195 bool saved;
21196 };
21197
21198 /* Return a short-lived scratch register for use as a 2nd scratch register on
21199 function entry after the registers are saved in the prologue. This register
21200 must be released by means of release_scratch_register_on_entry. IP is not
21201 considered since it is always used as the 1st scratch register if available.
21202
21203 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21204 mask of live registers. */
21205
21206 static void
21207 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21208 unsigned long live_regs)
21209 {
21210 int regno = -1;
21211
21212 sr->saved = false;
21213
21214 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21215 regno = LR_REGNUM;
21216 else
21217 {
21218 unsigned int i;
21219
21220 for (i = 4; i < 11; i++)
21221 if (regno1 != i && (live_regs & (1 << i)) != 0)
21222 {
21223 regno = i;
21224 break;
21225 }
21226
21227 if (regno < 0)
21228 {
21229 /* If IP is used as the 1st scratch register for a nested function,
21230 then either r3 wasn't available or is used to preserve IP. */
21231 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21232 regno1 = 3;
21233 regno = (regno1 == 3 ? 2 : 3);
21234 sr->saved
21235 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21236 regno);
21237 }
21238 }
21239
21240 sr->reg = gen_rtx_REG (SImode, regno);
21241 if (sr->saved)
21242 {
21243 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21244 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21245 rtx x = gen_rtx_SET (stack_pointer_rtx,
21246 plus_constant (Pmode, stack_pointer_rtx, -4));
21247 RTX_FRAME_RELATED_P (insn) = 1;
21248 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21249 }
21250 }
21251
21252 /* Release a scratch register obtained from the preceding function. */
21253
21254 static void
21255 release_scratch_register_on_entry (struct scratch_reg *sr)
21256 {
21257 if (sr->saved)
21258 {
21259 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21260 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21261 rtx x = gen_rtx_SET (stack_pointer_rtx,
21262 plus_constant (Pmode, stack_pointer_rtx, 4));
21263 RTX_FRAME_RELATED_P (insn) = 1;
21264 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21265 }
21266 }
21267
21268 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21269
21270 #if PROBE_INTERVAL > 4096
21271 #error Cannot use indexed addressing mode for stack probing
21272 #endif
21273
21274 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21275 inclusive. These are offsets from the current stack pointer. REGNO1
21276 is the index number of the 1st scratch register and LIVE_REGS is the
21277 mask of live registers. */
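/* A worked example (assuming the default PROBE_INTERVAL of 4096 and a SIZE
   of 10000 bytes): this falls into the partially unrolled case and probes
   the words at FIRST + 4096, FIRST + 8192 and finally FIRST + 10000 below
   the incoming stack pointer, using REG1 as the probing address register.  */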
21278
21279 static void
21280 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21281 unsigned int regno1, unsigned long live_regs)
21282 {
21283 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21284
21285 /* See if we have a constant small number of probes to generate. If so,
21286 that's the easy case. */
21287 if (size <= PROBE_INTERVAL)
21288 {
21289 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21290 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21291 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21292 }
21293
21294 /* The run-time loop is made up of 10 insns in the generic case while the
21295 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21296 else if (size <= 5 * PROBE_INTERVAL)
21297 {
21298 HOST_WIDE_INT i, rem;
21299
21300 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21301 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21302 emit_stack_probe (reg1);
21303
21304 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21305 it exceeds SIZE. If only two probes are needed, this will not
21306 generate any code. Then probe at FIRST + SIZE. */
21307 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21308 {
21309 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21310 emit_stack_probe (reg1);
21311 }
21312
21313 rem = size - (i - PROBE_INTERVAL);
21314 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21315 {
21316 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21317 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21318 }
21319 else
21320 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21321 }
21322
21323 /* Otherwise, do the same as above, but in a loop. Note that we must be
21324 extra careful with variables wrapping around because we might be at
21325 the very top (or the very bottom) of the address space and we have
21326 to be able to handle this case properly; in particular, we use an
21327 equality test for the loop condition. */
21328 else
21329 {
21330 HOST_WIDE_INT rounded_size;
21331 struct scratch_reg sr;
21332
21333 get_scratch_register_on_entry (&sr, regno1, live_regs);
21334
21335 emit_move_insn (reg1, GEN_INT (first));
21336
21337
21338 /* Step 1: round SIZE to the previous multiple of the interval. */
21339
21340 rounded_size = size & -PROBE_INTERVAL;
21341 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21342
21343
21344 /* Step 2: compute initial and final value of the loop counter. */
21345
21346 /* TEST_ADDR = SP + FIRST. */
21347 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21348
21349 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21350 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21351
21352
21353 /* Step 3: the loop
21354
21355 do
21356 {
21357 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21358 probe at TEST_ADDR
21359 }
21360 while (TEST_ADDR != LAST_ADDR)
21361
21362 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21363 until it is equal to ROUNDED_SIZE. */
21364
21365 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21366
21367
21368 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21369 that SIZE is equal to ROUNDED_SIZE. */
21370
21371 if (size != rounded_size)
21372 {
21373 HOST_WIDE_INT rem = size - rounded_size;
21374
21375 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21376 {
21377 emit_set_insn (sr.reg,
21378 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21379 emit_stack_probe (plus_constant (Pmode, sr.reg,
21380 PROBE_INTERVAL - rem));
21381 }
21382 else
21383 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21384 }
21385
21386 release_scratch_register_on_entry (&sr);
21387 }
21388
21389 /* Make sure nothing is scheduled before we are done. */
21390 emit_insn (gen_blockage ());
21391 }
21392
21393 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21394 absolute addresses. */
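/* The loop emitted here looks like this (illustrative, assuming a
   PROBE_INTERVAL of 4096 and r4/r5 standing in for REG1/REG2):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
   */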
21395
21396 const char *
21397 output_probe_stack_range (rtx reg1, rtx reg2)
21398 {
21399 static int labelno = 0;
21400 char loop_lab[32];
21401 rtx xops[2];
21402
21403 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21404
21405 /* Loop. */
21406 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21407
21408 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21409 xops[0] = reg1;
21410 xops[1] = GEN_INT (PROBE_INTERVAL);
21411 output_asm_insn ("sub\t%0, %0, %1", xops);
21412
21413 /* Probe at TEST_ADDR. */
21414 output_asm_insn ("str\tr0, [%0, #0]", xops);
21415
21416 /* Test if TEST_ADDR == LAST_ADDR. */
21417 xops[1] = reg2;
21418 output_asm_insn ("cmp\t%0, %1", xops);
21419
21420 /* Branch. */
21421 fputs ("\tbne\t", asm_out_file);
21422 assemble_name_raw (asm_out_file, loop_lab);
21423 fputc ('\n', asm_out_file);
21424
21425 return "";
21426 }
21427
21428 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21429 function. */
21430 void
21431 arm_expand_prologue (void)
21432 {
21433 rtx amount;
21434 rtx insn;
21435 rtx ip_rtx;
21436 unsigned long live_regs_mask;
21437 unsigned long func_type;
21438 int fp_offset = 0;
21439 int saved_pretend_args = 0;
21440 int saved_regs = 0;
21441 unsigned HOST_WIDE_INT args_to_push;
21442 HOST_WIDE_INT size;
21443 arm_stack_offsets *offsets;
21444 bool clobber_ip;
21445
21446 func_type = arm_current_func_type ();
21447
21448 /* Naked functions don't have prologues. */
21449 if (IS_NAKED (func_type))
21450 {
21451 if (flag_stack_usage_info)
21452 current_function_static_stack_size = 0;
21453 return;
21454 }
21455
21456 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21457 args_to_push = crtl->args.pretend_args_size;
21458
21459 /* Compute which registers we will have to save onto the stack. */
21460 offsets = arm_get_frame_offsets ();
21461 live_regs_mask = offsets->saved_regs_mask;
21462
21463 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21464
21465 if (IS_STACKALIGN (func_type))
21466 {
21467 rtx r0, r1;
21468
21469 /* Handle a word-aligned stack pointer. We generate the following:
21470
21471 mov r0, sp
21472 bic r1, r0, #7
21473 mov sp, r1
21474 <save and restore r0 in normal prologue/epilogue>
21475 mov sp, r0
21476 bx lr
21477
21478 The unwinder doesn't need to know about the stack realignment.
21479 Just tell it we saved SP in r0. */
21480 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21481
21482 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21483 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21484
21485 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21486 RTX_FRAME_RELATED_P (insn) = 1;
21487 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21488
21489 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21490
21491 /* ??? The CFA changes here, which may cause GDB to conclude that it
21492 has entered a different function. That said, the unwind info is
21493 correct, individually, before and after this instruction because
21494 we've described the save of SP, which will override the default
21495 handling of SP as restoring from the CFA. */
21496 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21497 }
21498
21499 /* The static chain register is the same as the IP register. If it is
21500 clobbered when creating the frame, we need to save and restore it. */
21501 clobber_ip = IS_NESTED (func_type)
21502 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21503 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21504 || flag_stack_clash_protection)
21505 && !df_regs_ever_live_p (LR_REGNUM)
21506 && arm_r3_live_at_start_p ()));
21507
21508 /* Find somewhere to store IP whilst the frame is being created.
21509 We try the following places in order:
21510
21511 1. The last argument register r3 if it is available.
21512 2. A slot on the stack above the frame if there are no
21513 arguments to push onto the stack.
21514 3. Register r3 again, after pushing the argument registers
21515 onto the stack, if this is a varargs function.
21516 4. The last slot on the stack created for the arguments to
21517 push, if this isn't a varargs function.
21518
21519 Note - we only need to tell the dwarf2 backend about the SP
21520 adjustment in the second variant; the static chain register
21521 doesn't need to be unwound, as it doesn't contain a value
21522 inherited from the caller. */
21523 if (clobber_ip)
21524 {
21525 if (!arm_r3_live_at_start_p ())
21526 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21527 else if (args_to_push == 0)
21528 {
21529 rtx addr, dwarf;
21530
21531 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21532 saved_regs += 4;
21533
21534 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21535 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21536 fp_offset = 4;
21537
21538 /* Just tell the dwarf backend that we adjusted SP. */
21539 dwarf = gen_rtx_SET (stack_pointer_rtx,
21540 plus_constant (Pmode, stack_pointer_rtx,
21541 -fp_offset));
21542 RTX_FRAME_RELATED_P (insn) = 1;
21543 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21544 }
21545 else
21546 {
21547 /* Store the args on the stack. */
21548 if (cfun->machine->uses_anonymous_args)
21549 {
21550 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21551 (0xf0 >> (args_to_push / 4)) & 0xf);
21552 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21553 saved_pretend_args = 1;
21554 }
21555 else
21556 {
21557 rtx addr, dwarf;
21558
21559 if (args_to_push == 4)
21560 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21561 else
21562 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21563 plus_constant (Pmode,
21564 stack_pointer_rtx,
21565 -args_to_push));
21566
21567 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21568
21569 /* Just tell the dwarf backend that we adjusted SP. */
21570 dwarf = gen_rtx_SET (stack_pointer_rtx,
21571 plus_constant (Pmode, stack_pointer_rtx,
21572 -args_to_push));
21573 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21574 }
21575
21576 RTX_FRAME_RELATED_P (insn) = 1;
21577 fp_offset = args_to_push;
21578 args_to_push = 0;
21579 }
21580 }
21581
21582 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21583 {
21584 if (IS_INTERRUPT (func_type))
21585 {
21586 /* Interrupt functions must not corrupt any registers.
21587 Creating a frame pointer however, corrupts the IP
21588 register, so we must push it first. */
21589 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21590
21591 /* Do not set RTX_FRAME_RELATED_P on this insn.
21592 The dwarf stack unwinding code only wants to see one
21593 stack decrement per function, and this is not it. If
21594 this instruction is labeled as being part of the frame
21595 creation sequence then dwarf2out_frame_debug_expr will
21596 die when it encounters the assignment of IP to FP
21597 later on, since the use of SP here establishes SP as
21598 the CFA register and not IP.
21599
21600 Anyway this instruction is not really part of the stack
21601 frame creation although it is part of the prologue. */
21602 }
21603
21604 insn = emit_set_insn (ip_rtx,
21605 plus_constant (Pmode, stack_pointer_rtx,
21606 fp_offset));
21607 RTX_FRAME_RELATED_P (insn) = 1;
21608 }
21609
21610 if (args_to_push)
21611 {
21612 /* Push the argument registers, or reserve space for them. */
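/* Illustrative note (added): args_to_push is a byte count, so the mask
   (0xf0 >> (args_to_push / 4)) & 0xf selects the last args_to_push / 4
   argument registers.  For example, args_to_push == 8 gives
   (0xf0 >> 2) & 0xf == 0xc, i.e. {r2, r3}.  */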
21613 if (cfun->machine->uses_anonymous_args)
21614 insn = emit_multi_reg_push
21615 ((0xf0 >> (args_to_push / 4)) & 0xf,
21616 (0xf0 >> (args_to_push / 4)) & 0xf);
21617 else
21618 insn = emit_insn
21619 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21620 GEN_INT (- args_to_push)));
21621 RTX_FRAME_RELATED_P (insn) = 1;
21622 }
21623
21624 /* If this is an interrupt service routine, and the link register
21625 is going to be pushed, and we're not generating an extra
21626 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21627 then subtracting four from LR now means that the function return
21628 can be done with a single instruction. */
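/* Added rationale (hedged): on entry to an IRQ/FIQ handler the return
   address is LR minus 4, so pre-adjusting LR before it is saved lets the
   epilogue return by loading the saved value straight into the PC in a
   single instruction.  */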
21629 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21630 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21631 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21632 && TARGET_ARM)
21633 {
21634 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21635
21636 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21637 }
21638
21639 if (live_regs_mask)
21640 {
21641 unsigned long dwarf_regs_mask = live_regs_mask;
21642
21643 saved_regs += bit_count (live_regs_mask) * 4;
21644 if (optimize_size && !frame_pointer_needed
21645 && saved_regs == offsets->saved_regs - offsets->saved_args)
21646 {
21647 /* If no coprocessor registers are being pushed and we don't have
21648 to worry about a frame pointer then push extra registers to
21649 create the stack frame. This is done in a way that does not
21650 alter the frame layout, so is independent of the epilogue. */
21651 int n;
21652 int frame;
21653 n = 0;
21654 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21655 n++;
21656 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21657 if (frame && n * 4 >= frame)
21658 {
21659 n = frame / 4;
21660 live_regs_mask |= (1 << n) - 1;
21661 saved_regs += frame;
21662 }
21663 }
21664
21665 if (TARGET_LDRD
21666 && current_tune->prefer_ldrd_strd
21667 && !optimize_function_for_size_p (cfun))
21668 {
21669 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21670 if (TARGET_THUMB2)
21671 thumb2_emit_strd_push (live_regs_mask);
21672 else if (TARGET_ARM
21673 && !TARGET_APCS_FRAME
21674 && !IS_INTERRUPT (func_type))
21675 arm_emit_strd_push (live_regs_mask);
21676 else
21677 {
21678 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21679 RTX_FRAME_RELATED_P (insn) = 1;
21680 }
21681 }
21682 else
21683 {
21684 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21685 RTX_FRAME_RELATED_P (insn) = 1;
21686 }
21687 }
21688
21689 if (! IS_VOLATILE (func_type))
21690 saved_regs += arm_save_coproc_regs ();
21691
21692 if (frame_pointer_needed && TARGET_ARM)
21693 {
21694 /* Create the new frame pointer. */
21695 if (TARGET_APCS_FRAME)
21696 {
21697 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21698 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21699 RTX_FRAME_RELATED_P (insn) = 1;
21700 }
21701 else
21702 {
21703 insn = GEN_INT (saved_regs - (4 + fp_offset));
21704 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21705 stack_pointer_rtx, insn));
21706 RTX_FRAME_RELATED_P (insn) = 1;
21707 }
21708 }
21709
21710 size = offsets->outgoing_args - offsets->saved_args;
21711 if (flag_stack_usage_info)
21712 current_function_static_stack_size = size;
21713
21714 /* If this isn't an interrupt service routine and we have a frame, then do
21715 stack checking. We use IP as the first scratch register, except for the
21716 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21717 if (!IS_INTERRUPT (func_type)
21718 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21719 || flag_stack_clash_protection))
21720 {
21721 unsigned int regno;
21722
21723 if (!IS_NESTED (func_type) || clobber_ip)
21724 regno = IP_REGNUM;
21725 else if (df_regs_ever_live_p (LR_REGNUM))
21726 regno = LR_REGNUM;
21727 else
21728 regno = 3;
21729
21730 if (crtl->is_leaf && !cfun->calls_alloca)
21731 {
21732 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21733 arm_emit_probe_stack_range (get_stack_check_protect (),
21734 size - get_stack_check_protect (),
21735 regno, live_regs_mask);
21736 }
21737 else if (size > 0)
21738 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21739 regno, live_regs_mask);
21740 }
21741
21742 /* Recover the static chain register. */
21743 if (clobber_ip)
21744 {
21745 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21746 insn = gen_rtx_REG (SImode, 3);
21747 else
21748 {
21749 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21750 insn = gen_frame_mem (SImode, insn);
21751 }
21752 emit_set_insn (ip_rtx, insn);
21753 emit_insn (gen_force_register_use (ip_rtx));
21754 }
21755
21756 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21757 {
21758 /* This add can produce multiple insns for a large constant, so we
21759 need to get tricky. */
21760 rtx_insn *last = get_last_insn ();
21761
21762 amount = GEN_INT (offsets->saved_args + saved_regs
21763 - offsets->outgoing_args);
21764
21765 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21766 amount));
21767 do
21768 {
21769 last = last ? NEXT_INSN (last) : get_insns ();
21770 RTX_FRAME_RELATED_P (last) = 1;
21771 }
21772 while (last != insn);
21773
21774 /* If the frame pointer is needed, emit a special barrier that
21775 will prevent the scheduler from moving stores to the frame
21776 before the stack adjustment. */
21777 if (frame_pointer_needed)
21778 emit_insn (gen_stack_tie (stack_pointer_rtx,
21779 hard_frame_pointer_rtx));
21780 }
21781
21782
21783 if (frame_pointer_needed && TARGET_THUMB2)
21784 thumb_set_frame_pointer (offsets);
21785
21786 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21787 {
21788 unsigned long mask;
21789
21790 mask = live_regs_mask;
21791 mask &= THUMB2_WORK_REGS;
21792 if (!IS_NESTED (func_type))
21793 mask |= (1 << IP_REGNUM);
21794 arm_load_pic_register (mask);
21795 }
21796
21797 /* If we are profiling, make sure no instructions are scheduled before
21798 the call to mcount. Similarly if the user has requested no
21799 scheduling in the prologue. Similarly if we want non-call exceptions
21800 using the EABI unwinder, to prevent faulting instructions from being
21801 swapped with a stack adjustment. */
21802 if (crtl->profile || !TARGET_SCHED_PROLOG
21803 || (arm_except_unwind_info (&global_options) == UI_TARGET
21804 && cfun->can_throw_non_call_exceptions))
21805 emit_insn (gen_blockage ());
21806
21807 /* If the link register is being kept alive, with the return address in it,
21808 then make sure that it does not get reused by the ce2 pass. */
21809 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21810 cfun->machine->lr_save_eliminated = 1;
21811 }
21812 \f
21813 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21814 static void
21815 arm_print_condition (FILE *stream)
21816 {
21817 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21818 {
21819 /* Branch conversion is not implemented for Thumb-2. */
21820 if (TARGET_THUMB)
21821 {
21822 output_operand_lossage ("predicated Thumb instruction");
21823 return;
21824 }
21825 if (current_insn_predicate != NULL)
21826 {
21827 output_operand_lossage
21828 ("predicated instruction in conditional sequence");
21829 return;
21830 }
21831
21832 fputs (arm_condition_codes[arm_current_cc], stream);
21833 }
21834 else if (current_insn_predicate)
21835 {
21836 enum arm_cond_code code;
21837
21838 if (TARGET_THUMB1)
21839 {
21840 output_operand_lossage ("predicated Thumb instruction");
21841 return;
21842 }
21843
21844 code = get_arm_condition_code (current_insn_predicate);
21845 fputs (arm_condition_codes[code], stream);
21846 }
21847 }
21848
21849
21850 /* Globally reserved letters: acln
21851 Punctuation letters currently used: @_|?().!#
21852 Lower case letters currently used: bcdefhimpqtvwxyz
21853 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21854 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21855
21856 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21857
21858 If CODE is 'd', then the X is a condition operand and the instruction
21859 should only be executed if the condition is true.
21860 If CODE is 'D', then the X is a condition operand and the instruction
21861 should only be executed if the condition is false: however, if the mode
21862 of the comparison is CCFPEmode, then always execute the instruction -- we
21863 do this because in these circumstances !GE does not necessarily imply LT;
21864 in these cases the instruction pattern will take care to make sure that
21865 an instruction containing %d will follow, thereby undoing the effects of
21866 doing this instruction unconditionally.
21867 If CODE is 'N' then X is a floating point operand that must be negated
21868 before output.
21869 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21870 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21871 static void
21872 arm_print_operand (FILE *stream, rtx x, int code)
21873 {
21874 switch (code)
21875 {
21876 case '@':
21877 fputs (ASM_COMMENT_START, stream);
21878 return;
21879
21880 case '_':
21881 fputs (user_label_prefix, stream);
21882 return;
21883
21884 case '|':
21885 fputs (REGISTER_PREFIX, stream);
21886 return;
21887
21888 case '?':
21889 arm_print_condition (stream);
21890 return;
21891
21892 case '.':
21893 /* The current condition code for a condition code setting instruction.
21894 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21895 fputc('s', stream);
21896 arm_print_condition (stream);
21897 return;
21898
21899 case '!':
21900 /* If the instruction is conditionally executed then print
21901 the current condition code, otherwise print 's'. */
21902 gcc_assert (TARGET_THUMB2);
21903 if (current_insn_predicate)
21904 arm_print_condition (stream);
21905 else
21906 fputc('s', stream);
21907 break;
21908
21909 /* %# is a "break" sequence. It doesn't output anything, but is used to
21910 separate e.g. operand numbers from following text, if that text consists
21911 of further digits which we don't want to be part of the operand
21912 number. */
21913 case '#':
21914 return;
21915
21916 case 'N':
21917 {
21918 REAL_VALUE_TYPE r;
21919 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21920 fprintf (stream, "%s", fp_const_from_val (&r));
21921 }
21922 return;
21923
21924 /* An integer or symbol address without a preceding # sign. */
21925 case 'c':
21926 switch (GET_CODE (x))
21927 {
21928 case CONST_INT:
21929 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21930 break;
21931
21932 case SYMBOL_REF:
21933 output_addr_const (stream, x);
21934 break;
21935
21936 case CONST:
21937 if (GET_CODE (XEXP (x, 0)) == PLUS
21938 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21939 {
21940 output_addr_const (stream, x);
21941 break;
21942 }
21943 /* Fall through. */
21944
21945 default:
21946 output_operand_lossage ("Unsupported operand for code '%c'", code);
21947 }
21948 return;
21949
21950 /* An integer that we want to print in HEX. */
21951 case 'x':
21952 switch (GET_CODE (x))
21953 {
21954 case CONST_INT:
21955 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21956 break;
21957
21958 default:
21959 output_operand_lossage ("Unsupported operand for code '%c'", code);
21960 }
21961 return;
21962
21963 case 'B':
21964 if (CONST_INT_P (x))
21965 {
21966 HOST_WIDE_INT val;
21967 val = ARM_SIGN_EXTEND (~INTVAL (x));
21968 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21969 }
21970 else
21971 {
21972 putc ('~', stream);
21973 output_addr_const (stream, x);
21974 }
21975 return;
21976
21977 case 'b':
21978 /* Print the log2 of a CONST_INT. */
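/* For example (illustrative), (const_int 8) is printed as "#3".  */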
21979 {
21980 HOST_WIDE_INT val;
21981
21982 if (!CONST_INT_P (x)
21983 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21984 output_operand_lossage ("Unsupported operand for code '%c'", code);
21985 else
21986 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21987 }
21988 return;
21989
21990 case 'L':
21991 /* The low 16 bits of an immediate constant. */
21992 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21993 return;
21994
21995 case 'i':
21996 fprintf (stream, "%s", arithmetic_instr (x, 1));
21997 return;
21998
21999 case 'I':
22000 fprintf (stream, "%s", arithmetic_instr (x, 0));
22001 return;
22002
22003 case 'S':
22004 {
22005 HOST_WIDE_INT val;
22006 const char *shift;
22007
22008 shift = shift_op (x, &val);
22009
22010 if (shift)
22011 {
22012 fprintf (stream, ", %s ", shift);
22013 if (val == -1)
22014 arm_print_operand (stream, XEXP (x, 1), 0);
22015 else
22016 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22017 }
22018 }
22019 return;
22020
22021 /* An explanation of the 'Q', 'R' and 'H' register operands:
22022
22023 In a pair of registers containing a DI or DF value the 'Q'
22024 operand returns the register number of the register containing
22025 the least significant part of the value. The 'R' operand returns
22026 the register number of the register containing the most
22027 significant part of the value.
22028
22029 The 'H' operand returns the higher of the two register numbers.
22030 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22031 same as the 'Q' operand, since the most significant part of the
22032 value is held in the lower number register. The reverse is true
22033 on systems where WORDS_BIG_ENDIAN is false.
22034
22035 The purpose of these operands is to distinguish between cases
22036 where the endian-ness of the values is important (for example
22037 when they are added together), and cases where the endian-ness
22038 is irrelevant, but the order of register operations is important.
22039 For example when loading a value from memory into a register
22040 pair, the endian-ness does not matter. Provided that the value
22041 from the lower memory address is put into the lower numbered
22042 register, and the value from the higher address is put into the
22043 higher numbered register, the load will work regardless of whether
22044 the value being loaded is big-wordian or little-wordian. The
22045 order of the two register loads can matter however, if the address
22046 of the memory location is actually held in one of the registers
22047 being overwritten by the load.
22048
22049 The 'Q' and 'R' constraints are also available for 64-bit
22050 constants. */
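/* Illustrative example (added): for a little-endian DImode value held in
   the register pair {r0, r1}, '%Q' prints r0 (least significant half),
   '%R' prints r1 (most significant half) and '%H' prints r1 (the higher
   register number).  */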
22051 case 'Q':
22052 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22053 {
22054 rtx part = gen_lowpart (SImode, x);
22055 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22056 return;
22057 }
22058
22059 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22066 return;
22067
22068 case 'R':
22069 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22070 {
22071 machine_mode mode = GET_MODE (x);
22072 rtx part;
22073
22074 if (mode == VOIDmode)
22075 mode = DImode;
22076 part = gen_highpart_mode (SImode, mode, x);
22077 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22078 return;
22079 }
22080
22081 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22082 {
22083 output_operand_lossage ("invalid operand for code '%c'", code);
22084 return;
22085 }
22086
22087 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22088 return;
22089
22090 case 'H':
22091 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22092 {
22093 output_operand_lossage ("invalid operand for code '%c'", code);
22094 return;
22095 }
22096
22097 asm_fprintf (stream, "%r", REGNO (x) + 1);
22098 return;
22099
22100 case 'J':
22101 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22102 {
22103 output_operand_lossage ("invalid operand for code '%c'", code);
22104 return;
22105 }
22106
22107 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22108 return;
22109
22110 case 'K':
22111 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22112 {
22113 output_operand_lossage ("invalid operand for code '%c'", code);
22114 return;
22115 }
22116
22117 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22118 return;
22119
22120 case 'm':
22121 asm_fprintf (stream, "%r",
22122 REG_P (XEXP (x, 0))
22123 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22124 return;
22125
22126 case 'M':
22127 asm_fprintf (stream, "{%r-%r}",
22128 REGNO (x),
22129 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22130 return;
22131
22132 /* Like 'M', but writing doubleword vector registers, for use by Neon
22133 insns. */
22134 case 'h':
22135 {
22136 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22137 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22138 if (numregs == 1)
22139 asm_fprintf (stream, "{d%d}", regno);
22140 else
22141 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22142 }
22143 return;
22144
22145 case 'd':
22146 /* CONST_TRUE_RTX means always -- that's the default. */
22147 if (x == const_true_rtx)
22148 return;
22149
22150 if (!COMPARISON_P (x))
22151 {
22152 output_operand_lossage ("invalid operand for code '%c'", code);
22153 return;
22154 }
22155
22156 fputs (arm_condition_codes[get_arm_condition_code (x)],
22157 stream);
22158 return;
22159
22160 case 'D':
22161 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22162 want to do that. */
22163 if (x == const_true_rtx)
22164 {
22165 output_operand_lossage ("instruction never executed");
22166 return;
22167 }
22168 if (!COMPARISON_P (x))
22169 {
22170 output_operand_lossage ("invalid operand for code '%c'", code);
22171 return;
22172 }
22173
22174 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22175 (get_arm_condition_code (x))],
22176 stream);
22177 return;
22178
22179 case 's':
22180 case 'V':
22181 case 'W':
22182 case 'X':
22183 case 'Y':
22184 case 'Z':
22185 /* Former Maverick support, removed after GCC-4.7. */
22186 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22187 return;
22188
22189 case 'U':
22190 if (!REG_P (x)
22191 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22192 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22193 /* Bad value for wCG register number. */
22194 {
22195 output_operand_lossage ("invalid operand for code '%c'", code);
22196 return;
22197 }
22198
22199 else
22200 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22201 return;
22202
22203 /* Print an iWMMXt control register name. */
22204 case 'w':
22205 if (!CONST_INT_P (x)
22206 || INTVAL (x) < 0
22207 || INTVAL (x) >= 16)
22208 /* Bad value for wC register number. */
22209 {
22210 output_operand_lossage ("invalid operand for code '%c'", code);
22211 return;
22212 }
22213
22214 else
22215 {
22216 static const char * wc_reg_names [16] =
22217 {
22218 "wCID", "wCon", "wCSSF", "wCASF",
22219 "wC4", "wC5", "wC6", "wC7",
22220 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22221 "wC12", "wC13", "wC14", "wC15"
22222 };
22223
22224 fputs (wc_reg_names [INTVAL (x)], stream);
22225 }
22226 return;
22227
22228 /* Print the high single-precision register of a VFP double-precision
22229 register. */
22230 case 'p':
22231 {
22232 machine_mode mode = GET_MODE (x);
22233 int regno;
22234
22235 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22236 {
22237 output_operand_lossage ("invalid operand for code '%c'", code);
22238 return;
22239 }
22240
22241 regno = REGNO (x);
22242 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22243 {
22244 output_operand_lossage ("invalid operand for code '%c'", code);
22245 return;
22246 }
22247
22248 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22249 }
22250 return;
22251
22252 /* Print a VFP/Neon double precision or quad precision register name. */
22253 case 'P':
22254 case 'q':
22255 {
22256 machine_mode mode = GET_MODE (x);
22257 int is_quad = (code == 'q');
22258 int regno;
22259
22260 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22261 {
22262 output_operand_lossage ("invalid operand for code '%c'", code);
22263 return;
22264 }
22265
22266 if (!REG_P (x)
22267 || !IS_VFP_REGNUM (REGNO (x)))
22268 {
22269 output_operand_lossage ("invalid operand for code '%c'", code);
22270 return;
22271 }
22272
22273 regno = REGNO (x);
22274 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22275 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22276 {
22277 output_operand_lossage ("invalid operand for code '%c'", code);
22278 return;
22279 }
22280
22281 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22282 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22283 }
22284 return;
22285
22286 /* These two codes print the low/high doubleword register of a Neon quad
22287 register, respectively. For pair-structure types, can also print
22288 low/high quadword registers. */
22289 case 'e':
22290 case 'f':
22291 {
22292 machine_mode mode = GET_MODE (x);
22293 int regno;
22294
22295 if ((GET_MODE_SIZE (mode) != 16
22296 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22297 {
22298 output_operand_lossage ("invalid operand for code '%c'", code);
22299 return;
22300 }
22301
22302 regno = REGNO (x);
22303 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22304 {
22305 output_operand_lossage ("invalid operand for code '%c'", code);
22306 return;
22307 }
22308
22309 if (GET_MODE_SIZE (mode) == 16)
22310 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22311 + (code == 'f' ? 1 : 0));
22312 else
22313 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22314 + (code == 'f' ? 1 : 0));
22315 }
22316 return;
22317
22318 /* Print a VFPv3 floating-point constant, represented as an integer
22319 index. */
22320 case 'G':
22321 {
22322 int index = vfp3_const_double_index (x);
22323 gcc_assert (index != -1);
22324 fprintf (stream, "%d", index);
22325 }
22326 return;
22327
22328 /* Print bits representing opcode features for Neon.
22329
22330 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22331 and polynomials as unsigned.
22332
22333 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22334
22335 Bit 2 is 1 for rounding functions, 0 otherwise. */
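/* Illustrative examples (added): X == 1 (signed integer) makes '%T' print
   's' and X == 3 (float) makes it print 'f', while '%F' prints 'i' for
   both signed and unsigned integers.  */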
22336
22337 /* Identify the type as 's', 'u', 'p' or 'f'. */
22338 case 'T':
22339 {
22340 HOST_WIDE_INT bits = INTVAL (x);
22341 fputc ("uspf"[bits & 3], stream);
22342 }
22343 return;
22344
22345 /* Likewise, but signed and unsigned integers are both 'i'. */
22346 case 'F':
22347 {
22348 HOST_WIDE_INT bits = INTVAL (x);
22349 fputc ("iipf"[bits & 3], stream);
22350 }
22351 return;
22352
22353 /* As for 'T', but emit 'u' instead of 'p'. */
22354 case 't':
22355 {
22356 HOST_WIDE_INT bits = INTVAL (x);
22357 fputc ("usuf"[bits & 3], stream);
22358 }
22359 return;
22360
22361 /* Bit 2: rounding (vs none). */
22362 case 'O':
22363 {
22364 HOST_WIDE_INT bits = INTVAL (x);
22365 fputs ((bits & 4) != 0 ? "r" : "", stream);
22366 }
22367 return;
22368
22369 /* Memory operand for vld1/vst1 instruction. */
22370 case 'A':
22371 {
22372 rtx addr;
22373 bool postinc = FALSE;
22374 rtx postinc_reg = NULL;
22375 unsigned align, memsize, align_bits;
22376
22377 gcc_assert (MEM_P (x));
22378 addr = XEXP (x, 0);
22379 if (GET_CODE (addr) == POST_INC)
22380 {
22381 postinc = 1;
22382 addr = XEXP (addr, 0);
22383 }
22384 if (GET_CODE (addr) == POST_MODIFY)
22385 {
22386 postinc_reg = XEXP( XEXP (addr, 1), 1);
22387 addr = XEXP (addr, 0);
22388 }
22389 asm_fprintf (stream, "[%r", REGNO (addr));
22390
22391 /* We know the alignment of this access, so we can emit a hint in the
22392 instruction (for some alignments) as an aid to the memory subsystem
22393 of the target. */
22394 align = MEM_ALIGN (x) >> 3;
22395 memsize = MEM_SIZE (x);
22396
22397 /* Only certain alignment specifiers are supported by the hardware. */
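/* For example (illustrative), a 16-byte vld1/vst1 access that is known to
   be 16-byte aligned is printed with a ":128" hint, e.g. "[r0:128]".  */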
22398 if (memsize == 32 && (align % 32) == 0)
22399 align_bits = 256;
22400 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22401 align_bits = 128;
22402 else if (memsize >= 8 && (align % 8) == 0)
22403 align_bits = 64;
22404 else
22405 align_bits = 0;
22406
22407 if (align_bits != 0)
22408 asm_fprintf (stream, ":%d", align_bits);
22409
22410 asm_fprintf (stream, "]");
22411
22412 if (postinc)
22413 fputs("!", stream);
22414 if (postinc_reg)
22415 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22416 }
22417 return;
22418
22419 case 'C':
22420 {
22421 rtx addr;
22422
22423 gcc_assert (MEM_P (x));
22424 addr = XEXP (x, 0);
22425 gcc_assert (REG_P (addr));
22426 asm_fprintf (stream, "[%r]", REGNO (addr));
22427 }
22428 return;
22429
22430 /* Translate an S register number into a D register number and element index. */
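/* For example (illustrative), s1 is printed as "d0[1]" and s2 as "d1[0]".  */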
22431 case 'y':
22432 {
22433 machine_mode mode = GET_MODE (x);
22434 int regno;
22435
22436 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22437 {
22438 output_operand_lossage ("invalid operand for code '%c'", code);
22439 return;
22440 }
22441
22442 regno = REGNO (x);
22443 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22444 {
22445 output_operand_lossage ("invalid operand for code '%c'", code);
22446 return;
22447 }
22448
22449 regno = regno - FIRST_VFP_REGNUM;
22450 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22451 }
22452 return;
22453
22454 case 'v':
22455 gcc_assert (CONST_DOUBLE_P (x));
22456 int result;
22457 result = vfp3_const_double_for_fract_bits (x);
22458 if (result == 0)
22459 result = vfp3_const_double_for_bits (x);
22460 fprintf (stream, "#%d", result);
22461 return;
22462
22463 /* Register specifier for vld1.16/vst1.16. Translate the S register
22464 number into a D register number and element index. */
22465 case 'z':
22466 {
22467 machine_mode mode = GET_MODE (x);
22468 int regno;
22469
22470 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22471 {
22472 output_operand_lossage ("invalid operand for code '%c'", code);
22473 return;
22474 }
22475
22476 regno = REGNO (x);
22477 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22478 {
22479 output_operand_lossage ("invalid operand for code '%c'", code);
22480 return;
22481 }
22482
22483 regno = regno - FIRST_VFP_REGNUM;
22484 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22485 }
22486 return;
22487
22488 default:
22489 if (x == 0)
22490 {
22491 output_operand_lossage ("missing operand");
22492 return;
22493 }
22494
22495 switch (GET_CODE (x))
22496 {
22497 case REG:
22498 asm_fprintf (stream, "%r", REGNO (x));
22499 break;
22500
22501 case MEM:
22502 output_address (GET_MODE (x), XEXP (x, 0));
22503 break;
22504
22505 case CONST_DOUBLE:
22506 {
22507 char fpstr[20];
22508 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22509 sizeof (fpstr), 0, 1);
22510 fprintf (stream, "#%s", fpstr);
22511 }
22512 break;
22513
22514 default:
22515 gcc_assert (GET_CODE (x) != NEG);
22516 fputc ('#', stream);
22517 if (GET_CODE (x) == HIGH)
22518 {
22519 fputs (":lower16:", stream);
22520 x = XEXP (x, 0);
22521 }
22522
22523 output_addr_const (stream, x);
22524 break;
22525 }
22526 }
22527 }
22528 \f
22529 /* Target hook for printing a memory address. */
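/* Illustrative examples (added) of the 32-bit output formats below:
   (reg r1)                       -> [r1]
   (plus (reg r1) (const_int 8))  -> [r1, #8]
   (post_inc (reg r1)) in SImode  -> [r1], #4  */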
22530 static void
22531 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22532 {
22533 if (TARGET_32BIT)
22534 {
22535 int is_minus = GET_CODE (x) == MINUS;
22536
22537 if (REG_P (x))
22538 asm_fprintf (stream, "[%r]", REGNO (x));
22539 else if (GET_CODE (x) == PLUS || is_minus)
22540 {
22541 rtx base = XEXP (x, 0);
22542 rtx index = XEXP (x, 1);
22543 HOST_WIDE_INT offset = 0;
22544 if (!REG_P (base)
22545 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22546 {
22547 /* Ensure that BASE is a register
22548 (one of them must be). Also ensure that
22549 SP is not used as an index register. */
22550 std::swap (base, index);
22551 }
22552 switch (GET_CODE (index))
22553 {
22554 case CONST_INT:
22555 offset = INTVAL (index);
22556 if (is_minus)
22557 offset = -offset;
22558 asm_fprintf (stream, "[%r, #%wd]",
22559 REGNO (base), offset);
22560 break;
22561
22562 case REG:
22563 asm_fprintf (stream, "[%r, %s%r]",
22564 REGNO (base), is_minus ? "-" : "",
22565 REGNO (index));
22566 break;
22567
22568 case MULT:
22569 case ASHIFTRT:
22570 case LSHIFTRT:
22571 case ASHIFT:
22572 case ROTATERT:
22573 {
22574 asm_fprintf (stream, "[%r, %s%r",
22575 REGNO (base), is_minus ? "-" : "",
22576 REGNO (XEXP (index, 0)));
22577 arm_print_operand (stream, index, 'S');
22578 fputs ("]", stream);
22579 break;
22580 }
22581
22582 default:
22583 gcc_unreachable ();
22584 }
22585 }
22586 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22587 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22588 {
22589 gcc_assert (REG_P (XEXP (x, 0)));
22590
22591 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22592 asm_fprintf (stream, "[%r, #%s%d]!",
22593 REGNO (XEXP (x, 0)),
22594 GET_CODE (x) == PRE_DEC ? "-" : "",
22595 GET_MODE_SIZE (mode));
22596 else
22597 asm_fprintf (stream, "[%r], #%s%d",
22598 REGNO (XEXP (x, 0)),
22599 GET_CODE (x) == POST_DEC ? "-" : "",
22600 GET_MODE_SIZE (mode));
22601 }
22602 else if (GET_CODE (x) == PRE_MODIFY)
22603 {
22604 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22605 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22606 asm_fprintf (stream, "#%wd]!",
22607 INTVAL (XEXP (XEXP (x, 1), 1)));
22608 else
22609 asm_fprintf (stream, "%r]!",
22610 REGNO (XEXP (XEXP (x, 1), 1)));
22611 }
22612 else if (GET_CODE (x) == POST_MODIFY)
22613 {
22614 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22615 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22616 asm_fprintf (stream, "#%wd",
22617 INTVAL (XEXP (XEXP (x, 1), 1)));
22618 else
22619 asm_fprintf (stream, "%r",
22620 REGNO (XEXP (XEXP (x, 1), 1)));
22621 }
22622 else output_addr_const (stream, x);
22623 }
22624 else
22625 {
22626 if (REG_P (x))
22627 asm_fprintf (stream, "[%r]", REGNO (x));
22628 else if (GET_CODE (x) == POST_INC)
22629 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22630 else if (GET_CODE (x) == PLUS)
22631 {
22632 gcc_assert (REG_P (XEXP (x, 0)));
22633 if (CONST_INT_P (XEXP (x, 1)))
22634 asm_fprintf (stream, "[%r, #%wd]",
22635 REGNO (XEXP (x, 0)),
22636 INTVAL (XEXP (x, 1)));
22637 else
22638 asm_fprintf (stream, "[%r, %r]",
22639 REGNO (XEXP (x, 0)),
22640 REGNO (XEXP (x, 1)));
22641 }
22642 else
22643 output_addr_const (stream, x);
22644 }
22645 }
22646 \f
22647 /* Target hook for indicating whether a punctuation character for
22648 TARGET_PRINT_OPERAND is valid. */
22649 static bool
22650 arm_print_operand_punct_valid_p (unsigned char code)
22651 {
22652 return (code == '@' || code == '|' || code == '.'
22653 || code == '(' || code == ')' || code == '#'
22654 || (TARGET_32BIT && (code == '?'))
22655 || (TARGET_THUMB2 && (code == '!'))
22656 || (TARGET_THUMB && (code == '_')));
22657 }
22658 \f
22659 /* Target hook for assembling integer objects. The ARM version needs to
22660 handle word-sized values specially. */
22661 static bool
22662 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22663 {
22664 machine_mode mode;
22665
22666 if (size == UNITS_PER_WORD && aligned_p)
22667 {
22668 fputs ("\t.word\t", asm_out_file);
22669 output_addr_const (asm_out_file, x);
22670
22671 /* Mark symbols as position independent. We only do this in the
22672 .text segment, not in the .data segment. */
22673 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22674 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22675 {
22676 /* See legitimize_pic_address for an explanation of the
22677 TARGET_VXWORKS_RTP check. */
22678 /* References to weak symbols cannot be resolved locally:
22679 they may be overridden by a non-weak definition at link
22680 time. */
22681 if (!arm_pic_data_is_text_relative
22682 || (GET_CODE (x) == SYMBOL_REF
22683 && (!SYMBOL_REF_LOCAL_P (x)
22684 || (SYMBOL_REF_DECL (x)
22685 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22686 fputs ("(GOT)", asm_out_file);
22687 else
22688 fputs ("(GOTOFF)", asm_out_file);
22689 }
22690 fputc ('\n', asm_out_file);
22691 return true;
22692 }
22693
22694 mode = GET_MODE (x);
22695
22696 if (arm_vector_mode_supported_p (mode))
22697 {
22698 int i, units;
22699
22700 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22701
22702 units = CONST_VECTOR_NUNITS (x);
22703 size = GET_MODE_UNIT_SIZE (mode);
22704
22705 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22706 for (i = 0; i < units; i++)
22707 {
22708 rtx elt = CONST_VECTOR_ELT (x, i);
22709 assemble_integer
22710 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22711 }
22712 else
22713 for (i = 0; i < units; i++)
22714 {
22715 rtx elt = CONST_VECTOR_ELT (x, i);
22716 assemble_real
22717 (*CONST_DOUBLE_REAL_VALUE (elt),
22718 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22719 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22720 }
22721
22722 return true;
22723 }
22724
22725 return default_assemble_integer (x, size, aligned_p);
22726 }
22727
22728 static void
22729 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22730 {
22731 section *s;
22732
22733 if (!TARGET_AAPCS_BASED)
22734 {
22735 (is_ctor ?
22736 default_named_section_asm_out_constructor
22737 : default_named_section_asm_out_destructor) (symbol, priority);
22738 return;
22739 }
22740
22741 /* Put these in the .init_array section, using a special relocation. */
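/* For example (illustrative), a constructor with priority 101 is placed
   in a section named ".init_array.00101" by the sprintf below.  */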
22742 if (priority != DEFAULT_INIT_PRIORITY)
22743 {
22744 char buf[18];
22745 sprintf (buf, "%s.%.5u",
22746 is_ctor ? ".init_array" : ".fini_array",
22747 priority);
22748 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22749 }
22750 else if (is_ctor)
22751 s = ctors_section;
22752 else
22753 s = dtors_section;
22754
22755 switch_to_section (s);
22756 assemble_align (POINTER_SIZE);
22757 fputs ("\t.word\t", asm_out_file);
22758 output_addr_const (asm_out_file, symbol);
22759 fputs ("(target1)\n", asm_out_file);
22760 }
22761
22762 /* Add a function to the list of static constructors. */
22763
22764 static void
22765 arm_elf_asm_constructor (rtx symbol, int priority)
22766 {
22767 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22768 }
22769
22770 /* Add a function to the list of static destructors. */
22771
22772 static void
22773 arm_elf_asm_destructor (rtx symbol, int priority)
22774 {
22775 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22776 }
22777 \f
22778 /* A finite state machine takes care of noticing whether or not instructions
22779 can be conditionally executed, and thus decrease execution time and code
22780 size by deleting branch instructions. The fsm is controlled by
22781 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22782
22783 /* The states of the fsm controlling condition codes are:
22784 0: normal, do nothing special
22785 1: make ASM_OUTPUT_OPCODE not output this instruction
22786 2: make ASM_OUTPUT_OPCODE not output this instruction
22787 3: make instructions conditional
22788 4: make instructions conditional
22789
22790 State transitions (state->state by whom under condition):
22791 0 -> 1 final_prescan_insn if the `target' is a label
22792 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22793 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22794 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22795 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22796 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22797 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22798 (the target insn is arm_target_insn).
22799
22800 If the jump clobbers the conditions then we use states 2 and 4.
22801
22802 A similar thing can be done with conditional return insns.
22803
22804 XXX In case the `target' is an unconditional branch, this conditionalising
22805 of the instructions always reduces code size, but not always execution
22806 time. But then, I want to reduce the code size to somewhere near what
22807 /bin/cc produces. */
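/* Illustrative example (added) of the effect of this state machine: a
   sequence such as "cmp r0, #0; beq L1; add r1, r1, #1; L1:" can be
   output as "cmp r0, #0; addne r1, r1, #1", deleting the branch and
   predicating the skipped instruction on the inverse condition.  */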
22808
22809 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22810 instructions. When a COND_EXEC instruction is seen the subsequent
22811 instructions are scanned so that multiple conditional instructions can be
22812 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22813 specify the length and true/false mask for the IT block. These will be
22814 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22815
22816 /* Returns the index of the ARM condition code string in
22817 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22818 COMPARISON should be an rtx like `(eq (...) (...))'. */
22819
22820 enum arm_cond_code
22821 maybe_get_arm_condition_code (rtx comparison)
22822 {
22823 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22824 enum arm_cond_code code;
22825 enum rtx_code comp_code = GET_CODE (comparison);
22826
22827 if (GET_MODE_CLASS (mode) != MODE_CC)
22828 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22829 XEXP (comparison, 1));
22830
22831 switch (mode)
22832 {
22833 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22834 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22835 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22836 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22837 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22838 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22839 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22840 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22841 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22842 case E_CC_DLTUmode: code = ARM_CC;
22843
22844 dominance:
22845 if (comp_code == EQ)
22846 return ARM_INVERSE_CONDITION_CODE (code);
22847 if (comp_code == NE)
22848 return code;
22849 return ARM_NV;
22850
22851 case E_CC_NOOVmode:
22852 switch (comp_code)
22853 {
22854 case NE: return ARM_NE;
22855 case EQ: return ARM_EQ;
22856 case GE: return ARM_PL;
22857 case LT: return ARM_MI;
22858 default: return ARM_NV;
22859 }
22860
22861 case E_CC_Zmode:
22862 switch (comp_code)
22863 {
22864 case NE: return ARM_NE;
22865 case EQ: return ARM_EQ;
22866 default: return ARM_NV;
22867 }
22868
22869 case E_CC_Nmode:
22870 switch (comp_code)
22871 {
22872 case NE: return ARM_MI;
22873 case EQ: return ARM_PL;
22874 default: return ARM_NV;
22875 }
22876
22877 case E_CCFPEmode:
22878 case E_CCFPmode:
22879 /* We can handle all cases except UNEQ and LTGT. */
22880 switch (comp_code)
22881 {
22882 case GE: return ARM_GE;
22883 case GT: return ARM_GT;
22884 case LE: return ARM_LS;
22885 case LT: return ARM_MI;
22886 case NE: return ARM_NE;
22887 case EQ: return ARM_EQ;
22888 case ORDERED: return ARM_VC;
22889 case UNORDERED: return ARM_VS;
22890 case UNLT: return ARM_LT;
22891 case UNLE: return ARM_LE;
22892 case UNGT: return ARM_HI;
22893 case UNGE: return ARM_PL;
22894 /* UNEQ and LTGT do not have a representation. */
22895 case UNEQ: /* Fall through. */
22896 case LTGT: /* Fall through. */
22897 default: return ARM_NV;
22898 }
22899
22900 case E_CC_SWPmode:
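/* Added note (assumption based on the mappings below): the operands of
   the comparison were swapped when the flags were set, so each condition
   is replaced by its swapped counterpart (GE <-> LE, GT <-> LT,
   GEU <-> LEU, GTU <-> LTU) rather than by its inverse.  */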
22901 switch (comp_code)
22902 {
22903 case NE: return ARM_NE;
22904 case EQ: return ARM_EQ;
22905 case GE: return ARM_LE;
22906 case GT: return ARM_LT;
22907 case LE: return ARM_GE;
22908 case LT: return ARM_GT;
22909 case GEU: return ARM_LS;
22910 case GTU: return ARM_CC;
22911 case LEU: return ARM_CS;
22912 case LTU: return ARM_HI;
22913 default: return ARM_NV;
22914 }
22915
22916 case E_CC_Cmode:
22917 switch (comp_code)
22918 {
22919 case LTU: return ARM_CS;
22920 case GEU: return ARM_CC;
22921 case NE: return ARM_CS;
22922 case EQ: return ARM_CC;
22923 default: return ARM_NV;
22924 }
22925
22926 case E_CC_CZmode:
22927 switch (comp_code)
22928 {
22929 case NE: return ARM_NE;
22930 case EQ: return ARM_EQ;
22931 case GEU: return ARM_CS;
22932 case GTU: return ARM_HI;
22933 case LEU: return ARM_LS;
22934 case LTU: return ARM_CC;
22935 default: return ARM_NV;
22936 }
22937
22938 case E_CC_NCVmode:
22939 switch (comp_code)
22940 {
22941 case GE: return ARM_GE;
22942 case LT: return ARM_LT;
22943 case GEU: return ARM_CS;
22944 case LTU: return ARM_CC;
22945 default: return ARM_NV;
22946 }
22947
22948 case E_CC_Vmode:
22949 switch (comp_code)
22950 {
22951 case NE: return ARM_VS;
22952 case EQ: return ARM_VC;
22953 default: return ARM_NV;
22954 }
22955
22956 case E_CCmode:
22957 switch (comp_code)
22958 {
22959 case NE: return ARM_NE;
22960 case EQ: return ARM_EQ;
22961 case GE: return ARM_GE;
22962 case GT: return ARM_GT;
22963 case LE: return ARM_LE;
22964 case LT: return ARM_LT;
22965 case GEU: return ARM_CS;
22966 case GTU: return ARM_HI;
22967 case LEU: return ARM_LS;
22968 case LTU: return ARM_CC;
22969 default: return ARM_NV;
22970 }
22971
22972 default: gcc_unreachable ();
22973 }
22974 }
22975
22976 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22977 static enum arm_cond_code
22978 get_arm_condition_code (rtx comparison)
22979 {
22980 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22981 gcc_assert (code != ARM_NV);
22982 return code;
22983 }
22984
22985 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22986 code registers when not targeting Thumb1. The VFP condition register
22987 only exists when generating hard-float code. */
22988 static bool
22989 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22990 {
22991 if (!TARGET_32BIT)
22992 return false;
22993
22994 *p1 = CC_REGNUM;
22995 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22996 return true;
22997 }
22998
22999 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23000 instructions. */
23001 void
23002 thumb2_final_prescan_insn (rtx_insn *insn)
23003 {
23004 rtx_insn *first_insn = insn;
23005 rtx body = PATTERN (insn);
23006 rtx predicate;
23007 enum arm_cond_code code;
23008 int n;
23009 int mask;
23010 int max;
23011
23012 /* max_insns_skipped in the tune was already taken into account in the
23013 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23014 just emit IT blocks as large as we can. It does not make sense to split
23015 the IT blocks. */
23016 max = MAX_INSN_PER_IT_BLOCK;
23017
23018 /* Remove the previous insn from the count of insns to be output. */
23019 if (arm_condexec_count)
23020 arm_condexec_count--;
23021
23022 /* Nothing to do if we are already inside a conditional block. */
23023 if (arm_condexec_count)
23024 return;
23025
23026 if (GET_CODE (body) != COND_EXEC)
23027 return;
23028
23029 /* Conditional jumps are implemented directly. */
23030 if (JUMP_P (insn))
23031 return;
23032
23033 predicate = COND_EXEC_TEST (body);
23034 arm_current_cc = get_arm_condition_code (predicate);
23035
23036 n = get_attr_ce_count (insn);
23037 arm_condexec_count = 1;
23038 arm_condexec_mask = (1 << n) - 1;
23039 arm_condexec_masklen = n;
23040 /* See if subsequent instructions can be combined into the same block. */
23041 for (;;)
23042 {
23043 insn = next_nonnote_insn (insn);
23044
23045 /* Jumping into the middle of an IT block is illegal, so a label or
23046 barrier terminates the block. */
23047 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23048 break;
23049
23050 body = PATTERN (insn);
23051 /* USE and CLOBBER aren't really insns, so just skip them. */
23052 if (GET_CODE (body) == USE
23053 || GET_CODE (body) == CLOBBER)
23054 continue;
23055
23056 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23057 if (GET_CODE (body) != COND_EXEC)
23058 break;
23059 /* Maximum number of conditionally executed instructions in a block. */
23060 n = get_attr_ce_count (insn);
23061 if (arm_condexec_masklen + n > max)
23062 break;
23063
23064 predicate = COND_EXEC_TEST (body);
23065 code = get_arm_condition_code (predicate);
23066 mask = (1 << n) - 1;
23067 if (arm_current_cc == code)
23068 arm_condexec_mask |= (mask << arm_condexec_masklen);
23069 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23070 break;
23071
23072 arm_condexec_count++;
23073 arm_condexec_masklen += n;
23074
23075 /* A jump must be the last instruction in a conditional block. */
23076 if (JUMP_P (insn))
23077 break;
23078 }
23079 /* Restore recog_data (getting the attributes of other insns can
23080 destroy this array, but final.c assumes that it remains intact
23081 across this call). */
23082 extract_constrain_insn_cached (first_insn);
23083 }
23084
23085 void
23086 arm_final_prescan_insn (rtx_insn *insn)
23087 {
23088 /* BODY will hold the body of INSN. */
23089 rtx body = PATTERN (insn);
23090
23091 /* This will be 1 if trying to repeat the trick, and things need to be
23092 reversed if it appears to fail. */
23093 int reverse = 0;
23094
23095 /* If we start with a return insn, we only succeed if we find another one. */
23096 int seeking_return = 0;
23097 enum rtx_code return_code = UNKNOWN;
23098
23099 /* START_INSN will hold the insn from where we start looking. This is the
23100 first insn after the following code_label if REVERSE is true. */
23101 rtx_insn *start_insn = insn;
23102
23103 /* If in state 4, check if the target branch is reached, in order to
23104 change back to state 0. */
23105 if (arm_ccfsm_state == 4)
23106 {
23107 if (insn == arm_target_insn)
23108 {
23109 arm_target_insn = NULL;
23110 arm_ccfsm_state = 0;
23111 }
23112 return;
23113 }
23114
23115 /* If in state 3, it is possible to repeat the trick, if this insn is an
23116 unconditional branch to a label, and immediately following this branch
23117 is the previous target label which is only used once, and the label this
23118 branch jumps to is not too far off. */
23119 if (arm_ccfsm_state == 3)
23120 {
23121 if (simplejump_p (insn))
23122 {
23123 start_insn = next_nonnote_insn (start_insn);
23124 if (BARRIER_P (start_insn))
23125 {
23126 /* XXX Isn't this always a barrier? */
23127 start_insn = next_nonnote_insn (start_insn);
23128 }
23129 if (LABEL_P (start_insn)
23130 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23131 && LABEL_NUSES (start_insn) == 1)
23132 reverse = TRUE;
23133 else
23134 return;
23135 }
23136 else if (ANY_RETURN_P (body))
23137 {
23138 start_insn = next_nonnote_insn (start_insn);
23139 if (BARRIER_P (start_insn))
23140 start_insn = next_nonnote_insn (start_insn);
23141 if (LABEL_P (start_insn)
23142 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23143 && LABEL_NUSES (start_insn) == 1)
23144 {
23145 reverse = TRUE;
23146 seeking_return = 1;
23147 return_code = GET_CODE (body);
23148 }
23149 else
23150 return;
23151 }
23152 else
23153 return;
23154 }
23155
23156 gcc_assert (!arm_ccfsm_state || reverse);
23157 if (!JUMP_P (insn))
23158 return;
23159
23160 /* This jump might be paralleled with a clobber of the condition codes;
23161 the jump should always come first. */
23162 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23163 body = XVECEXP (body, 0, 0);
23164
23165 if (reverse
23166 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23167 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23168 {
23169 int insns_skipped;
23170 int fail = FALSE, succeed = FALSE;
23171 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23172 int then_not_else = TRUE;
23173 rtx_insn *this_insn = start_insn;
23174 rtx label = 0;
23175
23176 /* Register the insn jumped to. */
23177 if (reverse)
23178 {
23179 if (!seeking_return)
23180 label = XEXP (SET_SRC (body), 0);
23181 }
23182 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23183 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23184 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23185 {
23186 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23187 then_not_else = FALSE;
23188 }
23189 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23190 {
23191 seeking_return = 1;
23192 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23193 }
23194 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23195 {
23196 seeking_return = 1;
23197 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23198 then_not_else = FALSE;
23199 }
23200 else
23201 gcc_unreachable ();
23202
23203 /* See how many insns this branch skips, and what kind of insns. If all
23204 insns are okay, and the label or unconditional branch to the same
23205 label is not too far away, succeed. */
23206 for (insns_skipped = 0;
23207 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23208 {
23209 rtx scanbody;
23210
23211 this_insn = next_nonnote_insn (this_insn);
23212 if (!this_insn)
23213 break;
23214
23215 switch (GET_CODE (this_insn))
23216 {
23217 case CODE_LABEL:
23218 /* Succeed if it is the target label, otherwise fail since
23219 control falls in from somewhere else. */
23220 if (this_insn == label)
23221 {
23222 arm_ccfsm_state = 1;
23223 succeed = TRUE;
23224 }
23225 else
23226 fail = TRUE;
23227 break;
23228
23229 case BARRIER:
23230 /* Succeed if the following insn is the target label.
23231 Otherwise fail.
23232 If return insns are used then the last insn in a function
23233 will be a barrier. */
23234 this_insn = next_nonnote_insn (this_insn);
23235 if (this_insn && this_insn == label)
23236 {
23237 arm_ccfsm_state = 1;
23238 succeed = TRUE;
23239 }
23240 else
23241 fail = TRUE;
23242 break;
23243
23244 case CALL_INSN:
23245 /* The AAPCS says that conditional calls should not be
23246 used since they make interworking inefficient (the
23247 linker can't transform BL<cond> into BLX). That's
23248 only a problem if the machine has BLX. */
23249 if (arm_arch5)
23250 {
23251 fail = TRUE;
23252 break;
23253 }
23254
23255 /* Succeed if the following insn is the target label, or
23256 if the following two insns are a barrier and the
23257 target label. */
23258 this_insn = next_nonnote_insn (this_insn);
23259 if (this_insn && BARRIER_P (this_insn))
23260 this_insn = next_nonnote_insn (this_insn);
23261
23262 if (this_insn && this_insn == label
23263 && insns_skipped < max_insns_skipped)
23264 {
23265 arm_ccfsm_state = 1;
23266 succeed = TRUE;
23267 }
23268 else
23269 fail = TRUE;
23270 break;
23271
23272 case JUMP_INSN:
23273 /* If this is an unconditional branch to the same label, succeed.
23274 If it is to another label, do nothing. If it is conditional,
23275 fail. */
23276 /* XXX Probably, the tests for SET and the PC are
23277 unnecessary. */
23278
23279 scanbody = PATTERN (this_insn);
23280 if (GET_CODE (scanbody) == SET
23281 && GET_CODE (SET_DEST (scanbody)) == PC)
23282 {
23283 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23284 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23285 {
23286 arm_ccfsm_state = 2;
23287 succeed = TRUE;
23288 }
23289 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23290 fail = TRUE;
23291 }
23292 /* Fail if a conditional return is undesirable (e.g. on a
23293 StrongARM), but still allow this if optimizing for size. */
23294 else if (GET_CODE (scanbody) == return_code
23295 && !use_return_insn (TRUE, NULL)
23296 && !optimize_size)
23297 fail = TRUE;
23298 else if (GET_CODE (scanbody) == return_code)
23299 {
23300 arm_ccfsm_state = 2;
23301 succeed = TRUE;
23302 }
23303 else if (GET_CODE (scanbody) == PARALLEL)
23304 {
23305 switch (get_attr_conds (this_insn))
23306 {
23307 case CONDS_NOCOND:
23308 break;
23309 default:
23310 fail = TRUE;
23311 break;
23312 }
23313 }
23314 else
23315 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23316
23317 break;
23318
23319 case INSN:
23320 /* Instructions using or affecting the condition codes make it
23321 fail. */
23322 scanbody = PATTERN (this_insn);
23323 if (!(GET_CODE (scanbody) == SET
23324 || GET_CODE (scanbody) == PARALLEL)
23325 || get_attr_conds (this_insn) != CONDS_NOCOND)
23326 fail = TRUE;
23327 break;
23328
23329 default:
23330 break;
23331 }
23332 }
23333 if (succeed)
23334 {
23335 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23336 arm_target_label = CODE_LABEL_NUMBER (label);
23337 else
23338 {
23339 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23340
23341 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23342 {
23343 this_insn = next_nonnote_insn (this_insn);
23344 gcc_assert (!this_insn
23345 || (!BARRIER_P (this_insn)
23346 && !LABEL_P (this_insn)));
23347 }
23348 if (!this_insn)
23349 {
23350 /* Oh dear! We ran off the end; give up. */
23351 extract_constrain_insn_cached (insn);
23352 arm_ccfsm_state = 0;
23353 arm_target_insn = NULL;
23354 return;
23355 }
23356 arm_target_insn = this_insn;
23357 }
23358
23359 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23360 what it was. */
23361 if (!reverse)
23362 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23363
23364 if (reverse || then_not_else)
23365 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23366 }
23367
23368 /* Restore recog_data (getting the attributes of other insns can
23369 destroy this array, but final.c assumes that it remains intact
23370 across this call). */
23371 extract_constrain_insn_cached (insn);
23372 }
23373 }
23374
23375 /* Output IT instructions. */
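/* Illustrative example (added): for a block of two conditional
   instructions where arm_current_cc is "eq", arm_condexec_masklen is 2
   and arm_condexec_mask is 0b01, the loop below builds "te" and prints
   "ite eq" before the first instruction (first insn executed if EQ, the
   second if NE).  */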
23376 void
23377 thumb2_asm_output_opcode (FILE * stream)
23378 {
23379 char buff[5];
23380 int n;
23381
23382 if (arm_condexec_mask)
23383 {
23384 for (n = 0; n < arm_condexec_masklen; n++)
23385 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23386 buff[n] = 0;
23387 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23388 arm_condition_codes[arm_current_cc]);
23389 arm_condexec_mask = 0;
23390 }
23391 }
23392
23393 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23394 UNITS_PER_WORD bytes wide. */
23395 static unsigned int
23396 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23397 {
23398 if (TARGET_32BIT
23399 && regno > PC_REGNUM
23400 && regno != FRAME_POINTER_REGNUM
23401 && regno != ARG_POINTER_REGNUM
23402 && !IS_VFP_REGNUM (regno))
23403 return 1;
23404
23405 return ARM_NUM_REGS (mode);
23406 }
23407
23408 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23409 static bool
23410 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23411 {
23412 if (GET_MODE_CLASS (mode) == MODE_CC)
23413 return (regno == CC_REGNUM
23414 || (TARGET_HARD_FLOAT
23415 && regno == VFPCC_REGNUM));
23416
23417 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23418 return false;
23419
23420 if (TARGET_THUMB1)
23421 /* For the Thumb we only allow values bigger than SImode in
23422 registers 0 - 6, so that there is always a second low
23423 register available to hold the upper part of the value.
23424 We probably ought to ensure that the register is the
23425 start of an even numbered register pair. */
23426 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23427
23428 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23429 {
23430 if (mode == SFmode || mode == SImode)
23431 return VFP_REGNO_OK_FOR_SINGLE (regno);
23432
23433 if (mode == DFmode)
23434 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23435
23436 if (mode == HFmode)
23437 return VFP_REGNO_OK_FOR_SINGLE (regno);
23438
23439 /* VFP registers can hold HImode values. */
23440 if (mode == HImode)
23441 return VFP_REGNO_OK_FOR_SINGLE (regno);
23442
23443 if (TARGET_NEON)
23444 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23445 || (VALID_NEON_QREG_MODE (mode)
23446 && NEON_REGNO_OK_FOR_QUAD (regno))
23447 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23448 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23449 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23450 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23451 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23452
23453 return false;
23454 }
23455
23456 if (TARGET_REALLY_IWMMXT)
23457 {
23458 if (IS_IWMMXT_GR_REGNUM (regno))
23459 return mode == SImode;
23460
23461 if (IS_IWMMXT_REGNUM (regno))
23462 return VALID_IWMMXT_REG_MODE (mode);
23463 }
23464
23465 /* We allow almost any value to be stored in the general registers.
23466 Restrict doubleword quantities to even register pairs in ARM state
23467 so that we can use ldrd. Do not allow very large Neon structure
23468 opaque modes in general registers; they would use too many. */
23469 if (regno <= LAST_ARM_REGNUM)
23470 {
23471 if (ARM_NUM_REGS (mode) > 4)
23472 return false;
23473
23474 if (TARGET_THUMB2)
23475 return true;
23476
23477 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23478 }
23479
23480 if (regno == FRAME_POINTER_REGNUM
23481 || regno == ARG_POINTER_REGNUM)
23482 /* We only allow integers in the fake hard registers. */
23483 return GET_MODE_CLASS (mode) == MODE_INT;
23484
23485 return false;
23486 }
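/* Two illustrative consequences of the checks above: in ARM state with
   TARGET_LDRD, a DImode value is allowed in the even pair r0/r1 (so
   LDRD/STRD can be used) but not starting at the odd register r1,
   whereas in Thumb-2 any core register is acceptable for modes of up
   to four registers.  */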
23487
23488 /* Implement TARGET_MODES_TIEABLE_P. */
23489
23490 static bool
23491 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23492 {
23493 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23494 return true;
23495
23496 /* We specifically want to allow elements of "structure" modes to
23497 be tieable to the structure. This more general condition allows
23498 other rarer situations too. */
23499 if (TARGET_NEON
23500 && (VALID_NEON_DREG_MODE (mode1)
23501 || VALID_NEON_QREG_MODE (mode1)
23502 || VALID_NEON_STRUCT_MODE (mode1))
23503 && (VALID_NEON_DREG_MODE (mode2)
23504 || VALID_NEON_QREG_MODE (mode2)
23505 || VALID_NEON_STRUCT_MODE (mode2)))
23506 return true;
23507
23508 return false;
23509 }
23510
23511 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23512 not used in arm mode. */
23513
23514 enum reg_class
23515 arm_regno_class (int regno)
23516 {
23517 if (regno == PC_REGNUM)
23518 return NO_REGS;
23519
23520 if (TARGET_THUMB1)
23521 {
23522 if (regno == STACK_POINTER_REGNUM)
23523 return STACK_REG;
23524 if (regno == CC_REGNUM)
23525 return CC_REG;
23526 if (regno < 8)
23527 return LO_REGS;
23528 return HI_REGS;
23529 }
23530
23531 if (TARGET_THUMB2 && regno < 8)
23532 return LO_REGS;
23533
23534 if ( regno <= LAST_ARM_REGNUM
23535 || regno == FRAME_POINTER_REGNUM
23536 || regno == ARG_POINTER_REGNUM)
23537 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23538
23539 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23540 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23541
23542 if (IS_VFP_REGNUM (regno))
23543 {
23544 if (regno <= D7_VFP_REGNUM)
23545 return VFP_D0_D7_REGS;
23546 else if (regno <= LAST_LO_VFP_REGNUM)
23547 return VFP_LO_REGS;
23548 else
23549 return VFP_HI_REGS;
23550 }
23551
23552 if (IS_IWMMXT_REGNUM (regno))
23553 return IWMMXT_REGS;
23554
23555 if (IS_IWMMXT_GR_REGNUM (regno))
23556 return IWMMXT_GR_REGS;
23557
23558 return NO_REGS;
23559 }
23560
23561 /* Handle a special case when computing the offset
23562 of an argument from the frame pointer. */
23563 int
23564 arm_debugger_arg_offset (int value, rtx addr)
23565 {
23566 rtx_insn *insn;
23567
23568 /* We are only interested if dbxout_parms() failed to compute the offset. */
23569 if (value != 0)
23570 return 0;
23571
23572 /* We can only cope with the case where the address is held in a register. */
23573 if (!REG_P (addr))
23574 return 0;
23575
23576 /* If we are using the frame pointer to point at the argument, then
23577 an offset of 0 is correct. */
23578 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23579 return 0;
23580
23581 /* If we are using the stack pointer to point at the
23582 argument, then an offset of 0 is correct. */
23583 /* ??? Check this is consistent with thumb2 frame layout. */
23584 if ((TARGET_THUMB || !frame_pointer_needed)
23585 && REGNO (addr) == SP_REGNUM)
23586 return 0;
23587
23588 /* Oh dear. The argument is pointed to by a register rather
23589 than being held in a register, or being stored at a known
23590 offset from the frame pointer. Since GDB only understands
23591 those two kinds of argument we must translate the address
23592 held in the register into an offset from the frame pointer.
23593 We do this by searching through the insns for the function
23594 looking to see where this register gets its value. If the
23595 register is initialized from the frame pointer plus an offset
23596 then we are in luck and we can continue, otherwise we give up.
23597
23598 This code is exercised by producing debugging information
23599 for a function with arguments like this:
23600
23601 double func (double a, double b, int c, double d) {return d;}
23602
23603 Without this code the stab for parameter 'd' will be set to
23604 an offset of 0 from the frame pointer, rather than 8. */
23605
23606 /* The if() statement says:
23607
23608 If the insn is a normal instruction
23609 and if the insn is setting the value in a register
23610 and if the register being set is the register holding the address of the argument
23611 and if the address is computed by an addition
23612 that involves adding to a register
23613 which is the frame pointer
23614 a constant integer
23615
23616 then... */
23617
23618 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23619 {
23620 if ( NONJUMP_INSN_P (insn)
23621 && GET_CODE (PATTERN (insn)) == SET
23622 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23623 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23624 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23625 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23626 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23627 )
23628 {
23629 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23630
23631 break;
23632 }
23633 }
23634
23635 if (value == 0)
23636 {
23637 debug_rtx (addr);
23638 warning (0, "unable to compute real location of stacked parameter");
23639 value = 8; /* XXX magic hack */
23640 }
23641
23642 return value;
23643 }
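/* A hypothetical insn matching the test above, for an argument whose
   address was copied into a scratch register from the frame pointer
   plus 8 (the register numbers are only an example):

	(set (reg:SI 4)
	     (plus:SI (reg:SI 11)	; hard frame pointer (ARM state)
		      (const_int 8)))

   Finding it sets VALUE to 8, which is the offset GDB expects.  */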
23644 \f
23645 /* Implement TARGET_PROMOTED_TYPE. */
23646
23647 static tree
23648 arm_promoted_type (const_tree t)
23649 {
23650 if (SCALAR_FLOAT_TYPE_P (t)
23651 && TYPE_PRECISION (t) == 16
23652 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23653 return float_type_node;
23654 return NULL_TREE;
23655 }
23656
23657 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23658 This simply adds HFmode as a supported mode; even though we don't
23659 implement arithmetic on this type directly, it's supported by
23660 optabs conversions, much the way the double-word arithmetic is
23661 special-cased in the default hook. */
23662
23663 static bool
23664 arm_scalar_mode_supported_p (scalar_mode mode)
23665 {
23666 if (mode == HFmode)
23667 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23668 else if (ALL_FIXED_POINT_MODE_P (mode))
23669 return true;
23670 else
23671 return default_scalar_mode_supported_p (mode);
23672 }
23673
23674 /* Set the value of FLT_EVAL_METHOD.
23675 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23676
23677 0: evaluate all operations and constants, whose semantic type has at
23678 most the range and precision of type float, to the range and
23679 precision of float; evaluate all other operations and constants to
23680 the range and precision of the semantic type;
23681
23682 N, where _FloatN is a supported interchange floating type
23683 evaluate all operations and constants, whose semantic type has at
23684 most the range and precision of _FloatN type, to the range and
23685 precision of the _FloatN type; evaluate all other operations and
23686 constants to the range and precision of the semantic type;
23687
23688 If we have the ARMv8.2-A extensions then we support _Float16 in native
23689 precision, so we should set this to 16. Otherwise, we support the type,
23690 but want to evaluate expressions in float precision, so set this to
23691 0. */
23692
23693 static enum flt_eval_method
23694 arm_excess_precision (enum excess_precision_type type)
23695 {
23696 switch (type)
23697 {
23698 case EXCESS_PRECISION_TYPE_FAST:
23699 case EXCESS_PRECISION_TYPE_STANDARD:
23700 /* We can calculate either in 16-bit range and precision or
23701 32-bit range and precision. Make that decision based on whether
23702 we have native support for the ARMv8.2-A 16-bit floating-point
23703 instructions or not. */
23704 return (TARGET_VFP_FP16INST
23705 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23706 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23707 case EXCESS_PRECISION_TYPE_IMPLICIT:
23708 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23709 default:
23710 gcc_unreachable ();
23711 }
23712 return FLT_EVAL_METHOD_UNPREDICTABLE;
23713 }
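/* For example (illustrative only), for

	_Float16 a, b, c;
	c = a * b;

   without the ARMv8.2-A FP16 instructions the multiplication is
   evaluated in 32-bit float and the result converted back to _Float16,
   while with TARGET_VFP_FP16INST it can be evaluated directly in
   16-bit precision, matching the FLT_EVAL_METHOD values returned
   above.  */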
23714
23715
23716 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23717 _Float16 if we are using anything other than ieee format for 16-bit
23718 floating point. Otherwise, punt to the default implementation. */
23719 static opt_scalar_float_mode
23720 arm_floatn_mode (int n, bool extended)
23721 {
23722 if (!extended && n == 16)
23723 {
23724 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23725 return HFmode;
23726 return opt_scalar_float_mode ();
23727 }
23728
23729 return default_floatn_mode (n, extended);
23730 }
23731
23732
23733 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23734 not to early-clobber SRC registers in the process.
23735
23736 We assume that the operands described by SRC and DEST represent a
23737 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23738 number of components into which the copy has been decomposed. */
23739 void
23740 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23741 {
23742 unsigned int i;
23743
23744 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23745 || REGNO (operands[0]) < REGNO (operands[1]))
23746 {
23747 for (i = 0; i < count; i++)
23748 {
23749 operands[2 * i] = dest[i];
23750 operands[2 * i + 1] = src[i];
23751 }
23752 }
23753 else
23754 {
23755 for (i = 0; i < count; i++)
23756 {
23757 operands[2 * i] = dest[count - i - 1];
23758 operands[2 * i + 1] = src[count - i - 1];
23759 }
23760 }
23761 }
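/* Worked example (register numbers are illustrative): for a copy
   decomposed into three doubleword moves with

	dest = { d2, d3, d4 }    src = { d1, d2, d3 }

   the source and destination overlap and REGNO (operands[0]) is
   greater than REGNO (operands[1]), so the operands are laid out in
   reverse order and the moves are emitted as d4 := d3, d3 := d2,
   d2 := d1, reading each source register before it is overwritten.  */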
23762
23763 /* Split operands into moves from op[1] + op[2] into op[0]. */
23764
23765 void
23766 neon_split_vcombine (rtx operands[3])
23767 {
23768 unsigned int dest = REGNO (operands[0]);
23769 unsigned int src1 = REGNO (operands[1]);
23770 unsigned int src2 = REGNO (operands[2]);
23771 machine_mode halfmode = GET_MODE (operands[1]);
23772 unsigned int halfregs = REG_NREGS (operands[1]);
23773 rtx destlo, desthi;
23774
23775 if (src1 == dest && src2 == dest + halfregs)
23776 {
23777 /* No-op move. Can't split to nothing; emit something. */
23778 emit_note (NOTE_INSN_DELETED);
23779 return;
23780 }
23781
23782 /* Preserve register attributes for variable tracking. */
23783 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23784 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23785 GET_MODE_SIZE (halfmode));
23786
23787 /* Special case of reversed high/low parts. Use VSWP. */
23788 if (src2 == dest && src1 == dest + halfregs)
23789 {
23790 rtx x = gen_rtx_SET (destlo, operands[1]);
23791 rtx y = gen_rtx_SET (desthi, operands[2]);
23792 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23793 return;
23794 }
23795
23796 if (!reg_overlap_mentioned_p (operands[2], destlo))
23797 {
23798 /* Try to avoid unnecessary moves if part of the result
23799 is in the right place already. */
23800 if (src1 != dest)
23801 emit_move_insn (destlo, operands[1]);
23802 if (src2 != dest + halfregs)
23803 emit_move_insn (desthi, operands[2]);
23804 }
23805 else
23806 {
23807 if (src2 != dest + halfregs)
23808 emit_move_insn (desthi, operands[2]);
23809 if (src1 != dest)
23810 emit_move_insn (destlo, operands[1]);
23811 }
23812 }
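/* For instance (illustrative register choices): combining d0 and d1
   into q0 is a no-op apart from the deleted-insn note, since both
   halves are already in place; combining d1 and d0 into q0 hits the
   swapped case and emits the single PARALLEL, which matches the VSWP
   pattern; combining d4 and d5 into q0 simply emits the two moves
   d0 := d4 and d1 := d5.  */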
23813 \f
23814 /* Return the number (counting from 0) of
23815 the least significant set bit in MASK. */
23816
23817 inline static int
23818 number_of_first_bit_set (unsigned mask)
23819 {
23820 return ctz_hwi (mask);
23821 }
23822
23823 /* Like emit_multi_reg_push, but allowing for a different set of
23824 registers to be described as saved. MASK is the set of registers
23825 to be saved; REAL_REGS is the set of registers to be described as
23826 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23827
23828 static rtx_insn *
23829 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23830 {
23831 unsigned long regno;
23832 rtx par[10], tmp, reg;
23833 rtx_insn *insn;
23834 int i, j;
23835
23836 /* Build the parallel of the registers actually being stored. */
23837 for (i = 0; mask; ++i, mask &= mask - 1)
23838 {
23839 regno = ctz_hwi (mask);
23840 reg = gen_rtx_REG (SImode, regno);
23841
23842 if (i == 0)
23843 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23844 else
23845 tmp = gen_rtx_USE (VOIDmode, reg);
23846
23847 par[i] = tmp;
23848 }
23849
23850 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23851 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23852 tmp = gen_frame_mem (BLKmode, tmp);
23853 tmp = gen_rtx_SET (tmp, par[0]);
23854 par[0] = tmp;
23855
23856 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23857 insn = emit_insn (tmp);
23858
23859 /* Always build the stack adjustment note for unwind info. */
23860 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23861 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23862 par[0] = tmp;
23863
23864 /* Build the parallel of the registers recorded as saved for unwind. */
23865 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23866 {
23867 regno = ctz_hwi (real_regs);
23868 reg = gen_rtx_REG (SImode, regno);
23869
23870 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23871 tmp = gen_frame_mem (SImode, tmp);
23872 tmp = gen_rtx_SET (tmp, reg);
23873 RTX_FRAME_RELATED_P (tmp) = 1;
23874 par[j + 1] = tmp;
23875 }
23876
23877 if (j == 0)
23878 tmp = par[0];
23879 else
23880 {
23881 RTX_FRAME_RELATED_P (par[0]) = 1;
23882 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23883 }
23884
23885 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23886
23887 return insn;
23888 }
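/* As an example (hypothetical register set): for MASK == REAL_REGS ==
   {r4, r5, lr}, the insn emitted above is a PARALLEL whose first
   element stores through (pre_modify sp (sp - 12)), and the attached
   REG_FRAME_RELATED_EXPR note describes the same effect for the
   unwinder as

	sp = sp - 12
	[sp + 0] = r4,  [sp + 4] = r5,  [sp + 8] = lr

   i.e. the lowest-numbered register ends up at the lowest address,
   matching the Thumb PUSH instruction.  */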
23889
23890 /* Emit code to push or pop registers to or from the stack. F is the
23891 assembly file. MASK is the registers to pop. */
23892 static void
23893 thumb_pop (FILE *f, unsigned long mask)
23894 {
23895 int regno;
23896 int lo_mask = mask & 0xFF;
23897
23898 gcc_assert (mask);
23899
23900 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23901 {
23902 /* Special case. Do not generate a POP PC statement here, do it in
23903 thumb_exit() */
23904 thumb_exit (f, -1);
23905 return;
23906 }
23907
23908 fprintf (f, "\tpop\t{");
23909
23910 /* Look at the low registers first. */
23911 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23912 {
23913 if (lo_mask & 1)
23914 {
23915 asm_fprintf (f, "%r", regno);
23916
23917 if ((lo_mask & ~1) != 0)
23918 fprintf (f, ", ");
23919 }
23920 }
23921
23922 if (mask & (1 << PC_REGNUM))
23923 {
23924 /* Catch popping the PC. */
23925 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23926 || IS_CMSE_ENTRY (arm_current_func_type ()))
23927 {
23928 /* The PC is never popped directly; instead
23929 it is popped into r3 and then BX is used.  */
23930 fprintf (f, "}\n");
23931
23932 thumb_exit (f, -1);
23933
23934 return;
23935 }
23936 else
23937 {
23938 if (mask & 0xFF)
23939 fprintf (f, ", ");
23940
23941 asm_fprintf (f, "%r", PC_REGNUM);
23942 }
23943 }
23944
23945 fprintf (f, "}\n");
23946 }
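/* For example (illustrative masks): a MASK of {r4, r5, pc} produces

	pop	{r4, r5, pc}

   when popping the PC directly is allowed, whereas with interworking,
   TARGET_BACKTRACE, __builtin_eh_return or a CMSE entry function it
   instead produces

	pop	{r4, r5}

   followed by the return sequence generated by thumb_exit.  */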
23947
23948 /* Generate code to return from a thumb function.
23949 If 'reg_containing_return_addr' is -1, then the return address is
23950 actually on the stack, at the stack pointer. */
23951 static void
23952 thumb_exit (FILE *f, int reg_containing_return_addr)
23953 {
23954 unsigned regs_available_for_popping;
23955 unsigned regs_to_pop;
23956 int pops_needed;
23957 unsigned available;
23958 unsigned required;
23959 machine_mode mode;
23960 int size;
23961 int restore_a4 = FALSE;
23962
23963 /* Compute the registers we need to pop. */
23964 regs_to_pop = 0;
23965 pops_needed = 0;
23966
23967 if (reg_containing_return_addr == -1)
23968 {
23969 regs_to_pop |= 1 << LR_REGNUM;
23970 ++pops_needed;
23971 }
23972
23973 if (TARGET_BACKTRACE)
23974 {
23975 /* Restore the (ARM) frame pointer and stack pointer. */
23976 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23977 pops_needed += 2;
23978 }
23979
23980 /* If there is nothing to pop then just emit the BX instruction and
23981 return. */
23982 if (pops_needed == 0)
23983 {
23984 if (crtl->calls_eh_return)
23985 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23986
23987 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23988 {
23989 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23990 reg_containing_return_addr);
23991 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23992 }
23993 else
23994 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23995 return;
23996 }
23997 /* Otherwise if we are not supporting interworking and we have not created
23998 a backtrace structure and the function was not entered in ARM mode then
23999 just pop the return address straight into the PC. */
24000 else if (!TARGET_INTERWORK
24001 && !TARGET_BACKTRACE
24002 && !is_called_in_ARM_mode (current_function_decl)
24003 && !crtl->calls_eh_return
24004 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24005 {
24006 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24007 return;
24008 }
24009
24010 /* Find out how many of the (return) argument registers we can corrupt. */
24011 regs_available_for_popping = 0;
24012
24013 /* If returning via __builtin_eh_return, the bottom three registers
24014 all contain information needed for the return. */
24015 if (crtl->calls_eh_return)
24016 size = 12;
24017 else
24018 {
24019 /* We can deduce the registers used from the function's
24020 return value.  This is more reliable than examining
24021 df_regs_ever_live_p () because that will be set if the register is
24022 ever used in the function, not just if the register is used
24023 to hold a return value. */
24024
24025 if (crtl->return_rtx != 0)
24026 mode = GET_MODE (crtl->return_rtx);
24027 else
24028 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24029
24030 size = GET_MODE_SIZE (mode);
24031
24032 if (size == 0)
24033 {
24034 /* In a void function we can use any argument register.
24035 In a function that returns a structure on the stack
24036 we can use the second and third argument registers. */
24037 if (mode == VOIDmode)
24038 regs_available_for_popping =
24039 (1 << ARG_REGISTER (1))
24040 | (1 << ARG_REGISTER (2))
24041 | (1 << ARG_REGISTER (3));
24042 else
24043 regs_available_for_popping =
24044 (1 << ARG_REGISTER (2))
24045 | (1 << ARG_REGISTER (3));
24046 }
24047 else if (size <= 4)
24048 regs_available_for_popping =
24049 (1 << ARG_REGISTER (2))
24050 | (1 << ARG_REGISTER (3));
24051 else if (size <= 8)
24052 regs_available_for_popping =
24053 (1 << ARG_REGISTER (3));
24054 }
24055
24056 /* Match registers to be popped with registers into which we pop them. */
24057 for (available = regs_available_for_popping,
24058 required = regs_to_pop;
24059 required != 0 && available != 0;
24060 available &= ~(available & - available),
24061 required &= ~(required & - required))
24062 -- pops_needed;
24063
24064 /* If we have any popping registers left over, remove them. */
24065 if (available > 0)
24066 regs_available_for_popping &= ~available;
24067
24068 /* Otherwise if we need another popping register we can use
24069 the fourth argument register. */
24070 else if (pops_needed)
24071 {
24072 /* If we have not found any free argument registers and
24073 reg a4 contains the return address, we must move it. */
24074 if (regs_available_for_popping == 0
24075 && reg_containing_return_addr == LAST_ARG_REGNUM)
24076 {
24077 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24078 reg_containing_return_addr = LR_REGNUM;
24079 }
24080 else if (size > 12)
24081 {
24082 /* Register a4 is being used to hold part of the return value,
24083 but we have dire need of a free, low register. */
24084 restore_a4 = TRUE;
24085
24086 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24087 }
24088
24089 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24090 {
24091 /* The fourth argument register is available. */
24092 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24093
24094 --pops_needed;
24095 }
24096 }
24097
24098 /* Pop as many registers as we can. */
24099 thumb_pop (f, regs_available_for_popping);
24100
24101 /* Process the registers we popped. */
24102 if (reg_containing_return_addr == -1)
24103 {
24104 /* The return address was popped into the lowest numbered register. */
24105 regs_to_pop &= ~(1 << LR_REGNUM);
24106
24107 reg_containing_return_addr =
24108 number_of_first_bit_set (regs_available_for_popping);
24109
24110 /* Remove this register from the mask of available registers, so that
24111 the return address will not be corrupted by further pops. */
24112 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24113 }
24114
24115 /* If we popped other registers then handle them here. */
24116 if (regs_available_for_popping)
24117 {
24118 int frame_pointer;
24119
24120 /* Work out which register currently contains the frame pointer. */
24121 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24122
24123 /* Move it into the correct place. */
24124 asm_fprintf (f, "\tmov\t%r, %r\n",
24125 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24126
24127 /* (Temporarily) remove it from the mask of popped registers. */
24128 regs_available_for_popping &= ~(1 << frame_pointer);
24129 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24130
24131 if (regs_available_for_popping)
24132 {
24133 int stack_pointer;
24134
24135 /* We popped the stack pointer as well,
24136 find the register that contains it. */
24137 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24138
24139 /* Move it into the stack register. */
24140 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24141
24142 /* At this point we have popped all necessary registers, so
24143 do not worry about restoring regs_available_for_popping
24144 to its correct value:
24145
24146 assert (pops_needed == 0)
24147 assert (regs_available_for_popping == (1 << frame_pointer))
24148 assert (regs_to_pop == (1 << STACK_POINTER)) */
24149 }
24150 else
24151 {
24152 /* Since we have just moved the popped value into the frame
24153 pointer, the popping register is available for reuse, and
24154 we know that we still have the stack pointer left to pop. */
24155 regs_available_for_popping |= (1 << frame_pointer);
24156 }
24157 }
24158
24159 /* If we still have registers left on the stack, but we no longer have
24160 any registers into which we can pop them, then we must move the return
24161 address into the link register and make available the register that
24162 contained it. */
24163 if (regs_available_for_popping == 0 && pops_needed > 0)
24164 {
24165 regs_available_for_popping |= 1 << reg_containing_return_addr;
24166
24167 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24168 reg_containing_return_addr);
24169
24170 reg_containing_return_addr = LR_REGNUM;
24171 }
24172
24173 /* If we have registers left on the stack then pop some more.
24174 We know that at most we will want to pop FP and SP. */
24175 if (pops_needed > 0)
24176 {
24177 int popped_into;
24178 int move_to;
24179
24180 thumb_pop (f, regs_available_for_popping);
24181
24182 /* We have popped either FP or SP.
24183 Move whichever one it is into the correct register. */
24184 popped_into = number_of_first_bit_set (regs_available_for_popping);
24185 move_to = number_of_first_bit_set (regs_to_pop);
24186
24187 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24188 --pops_needed;
24189 }
24190
24191 /* If we still have not popped everything then we must have only
24192 had one register available to us and we are now popping the SP. */
24193 if (pops_needed > 0)
24194 {
24195 int popped_into;
24196
24197 thumb_pop (f, regs_available_for_popping);
24198
24199 popped_into = number_of_first_bit_set (regs_available_for_popping);
24200
24201 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24202 /*
24203 assert (regs_to_pop == (1 << STACK_POINTER))
24204 assert (pops_needed == 1)
24205 */
24206 }
24207
24208 /* If necessary restore the a4 register. */
24209 if (restore_a4)
24210 {
24211 if (reg_containing_return_addr != LR_REGNUM)
24212 {
24213 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24214 reg_containing_return_addr = LR_REGNUM;
24215 }
24216
24217 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24218 }
24219
24220 if (crtl->calls_eh_return)
24221 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24222
24223 /* Return to caller. */
24224 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24225 {
24226 /* This is for the cases where LR is not being used to contain the return
24227 address. It may therefore contain information that we might not want
24228 to leak, hence it must be cleared. The value in R0 will never be a
24229 secret at this point, so it is safe to use it; see the clearing code
24230 in 'cmse_nonsecure_entry_clear_before_return'. */
24231 if (reg_containing_return_addr != LR_REGNUM)
24232 asm_fprintf (f, "\tmov\tlr, r0\n");
24233
24234 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24235 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24236 }
24237 else
24238 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24239 }
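/* A minimal example of what this can produce (assuming interworking is
   enabled, a void return type and the return address left on the
   stack): r0-r2 are available, the return address is popped into the
   lowest of them, and the exit becomes

	pop	{r0}
	bx	r0

   In the simple non-interworking case the shortcut near the top emits
   a plain "pop {pc}" instead.  */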
24240 \f
24241 /* Scan INSN just before assembler is output for it.
24242 For Thumb-1, we track the status of the condition codes; this
24243 information is used in the cbranchsi4_insn pattern. */
24244 void
24245 thumb1_final_prescan_insn (rtx_insn *insn)
24246 {
24247 if (flag_print_asm_name)
24248 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24249 INSN_ADDRESSES (INSN_UID (insn)));
24250 /* Don't overwrite the previous setter when we get to a cbranch. */
24251 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24252 {
24253 enum attr_conds conds;
24254
24255 if (cfun->machine->thumb1_cc_insn)
24256 {
24257 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24258 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24259 CC_STATUS_INIT;
24260 }
24261 conds = get_attr_conds (insn);
24262 if (conds == CONDS_SET)
24263 {
24264 rtx set = single_set (insn);
24265 cfun->machine->thumb1_cc_insn = insn;
24266 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24267 cfun->machine->thumb1_cc_op1 = const0_rtx;
24268 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24269 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24270 {
24271 rtx src1 = XEXP (SET_SRC (set), 1);
24272 if (src1 == const0_rtx)
24273 cfun->machine->thumb1_cc_mode = CCmode;
24274 }
24275 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24276 {
24277 /* Record the src register operand instead of dest because
24278 cprop_hardreg pass propagates src. */
24279 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24280 }
24281 }
24282 else if (conds != CONDS_NOCOND)
24283 cfun->machine->thumb1_cc_insn = NULL_RTX;
24284 }
24285
24286 /* Check if unexpected far jump is used. */
24287 if (cfun->machine->lr_save_eliminated
24288 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24289 internal_error("Unexpected thumb1 far jump");
24290 }
24291
24292 int
24293 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24294 {
24295 unsigned HOST_WIDE_INT mask = 0xff;
24296 int i;
24297
24298 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24299 if (val == 0) /* XXX */
24300 return 0;
24301
24302 for (i = 0; i < 25; i++)
24303 if ((val & (mask << i)) == val)
24304 return 1;
24305
24306 return 0;
24307 }
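/* For example: 0x00ff0000 is accepted, since (0xff << 16) covers it and
   the constant can be built as a move of 0xff followed by a shift;
   0x00000101 is rejected, because its set bits span more than eight
   consecutive bit positions.  Only shifts up to 24 need checking for a
   32-bit value, hence the loop bound of 25.  */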
24308
24309 /* Returns nonzero if the current function contains,
24310 or might contain a far jump. */
24311 static int
24312 thumb_far_jump_used_p (void)
24313 {
24314 rtx_insn *insn;
24315 bool far_jump = false;
24316 unsigned int func_size = 0;
24317
24318 /* If we have already decided that far jumps may be used,
24319 do not bother checking again, and always return true even if
24320 it turns out that they are not being used. Once we have made
24321 the decision that far jumps are present (and that hence the link
24322 register will be pushed onto the stack) we cannot go back on it. */
24323 if (cfun->machine->far_jump_used)
24324 return 1;
24325
24326 /* If this function is not being called from the prologue/epilogue
24327 generation code then it must be being called from the
24328 INITIAL_ELIMINATION_OFFSET macro. */
24329 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24330 {
24331 /* In this case we know that we are being asked about the elimination
24332 of the arg pointer register. If that register is not being used,
24333 then there are no arguments on the stack, and we do not have to
24334 worry that a far jump might force the prologue to push the link
24335 register, changing the stack offsets. In this case we can just
24336 return false, since the presence of far jumps in the function will
24337 not affect stack offsets.
24338
24339 If the arg pointer is live (or if it was live, but has now been
24340 eliminated and so set to dead) then we do have to test to see if
24341 the function might contain a far jump. This test can lead to some
24342 false positives, since before reload is completed, the length of
24343 branch instructions is not known, so gcc defaults to returning their
24344 longest length, which in turn sets the far jump attribute to true.
24345
24346 A false positive will not result in bad code being generated, but it
24347 will result in a needless push and pop of the link register. We
24348 hope that this does not occur too often.
24349
24350 If we need doubleword stack alignment this could affect the other
24351 elimination offsets so we can't risk getting it wrong. */
24352 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24353 cfun->machine->arg_pointer_live = 1;
24354 else if (!cfun->machine->arg_pointer_live)
24355 return 0;
24356 }
24357
24358 /* We should not change far_jump_used during or after reload, as there is
24359 no chance to change stack frame layout. */
24360 if (reload_in_progress || reload_completed)
24361 return 0;
24362
24363 /* Check to see if the function contains a branch
24364 insn with the far jump attribute set. */
24365 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24366 {
24367 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24368 {
24369 far_jump = true;
24370 }
24371 func_size += get_attr_length (insn);
24372 }
24373
24374 /* The far_jump attribute will always be true for thumb1 before the
24375 shorten_branch pass, so checking the far_jump attribute before
24376 shorten_branch isn't very useful.
24377
24378 The following heuristic tries to estimate more accurately whether a
24379 far jump will ultimately be used.  The heuristic is very conservative,
24380 as there is no way to roll back a decision not to use a far jump.
24381
24382 The Thumb1 long branch offset range is -2048 to 2046.  The worst case
24383 is that each 2-byte insn is paired with a 4-byte constant pool entry.
24384 Using a function size of 2048/3 as the threshold is conservative enough. */
24385 if (far_jump)
24386 {
24387 if ((func_size * 3) >= 2048)
24388 {
24389 /* Record the fact that we have decided that
24390 the function does use far jumps. */
24391 cfun->machine->far_jump_used = 1;
24392 return 1;
24393 }
24394 }
24395
24396 return 0;
24397 }
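/* A worked instance of the threshold above (numbers are illustrative):
   a function containing a candidate far jump whose insn lengths sum to
   700 bytes gives 700 * 3 == 2100 >= 2048, so far_jump_used is latched;
   one summing to 600 bytes gives 1800 < 2048 and the LR save can still
   be eliminated.  The factor of three reflects the worst case of each
   2-byte insn dragging a 4-byte constant-pool entry along with it.  */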
24398
24399 /* Return nonzero if FUNC must be entered in ARM mode. */
24400 static bool
24401 is_called_in_ARM_mode (tree func)
24402 {
24403 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24404
24405 /* Ignore the problem of functions whose address is taken.  */
24406 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24407 return true;
24408
24409 #ifdef ARM_PE
24410 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24411 #else
24412 return false;
24413 #endif
24414 }
24415
24416 /* Given the stack offsets and register mask in OFFSETS, decide how
24417 many additional registers to push instead of subtracting a constant
24418 from SP. For epilogues the principle is the same except we use pop.
24419 FOR_PROLOGUE indicates which we're generating. */
24420 static int
24421 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24422 {
24423 HOST_WIDE_INT amount;
24424 unsigned long live_regs_mask = offsets->saved_regs_mask;
24425 /* Extract a mask of the ones we can give to the Thumb's push/pop
24426 instruction. */
24427 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24428 /* Then count how many other high registers will need to be pushed. */
24429 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24430 int n_free, reg_base, size;
24431
24432 if (!for_prologue && frame_pointer_needed)
24433 amount = offsets->locals_base - offsets->saved_regs;
24434 else
24435 amount = offsets->outgoing_args - offsets->saved_regs;
24436
24437 /* If the stack frame size is 512 exactly, we can save one load
24438 instruction, which should make this a win even when optimizing
24439 for speed. */
24440 if (!optimize_size && amount != 512)
24441 return 0;
24442
24443 /* Can't do this if there are high registers to push. */
24444 if (high_regs_pushed != 0)
24445 return 0;
24446
24447 /* Shouldn't do it in the prologue if no registers would normally
24448 be pushed at all. In the epilogue, also allow it if we'll have
24449 a pop insn for the PC. */
24450 if (l_mask == 0
24451 && (for_prologue
24452 || TARGET_BACKTRACE
24453 || (live_regs_mask & 1 << LR_REGNUM) == 0
24454 || TARGET_INTERWORK
24455 || crtl->args.pretend_args_size != 0))
24456 return 0;
24457
24458 /* Don't do this if thumb_expand_prologue wants to emit instructions
24459 between the push and the stack frame allocation. */
24460 if (for_prologue
24461 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24462 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24463 return 0;
24464
24465 reg_base = 0;
24466 n_free = 0;
24467 if (!for_prologue)
24468 {
24469 size = arm_size_return_regs ();
24470 reg_base = ARM_NUM_INTS (size);
24471 live_regs_mask >>= reg_base;
24472 }
24473
24474 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24475 && (for_prologue || call_used_regs[reg_base + n_free]))
24476 {
24477 live_regs_mask >>= 1;
24478 n_free++;
24479 }
24480
24481 if (n_free == 0)
24482 return 0;
24483 gcc_assert (amount / 4 * 4 == amount);
24484
24485 if (amount >= 512 && (amount - n_free * 4) < 512)
24486 return (amount - 508) / 4;
24487 if (amount <= n_free * 4)
24488 return amount / 4;
24489 return 0;
24490 }
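/* Worked example (illustrative): with amount == 512 and two suitable
   free low registers (n_free == 2), 512 - 2 * 4 == 504 is below 512,
   so the function returns (512 - 508) / 4 == 1: pushing one extra
   register reduces the remaining adjustment to 508 bytes, the largest
   value a single Thumb-1 "sub sp, #imm" can handle.  */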
24491
24492 /* The bits which aren't usefully expanded as rtl. */
24493 const char *
24494 thumb1_unexpanded_epilogue (void)
24495 {
24496 arm_stack_offsets *offsets;
24497 int regno;
24498 unsigned long live_regs_mask = 0;
24499 int high_regs_pushed = 0;
24500 int extra_pop;
24501 int had_to_push_lr;
24502 int size;
24503
24504 if (cfun->machine->return_used_this_function != 0)
24505 return "";
24506
24507 if (IS_NAKED (arm_current_func_type ()))
24508 return "";
24509
24510 offsets = arm_get_frame_offsets ();
24511 live_regs_mask = offsets->saved_regs_mask;
24512 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24513
24514 /* We can deduce the registers used from the function's return value.
24515 This is more reliable than examining df_regs_ever_live_p () because that
24516 will be set if the register is ever used in the function, not just if
24517 the register is used to hold a return value. */
24518 size = arm_size_return_regs ();
24519
24520 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24521 if (extra_pop > 0)
24522 {
24523 unsigned long extra_mask = (1 << extra_pop) - 1;
24524 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24525 }
24526
24527 /* The prolog may have pushed some high registers to use as
24528 work registers. e.g. the testsuite file:
24529 gcc/testsuite/gcc.c-torture/execute/complex-2.c
24530 compiles to produce:
24531 push {r4, r5, r6, r7, lr}
24532 mov r7, r9
24533 mov r6, r8
24534 push {r6, r7}
24535 as part of the prolog. We have to undo that pushing here. */
24536
24537 if (high_regs_pushed)
24538 {
24539 unsigned long mask = live_regs_mask & 0xff;
24540 int next_hi_reg;
24541
24542 /* The available low registers depend on the size of the value we are
24543 returning. */
24544 if (size <= 12)
24545 mask |= 1 << 3;
24546 if (size <= 8)
24547 mask |= 1 << 2;
24548
24549 if (mask == 0)
24550 /* Oh dear! We have no low registers into which we can pop
24551 high registers! */
24552 internal_error
24553 ("no low registers available for popping high registers");
24554
24555 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24556 if (live_regs_mask & (1 << next_hi_reg))
24557 break;
24558
24559 while (high_regs_pushed)
24560 {
24561 /* Find lo register(s) into which the high register(s) can
24562 be popped. */
24563 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24564 {
24565 if (mask & (1 << regno))
24566 high_regs_pushed--;
24567 if (high_regs_pushed == 0)
24568 break;
24569 }
24570
24571 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24572
24573 /* Pop the values into the low register(s). */
24574 thumb_pop (asm_out_file, mask);
24575
24576 /* Move the value(s) into the high registers. */
24577 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24578 {
24579 if (mask & (1 << regno))
24580 {
24581 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24582 regno);
24583
24584 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24585 if (live_regs_mask & (1 << next_hi_reg))
24586 break;
24587 }
24588 }
24589 }
24590 live_regs_mask &= ~0x0f00;
24591 }
24592
24593 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24594 live_regs_mask &= 0xff;
24595
24596 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24597 {
24598 /* Pop the return address into the PC. */
24599 if (had_to_push_lr)
24600 live_regs_mask |= 1 << PC_REGNUM;
24601
24602 /* Either no argument registers were pushed or a backtrace
24603 structure was created which includes an adjusted stack
24604 pointer, so just pop everything. */
24605 if (live_regs_mask)
24606 thumb_pop (asm_out_file, live_regs_mask);
24607
24608 /* We have either just popped the return address into the
24609 PC or it was kept in LR for the entire function.
24610 Note that thumb_pop has already called thumb_exit if the
24611 PC was in the list. */
24612 if (!had_to_push_lr)
24613 thumb_exit (asm_out_file, LR_REGNUM);
24614 }
24615 else
24616 {
24617 /* Pop everything but the return address. */
24618 if (live_regs_mask)
24619 thumb_pop (asm_out_file, live_regs_mask);
24620
24621 if (had_to_push_lr)
24622 {
24623 if (size > 12)
24624 {
24625 /* We have no free low regs, so save one. */
24626 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24627 LAST_ARG_REGNUM);
24628 }
24629
24630 /* Get the return address into a temporary register. */
24631 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24632
24633 if (size > 12)
24634 {
24635 /* Move the return address to lr. */
24636 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24637 LAST_ARG_REGNUM);
24638 /* Restore the low register. */
24639 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24640 IP_REGNUM);
24641 regno = LR_REGNUM;
24642 }
24643 else
24644 regno = LAST_ARG_REGNUM;
24645 }
24646 else
24647 regno = LR_REGNUM;
24648
24649 /* Remove the argument registers that were pushed onto the stack. */
24650 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24651 SP_REGNUM, SP_REGNUM,
24652 crtl->args.pretend_args_size);
24653
24654 thumb_exit (asm_out_file, regno);
24655 }
24656
24657 return "";
24658 }
24659
24660 /* Functions to save and restore machine-specific function data. */
24661 static struct machine_function *
24662 arm_init_machine_status (void)
24663 {
24664 struct machine_function *machine;
24665 machine = ggc_cleared_alloc<machine_function> ();
24666
24667 #if ARM_FT_UNKNOWN != 0
24668 machine->func_type = ARM_FT_UNKNOWN;
24669 #endif
24670 return machine;
24671 }
24672
24673 /* Return an RTX indicating where the return address to the
24674 calling function can be found. */
24675 rtx
24676 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24677 {
24678 if (count != 0)
24679 return NULL_RTX;
24680
24681 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24682 }
24683
24684 /* Do anything needed before RTL is emitted for each function. */
24685 void
24686 arm_init_expanders (void)
24687 {
24688 /* Arrange to initialize and mark the machine per-function status. */
24689 init_machine_status = arm_init_machine_status;
24690
24691 /* This is to stop the combine pass optimizing away the alignment
24692 adjustment of va_arg. */
24693 /* ??? It is claimed that this should not be necessary. */
24694 if (cfun)
24695 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24696 }
24697
24698 /* Check whether FUNC is compiled for a different mode (ARM vs. Thumb) than the current one.  */
24699
24700 bool
24701 arm_change_mode_p (tree func)
24702 {
24703 if (TREE_CODE (func) != FUNCTION_DECL)
24704 return false;
24705
24706 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24707
24708 if (!callee_tree)
24709 callee_tree = target_option_default_node;
24710
24711 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24712 int flags = callee_opts->x_target_flags;
24713
24714 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24715 }
24716
24717 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24718 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24719 to point at the base of the local variables after static stack
24720 space for a function has been allocated. */
24721
24722 HOST_WIDE_INT
24723 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24724 {
24725 arm_stack_offsets *offsets;
24726
24727 offsets = arm_get_frame_offsets ();
24728
24729 switch (from)
24730 {
24731 case ARG_POINTER_REGNUM:
24732 switch (to)
24733 {
24734 case STACK_POINTER_REGNUM:
24735 return offsets->outgoing_args - offsets->saved_args;
24736
24737 case FRAME_POINTER_REGNUM:
24738 return offsets->soft_frame - offsets->saved_args;
24739
24740 case ARM_HARD_FRAME_POINTER_REGNUM:
24741 return offsets->saved_regs - offsets->saved_args;
24742
24743 case THUMB_HARD_FRAME_POINTER_REGNUM:
24744 return offsets->locals_base - offsets->saved_args;
24745
24746 default:
24747 gcc_unreachable ();
24748 }
24749 break;
24750
24751 case FRAME_POINTER_REGNUM:
24752 switch (to)
24753 {
24754 case STACK_POINTER_REGNUM:
24755 return offsets->outgoing_args - offsets->soft_frame;
24756
24757 case ARM_HARD_FRAME_POINTER_REGNUM:
24758 return offsets->saved_regs - offsets->soft_frame;
24759
24760 case THUMB_HARD_FRAME_POINTER_REGNUM:
24761 return offsets->locals_base - offsets->soft_frame;
24762
24763 default:
24764 gcc_unreachable ();
24765 }
24766 break;
24767
24768 default:
24769 gcc_unreachable ();
24770 }
24771 }
24772
24773 /* Generate the function's prologue. */
24774
24775 void
24776 thumb1_expand_prologue (void)
24777 {
24778 rtx_insn *insn;
24779
24780 HOST_WIDE_INT amount;
24781 HOST_WIDE_INT size;
24782 arm_stack_offsets *offsets;
24783 unsigned long func_type;
24784 int regno;
24785 unsigned long live_regs_mask;
24786 unsigned long l_mask;
24787 unsigned high_regs_pushed = 0;
24788 bool lr_needs_saving;
24789
24790 func_type = arm_current_func_type ();
24791
24792 /* Naked functions don't have prologues. */
24793 if (IS_NAKED (func_type))
24794 {
24795 if (flag_stack_usage_info)
24796 current_function_static_stack_size = 0;
24797 return;
24798 }
24799
24800 if (IS_INTERRUPT (func_type))
24801 {
24802 error ("interrupt Service Routines cannot be coded in Thumb mode");
24803 return;
24804 }
24805
24806 if (is_called_in_ARM_mode (current_function_decl))
24807 emit_insn (gen_prologue_thumb1_interwork ());
24808
24809 offsets = arm_get_frame_offsets ();
24810 live_regs_mask = offsets->saved_regs_mask;
24811 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24812
24813 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24814 l_mask = live_regs_mask & 0x40ff;
24815 /* Then count how many other high registers will need to be pushed. */
24816 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24817
24818 if (crtl->args.pretend_args_size)
24819 {
24820 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24821
24822 if (cfun->machine->uses_anonymous_args)
24823 {
24824 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24825 unsigned long mask;
24826
24827 mask = 1ul << (LAST_ARG_REGNUM + 1);
24828 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24829
24830 insn = thumb1_emit_multi_reg_push (mask, 0);
24831 }
24832 else
24833 {
24834 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24835 stack_pointer_rtx, x));
24836 }
24837 RTX_FRAME_RELATED_P (insn) = 1;
24838 }
24839
24840 if (TARGET_BACKTRACE)
24841 {
24842 HOST_WIDE_INT offset = 0;
24843 unsigned work_register;
24844 rtx work_reg, x, arm_hfp_rtx;
24845
24846 /* We have been asked to create a stack backtrace structure.
24847 The code looks like this:
24848
24849 0 .align 2
24850 0 func:
24851 0 sub SP, #16 Reserve space for 4 registers.
24852 2 push {R7} Push low registers.
24853 4 add R7, SP, #20 Get the stack pointer before the push.
24854 6 str R7, [SP, #8] Store the stack pointer
24855 (before reserving the space).
24856 8 mov R7, PC Get hold of the start of this code + 12.
24857 10 str R7, [SP, #16] Store it.
24858 12 mov R7, FP Get hold of the current frame pointer.
24859 14 str R7, [SP, #4] Store it.
24860 16 mov R7, LR Get hold of the current return address.
24861 18 str R7, [SP, #12] Store it.
24862 20 add R7, SP, #16 Point at the start of the
24863 backtrace structure.
24864 22 mov FP, R7 Put this value into the frame pointer. */
24865
24866 work_register = thumb_find_work_register (live_regs_mask);
24867 work_reg = gen_rtx_REG (SImode, work_register);
24868 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24869
24870 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24871 stack_pointer_rtx, GEN_INT (-16)));
24872 RTX_FRAME_RELATED_P (insn) = 1;
24873
24874 if (l_mask)
24875 {
24876 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24877 RTX_FRAME_RELATED_P (insn) = 1;
24878 lr_needs_saving = false;
24879
24880 offset = bit_count (l_mask) * UNITS_PER_WORD;
24881 }
24882
24883 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24884 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24885
24886 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24887 x = gen_frame_mem (SImode, x);
24888 emit_move_insn (x, work_reg);
24889
24890 /* Make sure that the instruction fetching the PC is in the right place
24891 to calculate "start of backtrace creation code + 12". */
24892 /* ??? The stores using the common WORK_REG ought to be enough to
24893 prevent the scheduler from doing anything weird. Failing that
24894 we could always move all of the following into an UNSPEC_VOLATILE. */
24895 if (l_mask)
24896 {
24897 x = gen_rtx_REG (SImode, PC_REGNUM);
24898 emit_move_insn (work_reg, x);
24899
24900 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24901 x = gen_frame_mem (SImode, x);
24902 emit_move_insn (x, work_reg);
24903
24904 emit_move_insn (work_reg, arm_hfp_rtx);
24905
24906 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24907 x = gen_frame_mem (SImode, x);
24908 emit_move_insn (x, work_reg);
24909 }
24910 else
24911 {
24912 emit_move_insn (work_reg, arm_hfp_rtx);
24913
24914 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24915 x = gen_frame_mem (SImode, x);
24916 emit_move_insn (x, work_reg);
24917
24918 x = gen_rtx_REG (SImode, PC_REGNUM);
24919 emit_move_insn (work_reg, x);
24920
24921 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24922 x = gen_frame_mem (SImode, x);
24923 emit_move_insn (x, work_reg);
24924 }
24925
24926 x = gen_rtx_REG (SImode, LR_REGNUM);
24927 emit_move_insn (work_reg, x);
24928
24929 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24930 x = gen_frame_mem (SImode, x);
24931 emit_move_insn (x, work_reg);
24932
24933 x = GEN_INT (offset + 12);
24934 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24935
24936 emit_move_insn (arm_hfp_rtx, work_reg);
24937 }
24938 /* Optimization: If we are not pushing any low registers but we are going
24939 to push some high registers then delay our first push. This will just
24940 be a push of LR and we can combine it with the push of the first high
24941 register. */
24942 else if ((l_mask & 0xff) != 0
24943 || (high_regs_pushed == 0 && lr_needs_saving))
24944 {
24945 unsigned long mask = l_mask;
24946 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24947 insn = thumb1_emit_multi_reg_push (mask, mask);
24948 RTX_FRAME_RELATED_P (insn) = 1;
24949 lr_needs_saving = false;
24950 }
24951
24952 if (high_regs_pushed)
24953 {
24954 unsigned pushable_regs;
24955 unsigned next_hi_reg;
24956 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24957 : crtl->args.info.nregs;
24958 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24959
24960 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24961 if (live_regs_mask & (1 << next_hi_reg))
24962 break;
24963
24964 /* Here we need to mask out registers used for passing arguments even
24965 if they could be pushed.  This is to avoid using them to stash the high
24966 registers; such a stash could clobber arguments that are still live.  */
24967 pushable_regs = l_mask & (~arg_regs_mask);
24968 if (lr_needs_saving)
24969 pushable_regs &= ~(1 << LR_REGNUM);
24970
24971 if (pushable_regs == 0)
24972 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24973
24974 while (high_regs_pushed > 0)
24975 {
24976 unsigned long real_regs_mask = 0;
24977 unsigned long push_mask = 0;
24978
24979 for (regno = LR_REGNUM; regno >= 0; regno --)
24980 {
24981 if (pushable_regs & (1 << regno))
24982 {
24983 emit_move_insn (gen_rtx_REG (SImode, regno),
24984 gen_rtx_REG (SImode, next_hi_reg));
24985
24986 high_regs_pushed --;
24987 real_regs_mask |= (1 << next_hi_reg);
24988 push_mask |= (1 << regno);
24989
24990 if (high_regs_pushed)
24991 {
24992 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24993 next_hi_reg --)
24994 if (live_regs_mask & (1 << next_hi_reg))
24995 break;
24996 }
24997 else
24998 break;
24999 }
25000 }
25001
25002 /* If we had to find a work register and we have not yet
25003 saved the LR then add it to the list of regs to push. */
25004 if (lr_needs_saving)
25005 {
25006 push_mask |= 1 << LR_REGNUM;
25007 real_regs_mask |= 1 << LR_REGNUM;
25008 lr_needs_saving = false;
25009 }
25010
25011 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25012 RTX_FRAME_RELATED_P (insn) = 1;
25013 }
25014 }
25015
25016 /* Load the pic register before setting the frame pointer,
25017 so we can use r7 as a temporary work register. */
25018 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25019 arm_load_pic_register (live_regs_mask);
25020
25021 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25022 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25023 stack_pointer_rtx);
25024
25025 size = offsets->outgoing_args - offsets->saved_args;
25026 if (flag_stack_usage_info)
25027 current_function_static_stack_size = size;
25028
25029 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25030 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25031 || flag_stack_clash_protection)
25032 && size)
25033 sorry ("-fstack-check=specific for Thumb-1");
25034
25035 amount = offsets->outgoing_args - offsets->saved_regs;
25036 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25037 if (amount)
25038 {
25039 if (amount < 512)
25040 {
25041 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25042 GEN_INT (- amount)));
25043 RTX_FRAME_RELATED_P (insn) = 1;
25044 }
25045 else
25046 {
25047 rtx reg, dwarf;
25048
25049 /* The stack decrement is too big for an immediate value in a single
25050 insn. In theory we could issue multiple subtracts, but after
25051 three of them it becomes more space efficient to place the full
25052 value in the constant pool and load into a register. (Also the
25053 ARM debugger really likes to see only one stack decrement per
25054 function). So instead we look for a scratch register into which
25055 we can load the decrement, and then we subtract this from the
25056 stack pointer. Unfortunately on the thumb the only available
25057 scratch registers are the argument registers, and we cannot use
25058 these as they may hold arguments to the function. Instead we
25059 attempt to locate a call preserved register which is used by this
25060 function. If we can find one, then we know that it will have
25061 been pushed at the start of the prologue and so we can corrupt
25062 it now. */
25063 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25064 if (live_regs_mask & (1 << regno))
25065 break;
25066
25067 gcc_assert(regno <= LAST_LO_REGNUM);
25068
25069 reg = gen_rtx_REG (SImode, regno);
25070
25071 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25072
25073 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25074 stack_pointer_rtx, reg));
25075
25076 dwarf = gen_rtx_SET (stack_pointer_rtx,
25077 plus_constant (Pmode, stack_pointer_rtx,
25078 -amount));
25079 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25080 RTX_FRAME_RELATED_P (insn) = 1;
25081 }
25082 }
25083
25084 if (frame_pointer_needed)
25085 thumb_set_frame_pointer (offsets);
25086
25087 /* If we are profiling, make sure no instructions are scheduled before
25088 the call to mcount. Similarly if the user has requested no
25089 scheduling in the prolog. Similarly if we want non-call exceptions
25090 using the EABI unwinder, to prevent faulting instructions from being
25091 swapped with a stack adjustment. */
25092 if (crtl->profile || !TARGET_SCHED_PROLOG
25093 || (arm_except_unwind_info (&global_options) == UI_TARGET
25094 && cfun->can_throw_non_call_exceptions))
25095 emit_insn (gen_blockage ());
25096
25097 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25098 if (live_regs_mask & 0xff)
25099 cfun->machine->lr_save_eliminated = 0;
25100 }
25101
25102 /* Clear caller saved registers not used to pass return values and leaked
25103 condition flags before exiting a cmse_nonsecure_entry function. */
25104
25105 void
25106 cmse_nonsecure_entry_clear_before_return (void)
25107 {
25108 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25109 uint32_t padding_bits_to_clear = 0;
25110 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25111 auto_sbitmap to_clear_bitmap (maxregno + 1);
25112 tree result_type;
25113 rtx result_rtl;
25114
25115 bitmap_clear (to_clear_bitmap);
25116 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25117 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25118
25119 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25120 registers. */
25121 if (TARGET_HARD_FLOAT)
25122 {
25123 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25124
25125 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25126
25127 /* Make sure we don't clear the two scratch registers used to clear the
25128 relevant FPSCR bits in output_return_instruction. */
25129 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25130 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25131 emit_use (gen_rtx_REG (SImode, 4));
25132 bitmap_clear_bit (to_clear_bitmap, 4);
25133 }
25134
25135 /* If the user has defined registers to be caller saved, these are no longer
25136 restored by the function before returning and must thus be cleared for
25137 security purposes. */
25138 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25139 {
25140 /* We do not touch registers that can be used to pass arguments as per
25141 the AAPCS, since these should never be made callee-saved by user
25142 options. */
25143 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25144 continue;
25145 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25146 continue;
25147 if (call_used_regs[regno])
25148 bitmap_set_bit (to_clear_bitmap, regno);
25149 }
25150
25151 /* Make sure we do not clear the registers that the result is returned in. */
25152 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25153 if (!VOID_TYPE_P (result_type))
25154 {
25155 uint64_t to_clear_return_mask;
25156 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25157
25158 /* No need to check that we return in registers, because we don't
25159 support returning on the stack yet. */
25160 gcc_assert (REG_P (result_rtl));
25161 to_clear_return_mask
25162 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25163 padding_bits_to_clear_ptr);
25164 if (to_clear_return_mask)
25165 {
25166 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25167 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25168 {
25169 if (to_clear_return_mask & (1ULL << regno))
25170 bitmap_clear_bit (to_clear_bitmap, regno);
25171 }
25172 }
25173 }
25174
25175 if (padding_bits_to_clear != 0)
25176 {
25177 rtx reg_rtx;
25178 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25179
25180 /* The padding bits to clear are not 0, so we know we are dealing with
25181 returning a composite type, which only uses r0. Make sure that
25182 r1-r3 are cleared too; we will use r1 as a scratch register. */
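/* Illustrative example: for padding_bits_to_clear == 0x0000ff00 the code
   below loads r1 with 0xffff00ff (lower half via a move, upper half via
   the zero_extract insert) and the final AND clears bits 8-15 of r0. */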
25183 bitmap_clear (to_clear_arg_regs_bitmap);
25184 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25185 NUM_ARG_REGS - 1);
25186 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25187
25188 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25189
25190 /* Fill the lower half of the negated padding_bits_to_clear. */
25191 emit_move_insn (reg_rtx,
25192 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25193
25194 /* Also fill the top half of the negated padding_bits_to_clear. */
25195 if (((~padding_bits_to_clear) >> 16) > 0)
25196 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25197 GEN_INT (16),
25198 GEN_INT (16)),
25199 GEN_INT ((~padding_bits_to_clear) >> 16)));
25200
25201 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25202 gen_rtx_REG (SImode, R0_REGNUM),
25203 reg_rtx));
25204 }
25205
25206 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25207 {
25208 if (!bitmap_bit_p (to_clear_bitmap, regno))
25209 continue;
25210
25211 if (IS_VFP_REGNUM (regno))
25212 {
25213 /* If regno is an even vfp register and its successor is also to
25214 be cleared, use vmov. */
25215 if (TARGET_VFP_DOUBLE
25216 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25217 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25218 {
25219 emit_move_insn (gen_rtx_REG (DFmode, regno),
25220 CONST1_RTX (DFmode));
25221 emit_use (gen_rtx_REG (DFmode, regno));
25222 regno++;
25223 }
25224 else
25225 {
25226 emit_move_insn (gen_rtx_REG (SFmode, regno),
25227 CONST1_RTX (SFmode));
25228 emit_use (gen_rtx_REG (SFmode, regno));
25229 }
25230 }
25231 else
25232 {
25233 if (TARGET_THUMB1)
25234 {
25235 if (regno == R0_REGNUM)
25236 emit_move_insn (gen_rtx_REG (SImode, regno),
25237 const0_rtx);
25238 else
25239 /* R0 has either been cleared before (see code above) or it
25240 holds a return value; either way it is not secret
25241 information. */
25242 emit_move_insn (gen_rtx_REG (SImode, regno),
25243 gen_rtx_REG (SImode, R0_REGNUM));
25244 emit_use (gen_rtx_REG (SImode, regno));
25245 }
25246 else
25247 {
25248 emit_move_insn (gen_rtx_REG (SImode, regno),
25249 gen_rtx_REG (SImode, LR_REGNUM));
25250 emit_use (gen_rtx_REG (SImode, regno));
25251 }
25252 }
25253 }
25254 }
25255
25256 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25257 single POP instruction can be generated. LR should be replaced by PC. All
25258 the checks required are already done by USE_RETURN_INSN (). Hence,
25259 all we really need to check here is whether a single register or
25260 multiple registers are to be popped. */
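/* Illustrative example: a function that saved {r4, r5, lr} can return with
   a single "pop {r4, r5, pc}"; when only one register was saved, PC is
   instead reloaded directly from the stack with a post-incremented SP. */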
25261 void
25262 thumb2_expand_return (bool simple_return)
25263 {
25264 int i, num_regs;
25265 unsigned long saved_regs_mask;
25266 arm_stack_offsets *offsets;
25267
25268 offsets = arm_get_frame_offsets ();
25269 saved_regs_mask = offsets->saved_regs_mask;
25270
25271 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25272 if (saved_regs_mask & (1 << i))
25273 num_regs++;
25274
25275 if (!simple_return && saved_regs_mask)
25276 {
25277 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25278 functions, or adapt the code to handle it according to the ACLE. This
25279 path should not be reachable for cmse_nonsecure_entry functions, but we
25280 prefer to assert it for now to ensure that future code changes do not
25281 silently change this behavior. */
25282 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25283 if (num_regs == 1)
25284 {
25285 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25286 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25287 rtx addr = gen_rtx_MEM (SImode,
25288 gen_rtx_POST_INC (SImode,
25289 stack_pointer_rtx));
25290 set_mem_alias_set (addr, get_frame_alias_set ());
25291 XVECEXP (par, 0, 0) = ret_rtx;
25292 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25293 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25294 emit_jump_insn (par);
25295 }
25296 else
25297 {
25298 saved_regs_mask &= ~ (1 << LR_REGNUM);
25299 saved_regs_mask |= (1 << PC_REGNUM);
25300 arm_emit_multi_reg_pop (saved_regs_mask);
25301 }
25302 }
25303 else
25304 {
25305 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25306 cmse_nonsecure_entry_clear_before_return ();
25307 emit_jump_insn (simple_return_rtx);
25308 }
25309 }
25310
25311 void
25312 thumb1_expand_epilogue (void)
25313 {
25314 HOST_WIDE_INT amount;
25315 arm_stack_offsets *offsets;
25316 int regno;
25317
25318 /* Naked functions don't have prologues. */
25319 if (IS_NAKED (arm_current_func_type ()))
25320 return;
25321
25322 offsets = arm_get_frame_offsets ();
25323 amount = offsets->outgoing_args - offsets->saved_regs;
25324
25325 if (frame_pointer_needed)
25326 {
25327 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25328 amount = offsets->locals_base - offsets->saved_regs;
25329 }
25330 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25331
25332 gcc_assert (amount >= 0);
25333 if (amount)
25334 {
25335 emit_insn (gen_blockage ());
25336
25337 if (amount < 512)
25338 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25339 GEN_INT (amount)));
25340 else
25341 {
25342 /* r3 is always free in the epilogue. */
25343 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25344
25345 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25346 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25347 }
25348 }
25349
25350 /* Emit a USE (stack_pointer_rtx), so that
25351 the stack adjustment will not be deleted. */
25352 emit_insn (gen_force_register_use (stack_pointer_rtx));
25353
25354 if (crtl->profile || !TARGET_SCHED_PROLOG)
25355 emit_insn (gen_blockage ());
25356
25357 /* Emit a clobber for each register that will be restored in the epilogue,
25358 so that flow2 will get register lifetimes correct. */
25359 for (regno = 0; regno < 13; regno++)
25360 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25361 emit_clobber (gen_rtx_REG (SImode, regno));
25362
25363 if (! df_regs_ever_live_p (LR_REGNUM))
25364 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25365
25366 /* Clear all caller-saved regs that are not used to return. */
25367 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25368 cmse_nonsecure_entry_clear_before_return ();
25369 }
25370
25371 /* Epilogue code for APCS frame. */
25372 static void
25373 arm_expand_epilogue_apcs_frame (bool really_return)
25374 {
25375 unsigned long func_type;
25376 unsigned long saved_regs_mask;
25377 int num_regs = 0;
25378 int i;
25379 int floats_from_frame = 0;
25380 arm_stack_offsets *offsets;
25381
25382 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25383 func_type = arm_current_func_type ();
25384
25385 /* Get frame offsets for ARM. */
25386 offsets = arm_get_frame_offsets ();
25387 saved_regs_mask = offsets->saved_regs_mask;
25388
25389 /* Find the offset of the floating-point save area in the frame. */
25390 floats_from_frame
25391 = (offsets->saved_args
25392 + arm_compute_static_chain_stack_bytes ()
25393 - offsets->frame);
25394
25395 /* Compute how many core registers are saved and how far away the floats are. */
25396 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25397 if (saved_regs_mask & (1 << i))
25398 {
25399 num_regs++;
25400 floats_from_frame += 4;
25401 }
25402
25403 if (TARGET_HARD_FLOAT)
25404 {
25405 int start_reg;
25406 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25407
25408 /* The offset is from IP_REGNUM. */
25409 int saved_size = arm_get_vfp_saved_size ();
25410 if (saved_size > 0)
25411 {
25412 rtx_insn *insn;
25413 floats_from_frame += saved_size;
25414 insn = emit_insn (gen_addsi3 (ip_rtx,
25415 hard_frame_pointer_rtx,
25416 GEN_INT (-floats_from_frame)));
25417 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25418 ip_rtx, hard_frame_pointer_rtx);
25419 }
25420
25421 /* Generate VFP register multi-pop. */
25422 start_reg = FIRST_VFP_REGNUM;
25423
25424 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25425 /* Look for a case where a reg does not need restoring. */
25426 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25427 && (!df_regs_ever_live_p (i + 1)
25428 || call_used_regs[i + 1]))
25429 {
25430 if (start_reg != i)
25431 arm_emit_vfp_multi_reg_pop (start_reg,
25432 (i - start_reg) / 2,
25433 gen_rtx_REG (SImode,
25434 IP_REGNUM));
25435 start_reg = i + 2;
25436 }
25437
25438 /* Restore the remaining regs that we have discovered (or possibly
25439 even all of them, if the conditional in the for loop never
25440 fired). */
25441 if (start_reg != i)
25442 arm_emit_vfp_multi_reg_pop (start_reg,
25443 (i - start_reg) / 2,
25444 gen_rtx_REG (SImode, IP_REGNUM));
25445 }
25446
25447 if (TARGET_IWMMXT)
25448 {
25449 /* The frame pointer is guaranteed to be non-double-word aligned, as
25450 it is set to double-word-aligned old_stack_pointer - 4. */
25451 rtx_insn *insn;
25452 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25453
25454 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25455 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25456 {
25457 rtx addr = gen_frame_mem (V2SImode,
25458 plus_constant (Pmode, hard_frame_pointer_rtx,
25459 - lrm_count * 4));
25460 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25461 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25462 gen_rtx_REG (V2SImode, i),
25463 NULL_RTX);
25464 lrm_count += 2;
25465 }
25466 }
25467
25468 /* saved_regs_mask should contain IP, which holds the old stack pointer
25469 from the time the activation record was created. Since SP and IP are
25470 adjacent registers, we can restore the value directly into SP. */
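/* The multi-register pop emitted below can therefore reload SP (from the
   slot that held IP) and PC in a single ldm; the exact register list
   depends on what the prologue saved. */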
25471 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25472 saved_regs_mask &= ~(1 << IP_REGNUM);
25473 saved_regs_mask |= (1 << SP_REGNUM);
25474
25475 /* There are two registers left in saved_regs_mask - LR and PC. We
25476 only need to restore LR (the return address), but to
25477 save time we can load it directly into PC, unless we need a
25478 special function exit sequence, or we are not really returning. */
25479 if (really_return
25480 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25481 && !crtl->calls_eh_return)
25482 /* Delete LR from the register mask, so that LR on
25483 the stack is loaded into the PC in the register mask. */
25484 saved_regs_mask &= ~(1 << LR_REGNUM);
25485 else
25486 saved_regs_mask &= ~(1 << PC_REGNUM);
25487
25488 num_regs = bit_count (saved_regs_mask);
25489 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25490 {
25491 rtx_insn *insn;
25492 emit_insn (gen_blockage ());
25493 /* Unwind the stack to just below the saved registers. */
25494 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25495 hard_frame_pointer_rtx,
25496 GEN_INT (- 4 * num_regs)));
25497
25498 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25499 stack_pointer_rtx, hard_frame_pointer_rtx);
25500 }
25501
25502 arm_emit_multi_reg_pop (saved_regs_mask);
25503
25504 if (IS_INTERRUPT (func_type))
25505 {
25506 /* Interrupt handlers will have pushed the
25507 IP onto the stack, so restore it now. */
25508 rtx_insn *insn;
25509 rtx addr = gen_rtx_MEM (SImode,
25510 gen_rtx_POST_INC (SImode,
25511 stack_pointer_rtx));
25512 set_mem_alias_set (addr, get_frame_alias_set ());
25513 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25514 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25515 gen_rtx_REG (SImode, IP_REGNUM),
25516 NULL_RTX);
25517 }
25518
25519 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25520 return;
25521
25522 if (crtl->calls_eh_return)
25523 emit_insn (gen_addsi3 (stack_pointer_rtx,
25524 stack_pointer_rtx,
25525 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25526
25527 if (IS_STACKALIGN (func_type))
25528 /* Restore the original stack pointer. Before prologue, the stack was
25529 realigned and the original stack pointer saved in r0. For details,
25530 see comment in arm_expand_prologue. */
25531 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25532
25533 emit_jump_insn (simple_return_rtx);
25534 }
25535
25536 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
25537 function is not a sibcall. */
25538 void
25539 arm_expand_epilogue (bool really_return)
25540 {
25541 unsigned long func_type;
25542 unsigned long saved_regs_mask;
25543 int num_regs = 0;
25544 int i;
25545 int amount;
25546 arm_stack_offsets *offsets;
25547
25548 func_type = arm_current_func_type ();
25549
25550 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25551 and let output_return_instruction take care of any instruction emission. */
25552 if (IS_NAKED (func_type)
25553 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25554 {
25555 if (really_return)
25556 emit_jump_insn (simple_return_rtx);
25557 return;
25558 }
25559
25560 /* If we are throwing an exception, then we really must be doing a
25561 return, so we can't tail-call. */
25562 gcc_assert (!crtl->calls_eh_return || really_return);
25563
25564 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25565 {
25566 arm_expand_epilogue_apcs_frame (really_return);
25567 return;
25568 }
25569
25570 /* Get frame offsets for ARM. */
25571 offsets = arm_get_frame_offsets ();
25572 saved_regs_mask = offsets->saved_regs_mask;
25573 num_regs = bit_count (saved_regs_mask);
25574
25575 if (frame_pointer_needed)
25576 {
25577 rtx_insn *insn;
25578 /* Restore stack pointer if necessary. */
25579 if (TARGET_ARM)
25580 {
25581 /* In ARM mode, the frame pointer points to the first saved register.
25582 Restore the stack pointer to the last saved register. */
25583 amount = offsets->frame - offsets->saved_regs;
25584
25585 /* Force out any pending memory operations that reference stacked data
25586 before stack de-allocation occurs. */
25587 emit_insn (gen_blockage ());
25588 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25589 hard_frame_pointer_rtx,
25590 GEN_INT (amount)));
25591 arm_add_cfa_adjust_cfa_note (insn, amount,
25592 stack_pointer_rtx,
25593 hard_frame_pointer_rtx);
25594
25595 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25596 deleted. */
25597 emit_insn (gen_force_register_use (stack_pointer_rtx));
25598 }
25599 else
25600 {
25601 /* In Thumb-2 mode, the frame pointer points to the last saved
25602 register. */
25603 amount = offsets->locals_base - offsets->saved_regs;
25604 if (amount)
25605 {
25606 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25607 hard_frame_pointer_rtx,
25608 GEN_INT (amount)));
25609 arm_add_cfa_adjust_cfa_note (insn, amount,
25610 hard_frame_pointer_rtx,
25611 hard_frame_pointer_rtx);
25612 }
25613
25614 /* Force out any pending memory operations that reference stacked data
25615 before stack de-allocation occurs. */
25616 emit_insn (gen_blockage ());
25617 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25618 hard_frame_pointer_rtx));
25619 arm_add_cfa_adjust_cfa_note (insn, 0,
25620 stack_pointer_rtx,
25621 hard_frame_pointer_rtx);
25622 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25623 deleted. */
25624 emit_insn (gen_force_register_use (stack_pointer_rtx));
25625 }
25626 }
25627 else
25628 {
25629 /* Pop off outgoing args and local frame to adjust stack pointer to
25630 last saved register. */
25631 amount = offsets->outgoing_args - offsets->saved_regs;
25632 if (amount)
25633 {
25634 rtx_insn *tmp;
25635 /* Force out any pending memory operations that reference stacked data
25636 before stack de-allocation occurs. */
25637 emit_insn (gen_blockage ());
25638 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25639 stack_pointer_rtx,
25640 GEN_INT (amount)));
25641 arm_add_cfa_adjust_cfa_note (tmp, amount,
25642 stack_pointer_rtx, stack_pointer_rtx);
25643 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25644 not deleted. */
25645 emit_insn (gen_force_register_use (stack_pointer_rtx));
25646 }
25647 }
25648
25649 if (TARGET_HARD_FLOAT)
25650 {
25651 /* Generate VFP register multi-pop. */
25652 int end_reg = LAST_VFP_REGNUM + 1;
25653
25654 /* Scan the registers in reverse order. We need to match
25655 any groupings made in the prologue and generate matching
25656 vldm operations. Groups must be matched because, unlike
25657 pop, vldm can only restore consecutive registers. */
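/* Illustrative example: if the prologue saved d8-d9 and d11 but not d10,
   this scan emits one vldm for d11 and a separate vldm for d8-d9. */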
25658 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25659 /* Look for a case where a reg does not need restoring. */
25660 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25661 && (!df_regs_ever_live_p (i + 1)
25662 || call_used_regs[i + 1]))
25663 {
25664 /* Restore the regs discovered so far (from reg+2 to
25665 end_reg). */
25666 if (end_reg > i + 2)
25667 arm_emit_vfp_multi_reg_pop (i + 2,
25668 (end_reg - (i + 2)) / 2,
25669 stack_pointer_rtx);
25670 end_reg = i;
25671 }
25672
25673 /* Restore the remaining regs that we have discovered (or possibly
25674 even all of them, if the conditional in the for loop never
25675 fired). */
25676 if (end_reg > i + 2)
25677 arm_emit_vfp_multi_reg_pop (i + 2,
25678 (end_reg - (i + 2)) / 2,
25679 stack_pointer_rtx);
25680 }
25681
25682 if (TARGET_IWMMXT)
25683 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25684 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25685 {
25686 rtx_insn *insn;
25687 rtx addr = gen_rtx_MEM (V2SImode,
25688 gen_rtx_POST_INC (SImode,
25689 stack_pointer_rtx));
25690 set_mem_alias_set (addr, get_frame_alias_set ());
25691 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25692 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25693 gen_rtx_REG (V2SImode, i),
25694 NULL_RTX);
25695 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25696 stack_pointer_rtx, stack_pointer_rtx);
25697 }
25698
25699 if (saved_regs_mask)
25700 {
25701 rtx insn;
25702 bool return_in_pc = false;
25703
25704 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25705 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25706 && !IS_CMSE_ENTRY (func_type)
25707 && !IS_STACKALIGN (func_type)
25708 && really_return
25709 && crtl->args.pretend_args_size == 0
25710 && saved_regs_mask & (1 << LR_REGNUM)
25711 && !crtl->calls_eh_return)
25712 {
25713 saved_regs_mask &= ~(1 << LR_REGNUM);
25714 saved_regs_mask |= (1 << PC_REGNUM);
25715 return_in_pc = true;
25716 }
25717
25718 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25719 {
25720 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25721 if (saved_regs_mask & (1 << i))
25722 {
25723 rtx addr = gen_rtx_MEM (SImode,
25724 gen_rtx_POST_INC (SImode,
25725 stack_pointer_rtx));
25726 set_mem_alias_set (addr, get_frame_alias_set ());
25727
25728 if (i == PC_REGNUM)
25729 {
25730 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25731 XVECEXP (insn, 0, 0) = ret_rtx;
25732 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25733 addr);
25734 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25735 insn = emit_jump_insn (insn);
25736 }
25737 else
25738 {
25739 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25740 addr));
25741 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25742 gen_rtx_REG (SImode, i),
25743 NULL_RTX);
25744 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25745 stack_pointer_rtx,
25746 stack_pointer_rtx);
25747 }
25748 }
25749 }
25750 else
25751 {
25752 if (TARGET_LDRD
25753 && current_tune->prefer_ldrd_strd
25754 && !optimize_function_for_size_p (cfun))
25755 {
25756 if (TARGET_THUMB2)
25757 thumb2_emit_ldrd_pop (saved_regs_mask);
25758 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25759 arm_emit_ldrd_pop (saved_regs_mask);
25760 else
25761 arm_emit_multi_reg_pop (saved_regs_mask);
25762 }
25763 else
25764 arm_emit_multi_reg_pop (saved_regs_mask);
25765 }
25766
25767 if (return_in_pc)
25768 return;
25769 }
25770
25771 amount
25772 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25773 if (amount)
25774 {
25775 int i, j;
25776 rtx dwarf = NULL_RTX;
25777 rtx_insn *tmp =
25778 emit_insn (gen_addsi3 (stack_pointer_rtx,
25779 stack_pointer_rtx,
25780 GEN_INT (amount)));
25781
25782 RTX_FRAME_RELATED_P (tmp) = 1;
25783
25784 if (cfun->machine->uses_anonymous_args)
25785 {
25786 /* Restore pretend args. Refer to arm_expand_prologue for how
25787 pretend args are saved on the stack. */
25788 int num_regs = crtl->args.pretend_args_size / 4;
25789 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25790 for (j = 0, i = 0; j < num_regs; i++)
25791 if (saved_regs_mask & (1 << i))
25792 {
25793 rtx reg = gen_rtx_REG (SImode, i);
25794 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25795 j++;
25796 }
25797 REG_NOTES (tmp) = dwarf;
25798 }
25799 arm_add_cfa_adjust_cfa_note (tmp, amount,
25800 stack_pointer_rtx, stack_pointer_rtx);
25801 }
25802
25803 /* Clear all caller-saved regs that are not used to return. */
25804 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25805 {
25806 /* CMSE_ENTRY always returns. */
25807 gcc_assert (really_return);
25808 cmse_nonsecure_entry_clear_before_return ();
25809 }
25810
25811 if (!really_return)
25812 return;
25813
25814 if (crtl->calls_eh_return)
25815 emit_insn (gen_addsi3 (stack_pointer_rtx,
25816 stack_pointer_rtx,
25817 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25818
25819 if (IS_STACKALIGN (func_type))
25820 /* Restore the original stack pointer. Before prologue, the stack was
25821 realigned and the original stack pointer saved in r0. For details,
25822 see comment in arm_expand_prologue. */
25823 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25824
25825 emit_jump_insn (simple_return_rtx);
25826 }
25827
25828 /* Implementation of insn prologue_thumb1_interwork. This is the first
25829 "instruction" of a function called in ARM mode. Swap to Thumb mode. */
25830
25831 const char *
25832 thumb1_output_interwork (void)
25833 {
25834 const char * name;
25835 FILE *f = asm_out_file;
25836
25837 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25838 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25839 == SYMBOL_REF);
25840 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25841
25842 /* Generate code sequence to switch us into Thumb mode. */
25843 /* The .code 32 directive has already been emitted by
25844 ASM_DECLARE_FUNCTION_NAME. */
25845 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25846 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25847
25848 /* Generate a label, so that the debugger will notice the
25849 change in instruction sets. This label is also used by
25850 the assembler to bypass the ARM code when this function
25851 is called from a Thumb encoded function elsewhere in the
25852 same file. Hence the definition of STUB_NAME here must
25853 agree with the definition in gas/config/tc-arm.c. */
25854
25855 #define STUB_NAME ".real_start_of"
25856
25857 fprintf (f, "\t.code\t16\n");
25858 #ifdef ARM_PE
25859 if (arm_dllexport_name_p (name))
25860 name = arm_strip_name_encoding (name);
25861 #endif
25862 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25863 fprintf (f, "\t.thumb_func\n");
25864 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25865
25866 return "";
25867 }
25868
25869 /* Handle the case of a double word load into a low register from
25870 a computed memory address. The computed address may involve a
25871 register which is overwritten by the load. */
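/* Illustrative example: for a load of r2:r3 from [r2], the high word is
   loaded first ("ldr r3, [r2, #4]" then "ldr r2, [r2]") so the base
   register is not clobbered before it is used. */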
25872 const char *
25873 thumb_load_double_from_address (rtx *operands)
25874 {
25875 rtx addr;
25876 rtx base;
25877 rtx offset;
25878 rtx arg1;
25879 rtx arg2;
25880
25881 gcc_assert (REG_P (operands[0]));
25882 gcc_assert (MEM_P (operands[1]));
25883
25884 /* Get the memory address. */
25885 addr = XEXP (operands[1], 0);
25886
25887 /* Work out how the memory address is computed. */
25888 switch (GET_CODE (addr))
25889 {
25890 case REG:
25891 operands[2] = adjust_address (operands[1], SImode, 4);
25892
25893 if (REGNO (operands[0]) == REGNO (addr))
25894 {
25895 output_asm_insn ("ldr\t%H0, %2", operands);
25896 output_asm_insn ("ldr\t%0, %1", operands);
25897 }
25898 else
25899 {
25900 output_asm_insn ("ldr\t%0, %1", operands);
25901 output_asm_insn ("ldr\t%H0, %2", operands);
25902 }
25903 break;
25904
25905 case CONST:
25906 /* Compute <address> + 4 for the high order load. */
25907 operands[2] = adjust_address (operands[1], SImode, 4);
25908
25909 output_asm_insn ("ldr\t%0, %1", operands);
25910 output_asm_insn ("ldr\t%H0, %2", operands);
25911 break;
25912
25913 case PLUS:
25914 arg1 = XEXP (addr, 0);
25915 arg2 = XEXP (addr, 1);
25916
25917 if (CONSTANT_P (arg1))
25918 base = arg2, offset = arg1;
25919 else
25920 base = arg1, offset = arg2;
25921
25922 gcc_assert (REG_P (base));
25923
25924 /* Catch the case of <address> = <reg> + <reg> */
25925 if (REG_P (offset))
25926 {
25927 int reg_offset = REGNO (offset);
25928 int reg_base = REGNO (base);
25929 int reg_dest = REGNO (operands[0]);
25930
25931 /* Add the base and offset registers together into the
25932 higher destination register. */
25933 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25934 reg_dest + 1, reg_base, reg_offset);
25935
25936 /* Load the lower destination register from the address in
25937 the higher destination register. */
25938 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25939 reg_dest, reg_dest + 1);
25940
25941 /* Load the higher destination register from its own address
25942 plus 4. */
25943 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25944 reg_dest + 1, reg_dest + 1);
25945 }
25946 else
25947 {
25948 /* Compute <address> + 4 for the high order load. */
25949 operands[2] = adjust_address (operands[1], SImode, 4);
25950
25951 /* If the computed address is held in the low order register
25952 then load the high order register first, otherwise always
25953 load the low order register first. */
25954 if (REGNO (operands[0]) == REGNO (base))
25955 {
25956 output_asm_insn ("ldr\t%H0, %2", operands);
25957 output_asm_insn ("ldr\t%0, %1", operands);
25958 }
25959 else
25960 {
25961 output_asm_insn ("ldr\t%0, %1", operands);
25962 output_asm_insn ("ldr\t%H0, %2", operands);
25963 }
25964 }
25965 break;
25966
25967 case LABEL_REF:
25968 /* With no registers to worry about we can just load the value
25969 directly. */
25970 operands[2] = adjust_address (operands[1], SImode, 4);
25971
25972 output_asm_insn ("ldr\t%H0, %2", operands);
25973 output_asm_insn ("ldr\t%0, %1", operands);
25974 break;
25975
25976 default:
25977 gcc_unreachable ();
25978 }
25979
25980 return "";
25981 }
25982
25983 const char *
25984 thumb_output_move_mem_multiple (int n, rtx *operands)
25985 {
25986 switch (n)
25987 {
25988 case 2:
25989 if (REGNO (operands[4]) > REGNO (operands[5]))
25990 std::swap (operands[4], operands[5]);
25991
25992 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25993 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25994 break;
25995
25996 case 3:
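/* Sort the three scratch registers into ascending order with a
   three-element exchange network, so that the ldmia/stmia register
   lists below are strictly ascending as required. */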
25997 if (REGNO (operands[4]) > REGNO (operands[5]))
25998 std::swap (operands[4], operands[5]);
25999 if (REGNO (operands[5]) > REGNO (operands[6]))
26000 std::swap (operands[5], operands[6]);
26001 if (REGNO (operands[4]) > REGNO (operands[5]))
26002 std::swap (operands[4], operands[5]);
26003
26004 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26005 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26006 break;
26007
26008 default:
26009 gcc_unreachable ();
26010 }
26011
26012 return "";
26013 }
26014
26015 /* Output a call-via instruction for thumb state. */
26016 const char *
26017 thumb_call_via_reg (rtx reg)
26018 {
26019 int regno = REGNO (reg);
26020 rtx *labelp;
26021
26022 gcc_assert (regno < LR_REGNUM);
26023
26024 /* If we are in the normal text section we can use a single instance
26025 per compilation unit. If we are doing function sections, then we need
26026 an entry per section, since we can't rely on reachability. */
26027 if (in_section == text_section)
26028 {
26029 thumb_call_reg_needed = 1;
26030
26031 if (thumb_call_via_label[regno] == NULL)
26032 thumb_call_via_label[regno] = gen_label_rtx ();
26033 labelp = thumb_call_via_label + regno;
26034 }
26035 else
26036 {
26037 if (cfun->machine->call_via[regno] == NULL)
26038 cfun->machine->call_via[regno] = gen_label_rtx ();
26039 labelp = cfun->machine->call_via + regno;
26040 }
26041
26042 output_asm_insn ("bl\t%a0", labelp);
26043 return "";
26044 }
26045
26046 /* Routines for generating rtl. */
26047 void
26048 thumb_expand_movmemqi (rtx *operands)
26049 {
26050 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26051 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26052 HOST_WIDE_INT len = INTVAL (operands[2]);
26053 HOST_WIDE_INT offset = 0;
26054
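/* Copy in chunks: the movmem12b/movmem8b patterns below copy 12- and
   8-byte blocks and post-increment IN and OUT, then any remaining word,
   halfword and byte are copied at increasing OFFSETs. */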
26055 while (len >= 12)
26056 {
26057 emit_insn (gen_movmem12b (out, in, out, in));
26058 len -= 12;
26059 }
26060
26061 if (len >= 8)
26062 {
26063 emit_insn (gen_movmem8b (out, in, out, in));
26064 len -= 8;
26065 }
26066
26067 if (len >= 4)
26068 {
26069 rtx reg = gen_reg_rtx (SImode);
26070 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26071 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26072 len -= 4;
26073 offset += 4;
26074 }
26075
26076 if (len >= 2)
26077 {
26078 rtx reg = gen_reg_rtx (HImode);
26079 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26080 plus_constant (Pmode, in,
26081 offset))));
26082 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26083 offset)),
26084 reg));
26085 len -= 2;
26086 offset += 2;
26087 }
26088
26089 if (len)
26090 {
26091 rtx reg = gen_reg_rtx (QImode);
26092 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26093 plus_constant (Pmode, in,
26094 offset))));
26095 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26096 offset)),
26097 reg));
26098 }
26099 }
26100
26101 void
26102 thumb_reload_out_hi (rtx *operands)
26103 {
26104 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26105 }
26106
26107 /* Return the length of a function name prefix
26108 that starts with the character C. */
26109 static int
26110 arm_get_strip_length (int c)
26111 {
26112 switch (c)
26113 {
26114 ARM_NAME_ENCODING_LENGTHS
26115 default: return 0;
26116 }
26117 }
26118
26119 /* Return a pointer to a function's name with any
26120 and all prefix encodings stripped from it. */
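/* For instance, a leading '*' (the emit-verbatim marker handled by
   arm_asm_output_labelref below) is one such prefix that gets skipped. */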
26121 const char *
26122 arm_strip_name_encoding (const char *name)
26123 {
26124 int skip;
26125
26126 while ((skip = arm_get_strip_length (* name)))
26127 name += skip;
26128
26129 return name;
26130 }
26131
26132 /* If there is a '*' anywhere in the name's prefix, then
26133 emit the stripped name verbatim, otherwise prepend an
26134 underscore if leading underscores are being used. */
26135 void
26136 arm_asm_output_labelref (FILE *stream, const char *name)
26137 {
26138 int skip;
26139 int verbatim = 0;
26140
26141 while ((skip = arm_get_strip_length (* name)))
26142 {
26143 verbatim |= (*name == '*');
26144 name += skip;
26145 }
26146
26147 if (verbatim)
26148 fputs (name, stream);
26149 else
26150 asm_fprintf (stream, "%U%s", name);
26151 }
26152
26153 /* This function is used to emit an EABI tag and its associated value.
26154 We emit the numerical value of the tag in case the assembler does not
26155 support textual tags (e.g. gas prior to 2.20). If requested we include
26156 the tag name in a comment so that anyone reading the assembler output
26157 will know which tag is being set.
26158
26159 This function is not static because arm-c.c needs it too. */
26160
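/* For example, arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1) emits
   ".eabi_attribute 27, 1", with an "@ Tag_ABI_HardFP_use" comment appended
   when verbose assembly output is requested. */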
26161 void
26162 arm_emit_eabi_attribute (const char *name, int num, int val)
26163 {
26164 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26165 if (flag_verbose_asm || flag_debug_asm)
26166 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26167 asm_fprintf (asm_out_file, "\n");
26168 }
26169
26170 /* This function is used to print CPU tuning information as comment
26171 in assembler file. Pointers are not printed for now. */
26172
26173 void
26174 arm_print_tune_info (void)
26175 {
26176 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26177 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26178 current_tune->constant_limit);
26179 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26180 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26181 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26182 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26183 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26184 "prefetch.l1_cache_size:\t%d\n",
26185 current_tune->prefetch.l1_cache_size);
26186 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26187 "prefetch.l1_cache_line_size:\t%d\n",
26188 current_tune->prefetch.l1_cache_line_size);
26189 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26190 "prefer_constant_pool:\t%d\n",
26191 (int) current_tune->prefer_constant_pool);
26192 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26193 "branch_cost:\t(s:speed, p:predictable)\n");
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26195 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26196 current_tune->branch_cost (false, false));
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26198 current_tune->branch_cost (false, true));
26199 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26200 current_tune->branch_cost (true, false));
26201 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26202 current_tune->branch_cost (true, true));
26203 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26204 "prefer_ldrd_strd:\t%d\n",
26205 (int) current_tune->prefer_ldrd_strd);
26206 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26207 "logical_op_non_short_circuit:\t[%d,%d]\n",
26208 (int) current_tune->logical_op_non_short_circuit_thumb,
26209 (int) current_tune->logical_op_non_short_circuit_arm);
26210 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26211 "prefer_neon_for_64bits:\t%d\n",
26212 (int) current_tune->prefer_neon_for_64bits);
26213 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26214 "disparage_flag_setting_t16_encodings:\t%d\n",
26215 (int) current_tune->disparage_flag_setting_t16_encodings);
26216 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26217 "string_ops_prefer_neon:\t%d\n",
26218 (int) current_tune->string_ops_prefer_neon);
26219 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26220 "max_insns_inline_memset:\t%d\n",
26221 current_tune->max_insns_inline_memset);
26222 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26223 current_tune->fusible_ops);
26224 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26225 (int) current_tune->sched_autopref);
26226 }
26227
26228 /* Print .arch and .arch_extension directives corresponding to the
26229 current architecture configuration. */
26230 static void
26231 arm_print_asm_arch_directives ()
26232 {
26233 const arch_option *arch
26234 = arm_parse_arch_option_name (all_architectures, "-march",
26235 arm_active_target.arch_name);
26236 auto_sbitmap opt_bits (isa_num_bits);
26237
26238 gcc_assert (arch);
26239
26240 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26241 if (!arch->common.extensions)
26242 return;
26243
26244 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26245 opt->name != NULL;
26246 opt++)
26247 {
26248 if (!opt->remove)
26249 {
26250 arm_initialize_isa (opt_bits, opt->isa_bits);
26251
26252 /* If every feature bit of this option is set in the target
26253 ISA specification, print out the option name. However,
26254 don't print anything if all the bits are part of the
26255 FPU specification. */
26256 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26257 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26258 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26259 }
26260 }
26261 }
26262
26263 static void
26264 arm_file_start (void)
26265 {
26266 int val;
26267
26268 if (TARGET_BPABI)
26269 {
26270 /* We don't have a specified CPU. Use the architecture to
26271 generate the tags.
26272
26273 Note: it might be better to do this unconditionally, then the
26274 assembler would not need to know about all new CPU names as
26275 they are added. */
26276 if (!arm_active_target.core_name)
26277 {
26278 /* armv7ve doesn't support any extensions. */
26279 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26280 {
26281 /* Keep backward compatibility for assemblers
26282 which don't support armv7ve. */
26283 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26284 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26285 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26286 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26287 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26288 }
26289 else
26290 arm_print_asm_arch_directives ();
26291 }
26292 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26293 asm_fprintf (asm_out_file, "\t.arch %s\n",
26294 arm_active_target.core_name + 8);
26295 else
26296 {
26297 const char* truncated_name
26298 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26299 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26300 }
26301
26302 if (print_tune_info)
26303 arm_print_tune_info ();
26304
26305 if (! TARGET_SOFT_FLOAT)
26306 {
26307 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26308 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26309
26310 if (TARGET_HARD_FLOAT_ABI)
26311 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26312 }
26313
26314 /* Some of these attributes only apply when the corresponding features
26315 are used. However we don't have any easy way of figuring this out.
26316 Conservatively record the setting that would have been used. */
26317
26318 if (flag_rounding_math)
26319 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26320
26321 if (!flag_unsafe_math_optimizations)
26322 {
26323 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26324 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26325 }
26326 if (flag_signaling_nans)
26327 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26328
26329 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26330 flag_finite_math_only ? 1 : 3);
26331
26332 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26333 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26334 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26335 flag_short_enums ? 1 : 2);
26336
26337 /* Tag_ABI_optimization_goals. */
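/* The values follow the attribute encoding in the ABI addenda:
   1 = prefer speed, 2 = aggressively prefer speed,
   4 = aggressively prefer size, 6 = aggressively prefer debugging
   (used here for -O0). */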
26338 if (optimize_size)
26339 val = 4;
26340 else if (optimize >= 2)
26341 val = 2;
26342 else if (optimize)
26343 val = 1;
26344 else
26345 val = 6;
26346 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26347
26348 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26349 unaligned_access);
26350
26351 if (arm_fp16_format)
26352 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26353 (int) arm_fp16_format);
26354
26355 if (arm_lang_output_object_attributes_hook)
26356 arm_lang_output_object_attributes_hook();
26357 }
26358
26359 default_file_start ();
26360 }
26361
26362 static void
26363 arm_file_end (void)
26364 {
26365 int regno;
26366
26367 if (NEED_INDICATE_EXEC_STACK)
26368 /* Add .note.GNU-stack. */
26369 file_end_indicate_exec_stack ();
26370
26371 if (! thumb_call_reg_needed)
26372 return;
26373
26374 switch_to_section (text_section);
26375 asm_fprintf (asm_out_file, "\t.code 16\n");
26376 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26377
26378 for (regno = 0; regno < LR_REGNUM; regno++)
26379 {
26380 rtx label = thumb_call_via_label[regno];
26381
26382 if (label != 0)
26383 {
26384 targetm.asm_out.internal_label (asm_out_file, "L",
26385 CODE_LABEL_NUMBER (label));
26386 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26387 }
26388 }
26389 }
26390
26391 #ifndef ARM_PE
26392 /* Symbols in the text segment can be accessed without indirecting via the
26393 constant pool; it may take an extra binary operation, but this is still
26394 faster than indirecting via memory. Don't do this when not optimizing,
26395 since we won't be calculating all of the offsets necessary to do this
26396 simplification. */
26397
26398 static void
26399 arm_encode_section_info (tree decl, rtx rtl, int first)
26400 {
26401 if (optimize > 0 && TREE_CONSTANT (decl))
26402 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26403
26404 default_encode_section_info (decl, rtl, first);
26405 }
26406 #endif /* !ARM_PE */
26407
26408 static void
26409 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26410 {
26411 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26412 && !strcmp (prefix, "L"))
26413 {
26414 arm_ccfsm_state = 0;
26415 arm_target_insn = NULL;
26416 }
26417 default_internal_label (stream, prefix, labelno);
26418 }
26419
26420 /* Output code to add DELTA to the first argument, and then jump
26421 to FUNCTION. Used for C++ multiple inheritance. */
26422
26423 static void
26424 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26425 HOST_WIDE_INT, tree function)
26426 {
26427 static int thunk_label = 0;
26428 char label[256];
26429 char labelpc[256];
26430 int mi_delta = delta;
26431 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26432 int shift = 0;
26433 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26434 ? 1 : 0);
26435 if (mi_delta < 0)
26436 mi_delta = - mi_delta;
26437
26438 final_start_function (emit_barrier (), file, 1);
26439
26440 if (TARGET_THUMB1)
26441 {
26442 int labelno = thunk_label++;
26443 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26444 /* Thunks are entered in ARM mode when available. */
26445 if (TARGET_THUMB1_ONLY)
26446 {
26447 /* Push r3 so we can use it as a temporary. */
26448 /* TODO: Omit this save if r3 is not used. */
26449 fputs ("\tpush {r3}\n", file);
26450 fputs ("\tldr\tr3, ", file);
26451 }
26452 else
26453 {
26454 fputs ("\tldr\tr12, ", file);
26455 }
26456 assemble_name (file, label);
26457 fputc ('\n', file);
26458 if (flag_pic)
26459 {
26460 /* If we are generating PIC, the ldr instruction below loads
26461 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26462 the address of the add + 8, so we have:
26463
26464 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26465 = target + 1.
26466
26467 Note that we have "+ 1" because some versions of GNU ld
26468 don't set the low bit of the result for R_ARM_REL32
26469 relocations against thumb function symbols.
26470 On ARMv6M this is +4, not +8. */
26471 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26472 assemble_name (file, labelpc);
26473 fputs (":\n", file);
26474 if (TARGET_THUMB1_ONLY)
26475 {
26476 /* This is 2 insns after the start of the thunk, so we know it
26477 is 4-byte aligned. */
26478 fputs ("\tadd\tr3, pc, r3\n", file);
26479 fputs ("\tmov r12, r3\n", file);
26480 }
26481 else
26482 fputs ("\tadd\tr12, pc, r12\n", file);
26483 }
26484 else if (TARGET_THUMB1_ONLY)
26485 fputs ("\tmov r12, r3\n", file);
26486 }
26487 if (TARGET_THUMB1_ONLY)
26488 {
26489 if (mi_delta > 255)
26490 {
26491 fputs ("\tldr\tr3, ", file);
26492 assemble_name (file, label);
26493 fputs ("+4\n", file);
26494 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26495 mi_op, this_regno, this_regno);
26496 }
26497 else if (mi_delta != 0)
26498 {
26499 /* Thumb1 unified syntax requires s suffix in instruction name when
26500 one of the operands is immediate. */
26501 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26502 mi_op, this_regno, this_regno,
26503 mi_delta);
26504 }
26505 }
26506 else
26507 {
26508 /* TODO: Use movw/movt for large constants when available. */
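/* The loop below emits the delta as a series of add/sub instructions,
   each consuming an 8-bit chunk of the constant at an even bit position,
   matching the ARM rotated-immediate encoding. */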
26509 while (mi_delta != 0)
26510 {
26511 if ((mi_delta & (3 << shift)) == 0)
26512 shift += 2;
26513 else
26514 {
26515 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26516 mi_op, this_regno, this_regno,
26517 mi_delta & (0xff << shift));
26518 mi_delta &= ~(0xff << shift);
26519 shift += 8;
26520 }
26521 }
26522 }
26523 if (TARGET_THUMB1)
26524 {
26525 if (TARGET_THUMB1_ONLY)
26526 fputs ("\tpop\t{r3}\n", file);
26527
26528 fprintf (file, "\tbx\tr12\n");
26529 ASM_OUTPUT_ALIGN (file, 2);
26530 assemble_name (file, label);
26531 fputs (":\n", file);
26532 if (flag_pic)
26533 {
26534 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26535 rtx tem = XEXP (DECL_RTL (function), 0);
26536 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26537 pipeline offset is four rather than eight. Adjust the offset
26538 accordingly. */
26539 tem = plus_constant (GET_MODE (tem), tem,
26540 TARGET_THUMB1_ONLY ? -3 : -7);
26541 tem = gen_rtx_MINUS (GET_MODE (tem),
26542 tem,
26543 gen_rtx_SYMBOL_REF (Pmode,
26544 ggc_strdup (labelpc)));
26545 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26546 }
26547 else
26548 /* Output ".word .LTHUNKn". */
26549 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26550
26551 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26552 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26553 }
26554 else
26555 {
26556 fputs ("\tb\t", file);
26557 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26558 if (NEED_PLT_RELOC)
26559 fputs ("(PLT)", file);
26560 fputc ('\n', file);
26561 }
26562
26563 final_end_function ();
26564 }
26565
26566 /* MI thunk handling for TARGET_32BIT. */
26567
26568 static void
26569 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26570 HOST_WIDE_INT vcall_offset, tree function)
26571 {
26572 /* On ARM, this_regno is R0 or R1 depending on
26573 whether the function returns an aggregate or not.
26574 */
26575 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26576 function)
26577 ? R1_REGNUM : R0_REGNUM);
26578
26579 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26580 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26581 reload_completed = 1;
26582 emit_note (NOTE_INSN_PROLOGUE_END);
26583
26584 /* Add DELTA to THIS_RTX. */
26585 if (delta != 0)
26586 arm_split_constant (PLUS, Pmode, NULL_RTX,
26587 delta, this_rtx, this_rtx, false);
26588
26589 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26590 if (vcall_offset != 0)
26591 {
26592 /* Load *THIS_RTX. */
26593 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26594 /* Compute *THIS_RTX + VCALL_OFFSET. */
26595 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26596 false);
26597 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26598 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26599 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26600 }
26601
26602 /* Generate a tail call to the target function. */
26603 if (!TREE_USED (function))
26604 {
26605 assemble_external (function);
26606 TREE_USED (function) = 1;
26607 }
26608 rtx funexp = XEXP (DECL_RTL (function), 0);
26609 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26610 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26611 SIBLING_CALL_P (insn) = 1;
26612
26613 insn = get_insns ();
26614 shorten_branches (insn);
26615 final_start_function (insn, file, 1);
26616 final (insn, file, 1);
26617 final_end_function ();
26618
26619 /* Stop pretending this is a post-reload pass. */
26620 reload_completed = 0;
26621 }
26622
26623 /* Output code to add DELTA to the first argument, and then jump
26624 to FUNCTION. Used for C++ multiple inheritance. */
26625
26626 static void
26627 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26628 HOST_WIDE_INT vcall_offset, tree function)
26629 {
26630 if (TARGET_32BIT)
26631 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26632 else
26633 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26634 }
26635
26636 int
26637 arm_emit_vector_const (FILE *file, rtx x)
26638 {
26639 int i;
26640 const char * pattern;
26641
26642 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26643
26644 switch (GET_MODE (x))
26645 {
26646 case E_V2SImode: pattern = "%08x"; break;
26647 case E_V4HImode: pattern = "%04x"; break;
26648 case E_V8QImode: pattern = "%02x"; break;
26649 default: gcc_unreachable ();
26650 }
26651
26652 fprintf (file, "0x");
26653 for (i = CONST_VECTOR_NUNITS (x); i--;)
26654 {
26655 rtx element;
26656
26657 element = CONST_VECTOR_ELT (x, i);
26658 fprintf (file, pattern, INTVAL (element));
26659 }
26660
26661 return 1;
26662 }
26663
26664 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26665 HFmode constant pool entries are actually loaded with ldr. */
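/* That is, on a little-endian target the two bytes of the value are
   emitted first, followed by two bytes of zero padding; with big-endian
   word order the padding comes first. */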
26666 void
26667 arm_emit_fp16_const (rtx c)
26668 {
26669 long bits;
26670
26671 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26672 if (WORDS_BIG_ENDIAN)
26673 assemble_zeros (2);
26674 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26675 if (!WORDS_BIG_ENDIAN)
26676 assemble_zeros (2);
26677 }
26678
26679 const char *
26680 arm_output_load_gr (rtx *operands)
26681 {
26682 rtx reg;
26683 rtx offset;
26684 rtx wcgr;
26685 rtx sum;
26686
26687 if (!MEM_P (operands [1])
26688 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26689 || !REG_P (reg = XEXP (sum, 0))
26690 || !CONST_INT_P (offset = XEXP (sum, 1))
26691 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26692 return "wldrw%?\t%0, %1";
26693
26694 /* Fix up an out-of-range load of a GR register. */
26695 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26696 wcgr = operands[0];
26697 operands[0] = reg;
26698 output_asm_insn ("ldr%?\t%0, %1", operands);
26699
26700 operands[0] = wcgr;
26701 operands[1] = reg;
26702 output_asm_insn ("tmcr%?\t%0, %1", operands);
26703 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26704
26705 return "";
26706 }
26707
26708 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26709
26710 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26711 named arg and all anonymous args onto the stack.
26712 XXX I know the prologue shouldn't be pushing registers, but it is faster
26713 that way. */
26714
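/* Illustrative example: if the named arguments consume only r0 and r1,
   NREGS is 2 and *PRETEND_SIZE becomes 8, so the prologue pushes r2 and
   r3. */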
26715 static void
26716 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26717 machine_mode mode,
26718 tree type,
26719 int *pretend_size,
26720 int second_time ATTRIBUTE_UNUSED)
26721 {
26722 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26723 int nregs;
26724
26725 cfun->machine->uses_anonymous_args = 1;
26726 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26727 {
26728 nregs = pcum->aapcs_ncrn;
26729 if (nregs & 1)
26730 {
26731 int res = arm_needs_doubleword_align (mode, type);
26732 if (res < 0 && warn_psabi)
26733 inform (input_location, "parameter passing for argument of "
26734 "type %qT changed in GCC 7.1", type);
26735 else if (res > 0)
26736 nregs++;
26737 }
26738 }
26739 else
26740 nregs = pcum->nregs;
26741
26742 if (nregs < NUM_ARG_REGS)
26743 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26744 }
26745
26746 /* We can't rely on the caller doing the proper promotion when
26747 using APCS or ATPCS. */
26748
26749 static bool
26750 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26751 {
26752 return !TARGET_AAPCS_BASED;
26753 }
26754
26755 static machine_mode
26756 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26757 machine_mode mode,
26758 int *punsignedp ATTRIBUTE_UNUSED,
26759 const_tree fntype ATTRIBUTE_UNUSED,
26760 int for_return ATTRIBUTE_UNUSED)
26761 {
26762 if (GET_MODE_CLASS (mode) == MODE_INT
26763 && GET_MODE_SIZE (mode) < 4)
26764 return SImode;
26765
26766 return mode;
26767 }
26768
26769
26770 static bool
26771 arm_default_short_enums (void)
26772 {
26773 return ARM_DEFAULT_SHORT_ENUMS;
26774 }
26775
26776
26777 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26778
26779 static bool
26780 arm_align_anon_bitfield (void)
26781 {
26782 return TARGET_AAPCS_BASED;
26783 }
26784
26785
26786 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26787
26788 static tree
26789 arm_cxx_guard_type (void)
26790 {
26791 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26792 }
26793
26794
26795 /* The EABI says test the least significant bit of a guard variable. */
26796
26797 static bool
26798 arm_cxx_guard_mask_bit (void)
26799 {
26800 return TARGET_AAPCS_BASED;
26801 }
26802
26803
26804 /* The EABI specifies that all array cookies are 8 bytes long. */
26805
26806 static tree
26807 arm_get_cookie_size (tree type)
26808 {
26809 tree size;
26810
26811 if (!TARGET_AAPCS_BASED)
26812 return default_cxx_get_cookie_size (type);
26813
26814 size = build_int_cst (sizetype, 8);
26815 return size;
26816 }
26817
26818
26819 /* The EABI says that array cookies should also contain the element size. */
26820
26821 static bool
26822 arm_cookie_has_size (void)
26823 {
26824 return TARGET_AAPCS_BASED;
26825 }
26826
26827
26828 /* The EABI says constructors and destructors should return a pointer to
26829 the object constructed/destroyed. */
26830
26831 static bool
26832 arm_cxx_cdtor_returns_this (void)
26833 {
26834 return TARGET_AAPCS_BASED;
26835 }
26836
26837 /* The EABI says that an inline function may never be the key
26838 method. */
26839
26840 static bool
26841 arm_cxx_key_method_may_be_inline (void)
26842 {
26843 return !TARGET_AAPCS_BASED;
26844 }
26845
26846 static void
26847 arm_cxx_determine_class_data_visibility (tree decl)
26848 {
26849 if (!TARGET_AAPCS_BASED
26850 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26851 return;
26852
26853 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26854 is exported. However, on systems without dynamic vague linkage,
26855 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26856 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26857 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26858 else
26859 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26860 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26861 }
26862
26863 static bool
26864 arm_cxx_class_data_always_comdat (void)
26865 {
26866 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26867 vague linkage if the class has no key function. */
26868 return !TARGET_AAPCS_BASED;
26869 }
26870
26871
26872 /* The EABI says __aeabi_atexit should be used to register static
26873 destructors. */
26874
26875 static bool
26876 arm_cxx_use_aeabi_atexit (void)
26877 {
26878 return TARGET_AAPCS_BASED;
26879 }
26880
26881
26882 void
26883 arm_set_return_address (rtx source, rtx scratch)
26884 {
26885 arm_stack_offsets *offsets;
26886 HOST_WIDE_INT delta;
26887 rtx addr, mem;
26888 unsigned long saved_regs;
26889
26890 offsets = arm_get_frame_offsets ();
26891 saved_regs = offsets->saved_regs_mask;
26892
26893 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26894 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26895 else
26896 {
26897 if (frame_pointer_needed)
26898 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26899 else
26900 {
26901 /* LR will be the first saved register. */
26902 delta = offsets->outgoing_args - (offsets->frame + 4);
26903
26904
26905 if (delta >= 4096)
26906 {
26907 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26908 GEN_INT (delta & ~4095)));
26909 addr = scratch;
26910 delta &= 4095;
26911 }
26912 else
26913 addr = stack_pointer_rtx;
26914
26915 addr = plus_constant (Pmode, addr, delta);
26916 }
26917
26918 /* The store needs to be marked to prevent DSE from deleting
26919 it as dead if it is based on fp. */
26920 mem = gen_frame_mem (Pmode, addr);
26921 MEM_VOLATILE_P (mem) = true;
26922 emit_move_insn (mem, source);
26923 }
26924 }
26925
26926
26927 void
26928 thumb_set_return_address (rtx source, rtx scratch)
26929 {
26930 arm_stack_offsets *offsets;
26931 HOST_WIDE_INT delta;
26932 HOST_WIDE_INT limit;
26933 int reg;
26934 rtx addr, mem;
26935 unsigned long mask;
26936
26937 emit_use (source);
26938
26939 offsets = arm_get_frame_offsets ();
26940 mask = offsets->saved_regs_mask;
26941 if (mask & (1 << LR_REGNUM))
26942 {
26943 limit = 1024;
26944 /* Find the saved regs. */
26945 if (frame_pointer_needed)
26946 {
26947 delta = offsets->soft_frame - offsets->saved_args;
26948 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26949 if (TARGET_THUMB1)
26950 limit = 128;
26951 }
26952 else
26953 {
26954 delta = offsets->outgoing_args - offsets->saved_args;
26955 reg = SP_REGNUM;
26956 }
26957 /* Allow for the stack frame. */
26958 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26959 delta -= 16;
26960 /* The link register is always the first saved register. */
26961 delta -= 4;
26962
26963 /* Construct the address. */
26964 addr = gen_rtx_REG (SImode, reg);
26965 if (delta > limit)
26966 {
26967 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26968 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26969 addr = scratch;
26970 }
26971 else
26972 addr = plus_constant (Pmode, addr, delta);
26973
26974 /* The store needs to be marked to prevent DSE from deleting
26975 it as dead if it is based on fp. */
26976 mem = gen_frame_mem (Pmode, addr);
26977 MEM_VOLATILE_P (mem) = true;
26978 emit_move_insn (mem, source);
26979 }
26980 else
26981 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26982 }
26983
26984 /* Implements target hook vector_mode_supported_p. */
26985 bool
26986 arm_vector_mode_supported_p (machine_mode mode)
26987 {
26988 /* Neon also supports V2SImode, etc. listed in the clause below. */
26989 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26990 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26991 || mode == V2DImode || mode == V8HFmode))
26992 return true;
26993
26994 if ((TARGET_NEON || TARGET_IWMMXT)
26995 && ((mode == V2SImode)
26996 || (mode == V4HImode)
26997 || (mode == V8QImode)))
26998 return true;
26999
27000 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27001 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27002 || mode == V2HAmode))
27003 return true;
27004
27005 return false;
27006 }
27007
27008 /* Implements target hook array_mode_supported_p. */
27009
27010 static bool
27011 arm_array_mode_supported_p (machine_mode mode,
27012 unsigned HOST_WIDE_INT nelems)
27013 {
27014 if (TARGET_NEON
27015 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27016 && (nelems >= 2 && nelems <= 4))
27017 return true;
27018
27019 return false;
27020 }
27021
27022 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27023 registers when autovectorizing for Neon, at least until multiple vector
27024 widths are supported properly by the middle-end. */
27025
27026 static machine_mode
27027 arm_preferred_simd_mode (scalar_mode mode)
27028 {
27029 if (TARGET_NEON)
27030 switch (mode)
27031 {
27032 case E_SFmode:
27033 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27034 case E_SImode:
27035 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27036 case E_HImode:
27037 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27038 case E_QImode:
27039 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27040 case E_DImode:
27041 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27042 return V2DImode;
27043 break;
27044
27045 default:;
27046 }
27047
27048 if (TARGET_REALLY_IWMMXT)
27049 switch (mode)
27050 {
27051 case E_SImode:
27052 return V2SImode;
27053 case E_HImode:
27054 return V4HImode;
27055 case E_QImode:
27056 return V8QImode;
27057
27058 default:;
27059 }
27060
27061 return word_mode;
27062 }
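/* For example, under the mapping above a loop over 'float' data (SFmode) is
   autovectorized with V4SFmode (a quadword q register) by default, or with
   V2SFmode (a doubleword d register) when -mvectorize-with-neon-double is
   given; without NEON or iWMMXt the hook returns word_mode, i.e. no vector
   mode is preferred.  */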
27063
27064 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27065
27066 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27067 using r0-r4 for function arguments and r7 for the stack frame, and not have
27068 enough left over to do doubleword arithmetic. For Thumb-2 all the
27069 potentially problematic instructions accept high registers so this is not
27070 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27071 that require many low registers. */
27072 static bool
27073 arm_class_likely_spilled_p (reg_class_t rclass)
27074 {
27075 if ((TARGET_THUMB1 && rclass == LO_REGS)
27076 || rclass == CC_REG)
27077 return true;
27078
27079 return false;
27080 }
27081
27082 /* Implements target hook small_register_classes_for_mode_p. */
27083 bool
27084 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27085 {
27086 return TARGET_THUMB1;
27087 }
27088
27089 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27090 ARM insns and therefore guarantee that the shift count is modulo 256.
27091 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27092 guarantee no particular behavior for out-of-range counts. */
27093
27094 static unsigned HOST_WIDE_INT
27095 arm_shift_truncation_mask (machine_mode mode)
27096 {
27097 return mode == SImode ? 255 : 0;
27098 }
27099
27100
27101 /* Map internal gcc register numbers to DWARF2 register numbers. */
27102
27103 unsigned int
27104 arm_dbx_register_number (unsigned int regno)
27105 {
27106 if (regno < 16)
27107 return regno;
27108
27109 if (IS_VFP_REGNUM (regno))
27110 {
27111 /* See comment in arm_dwarf_register_span. */
27112 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27113 return 64 + regno - FIRST_VFP_REGNUM;
27114 else
27115 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27116 }
27117
27118 if (IS_IWMMXT_GR_REGNUM (regno))
27119 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27120
27121 if (IS_IWMMXT_REGNUM (regno))
27122 return 112 + regno - FIRST_IWMMXT_REGNUM;
27123
27124 return DWARF_FRAME_REGISTERS;
27125 }
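/* Worked examples of the mapping above (register names are illustrative):
     r0-r15                                           -> DWARF 0-15
     s4  (VFP single-capable, FIRST_VFP_REGNUM + 4)   -> 64 + 4  = 68
     d16 (FIRST_VFP_REGNUM + 32, no single view)      -> 256 + 16 = 272
     wCGR0 (first iWMMXt GR register)                 -> 104
     wR0   (first iWMMXt register)                    -> 112
   Anything else maps to DWARF_FRAME_REGISTERS, i.e. "no DWARF number".  */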
27126
27127 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27128 GCC models them as 64 32-bit registers, so we need to describe this to
27129 the DWARF generation code. Other registers can use the default. */
27130 static rtx
27131 arm_dwarf_register_span (rtx rtl)
27132 {
27133 machine_mode mode;
27134 unsigned regno;
27135 rtx parts[16];
27136 int nregs;
27137 int i;
27138
27139 regno = REGNO (rtl);
27140 if (!IS_VFP_REGNUM (regno))
27141 return NULL_RTX;
27142
27143 /* XXX FIXME: The EABI defines two VFP register ranges:
27144 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27145 256-287: D0-D31
27146 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27147 corresponding D register. Until GDB supports this, we shall use the
27148 legacy encodings. We also use these encodings for D0-D15 for
27149 compatibility with older debuggers. */
27150 mode = GET_MODE (rtl);
27151 if (GET_MODE_SIZE (mode) < 8)
27152 return NULL_RTX;
27153
27154 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27155 {
27156 nregs = GET_MODE_SIZE (mode) / 4;
27157 for (i = 0; i < nregs; i += 2)
27158 if (TARGET_BIG_END)
27159 {
27160 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27161 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27162 }
27163 else
27164 {
27165 parts[i] = gen_rtx_REG (SImode, regno + i);
27166 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27167 }
27168 }
27169 else
27170 {
27171 nregs = GET_MODE_SIZE (mode) / 8;
27172 for (i = 0; i < nregs; i++)
27173 parts[i] = gen_rtx_REG (DImode, regno + i);
27174 }
27175
27176 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27177 }
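/* For instance, a DFmode value held in d8 (which is also addressable as the
   single-precision pair s16/s17) is described to DWARF as a PARALLEL of two
   SImode registers, with the pair order reversed on big-endian targets.  A
   DFmode value in d16, which has no single-precision view, is described as
   one DImode register and takes the "else" branch above.  */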
27178
27179 #if ARM_UNWIND_INFO
27180 /* Emit unwind directives for a store-multiple instruction or stack pointer
27181 push during alignment.
27182 These should only ever be generated by the function prologue code, so
27183 we expect them to have a particular form.
27184 The store-multiple instruction sometimes pushes pc as the last register,
27185 although it should not be tracked in the unwind information; for -Os it
27186 sometimes pushes dummy registers before the first register that needs
27187 to be tracked in the unwind information. Such dummy registers are there
27188 just to avoid a separate stack adjustment and will not be restored in
27189 the epilogue. */
27190
27191 static void
27192 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27193 {
27194 int i;
27195 HOST_WIDE_INT offset;
27196 HOST_WIDE_INT nregs;
27197 int reg_size;
27198 unsigned reg;
27199 unsigned lastreg;
27200 unsigned padfirst = 0, padlast = 0;
27201 rtx e;
27202
27203 e = XVECEXP (p, 0, 0);
27204 gcc_assert (GET_CODE (e) == SET);
27205
27206 /* First insn will adjust the stack pointer. */
27207 gcc_assert (GET_CODE (e) == SET
27208 && REG_P (SET_DEST (e))
27209 && REGNO (SET_DEST (e)) == SP_REGNUM
27210 && GET_CODE (SET_SRC (e)) == PLUS);
27211
27212 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27213 nregs = XVECLEN (p, 0) - 1;
27214 gcc_assert (nregs);
27215
27216 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27217 if (reg < 16)
27218 {
27219 /* For -Os dummy registers can be pushed at the beginning to
27220 avoid separate stack pointer adjustment. */
27221 e = XVECEXP (p, 0, 1);
27222 e = XEXP (SET_DEST (e), 0);
27223 if (GET_CODE (e) == PLUS)
27224 padfirst = INTVAL (XEXP (e, 1));
27225 gcc_assert (padfirst == 0 || optimize_size);
27226 /* The function prologue may also push pc, but not annotate it as it is
27227 never restored. We turn this into a stack pointer adjustment. */
27228 e = XVECEXP (p, 0, nregs);
27229 e = XEXP (SET_DEST (e), 0);
27230 if (GET_CODE (e) == PLUS)
27231 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27232 else
27233 padlast = offset - 4;
27234 gcc_assert (padlast == 0 || padlast == 4);
27235 if (padlast == 4)
27236 fprintf (asm_out_file, "\t.pad #4\n");
27237 reg_size = 4;
27238 fprintf (asm_out_file, "\t.save {");
27239 }
27240 else if (IS_VFP_REGNUM (reg))
27241 {
27242 reg_size = 8;
27243 fprintf (asm_out_file, "\t.vsave {");
27244 }
27245 else
27246 /* Unknown register type. */
27247 gcc_unreachable ();
27248
27249 /* If the stack increment doesn't match the size of the saved registers,
27250 something has gone horribly wrong. */
27251 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27252
27253 offset = padfirst;
27254 lastreg = 0;
27255 /* The remaining insns will describe the stores. */
27256 for (i = 1; i <= nregs; i++)
27257 {
27258 /* Expect (set (mem <addr>) (reg)).
27259 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27260 e = XVECEXP (p, 0, i);
27261 gcc_assert (GET_CODE (e) == SET
27262 && MEM_P (SET_DEST (e))
27263 && REG_P (SET_SRC (e)));
27264
27265 reg = REGNO (SET_SRC (e));
27266 gcc_assert (reg >= lastreg);
27267
27268 if (i != 1)
27269 fprintf (asm_out_file, ", ");
27270 /* We can't use %r for vfp because we need to use the
27271 double precision register names. */
27272 if (IS_VFP_REGNUM (reg))
27273 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27274 else
27275 asm_fprintf (asm_out_file, "%r", reg);
27276
27277 if (flag_checking)
27278 {
27279 /* Check that the addresses are consecutive. */
27280 e = XEXP (SET_DEST (e), 0);
27281 if (GET_CODE (e) == PLUS)
27282 gcc_assert (REG_P (XEXP (e, 0))
27283 && REGNO (XEXP (e, 0)) == SP_REGNUM
27284 && CONST_INT_P (XEXP (e, 1))
27285 && offset == INTVAL (XEXP (e, 1)));
27286 else
27287 gcc_assert (i == 1
27288 && REG_P (e)
27289 && REGNO (e) == SP_REGNUM);
27290 offset += reg_size;
27291 }
27292 }
27293 fprintf (asm_out_file, "}\n");
27294 if (padfirst)
27295 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27296 }
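/* As an illustrative sketch, for a hypothetical prologue that does
   "push {r4, r5, lr}" this function emits

     .save {r4, r5, lr}

   while a VFP save such as "vpush {d8, d9}" produces

     .vsave {d8, d9}

   and a trailing dummy push of pc (or leading dummy pushes under -Os) is
   turned into the corresponding ".pad #N" adjustment.  */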
27297
27298 /* Emit unwind directives for a SET. */
27299
27300 static void
27301 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27302 {
27303 rtx e0;
27304 rtx e1;
27305 unsigned reg;
27306
27307 e0 = XEXP (p, 0);
27308 e1 = XEXP (p, 1);
27309 switch (GET_CODE (e0))
27310 {
27311 case MEM:
27312 /* Pushing a single register. */
27313 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27314 || !REG_P (XEXP (XEXP (e0, 0), 0))
27315 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27316 abort ();
27317
27318 asm_fprintf (asm_out_file, "\t.save ");
27319 if (IS_VFP_REGNUM (REGNO (e1)))
27320 asm_fprintf(asm_out_file, "{d%d}\n",
27321 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27322 else
27323 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27324 break;
27325
27326 case REG:
27327 if (REGNO (e0) == SP_REGNUM)
27328 {
27329 /* A stack increment. */
27330 if (GET_CODE (e1) != PLUS
27331 || !REG_P (XEXP (e1, 0))
27332 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27333 || !CONST_INT_P (XEXP (e1, 1)))
27334 abort ();
27335
27336 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27337 -INTVAL (XEXP (e1, 1)));
27338 }
27339 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27340 {
27341 HOST_WIDE_INT offset;
27342
27343 if (GET_CODE (e1) == PLUS)
27344 {
27345 if (!REG_P (XEXP (e1, 0))
27346 || !CONST_INT_P (XEXP (e1, 1)))
27347 abort ();
27348 reg = REGNO (XEXP (e1, 0));
27349 offset = INTVAL (XEXP (e1, 1));
27350 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27351 HARD_FRAME_POINTER_REGNUM, reg,
27352 offset);
27353 }
27354 else if (REG_P (e1))
27355 {
27356 reg = REGNO (e1);
27357 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27358 HARD_FRAME_POINTER_REGNUM, reg);
27359 }
27360 else
27361 abort ();
27362 }
27363 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27364 {
27365 /* Move from sp to reg. */
27366 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27367 }
27368 else if (GET_CODE (e1) == PLUS
27369 && REG_P (XEXP (e1, 0))
27370 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27371 && CONST_INT_P (XEXP (e1, 1)))
27372 {
27373 /* Set reg to offset from sp. */
27374 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27375 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27376 }
27377 else
27378 abort ();
27379 break;
27380
27381 default:
27382 abort ();
27383 }
27384 }
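/* Examples of the directives produced above (register names and offsets are
   purely illustrative):

     push of a single register r4   ->  .save {r4}
     push of a single register d8   ->  .save {d8}
     sp = sp - 16                   ->  .pad #16
     fp = sp + 8                    ->  .setfp fp, sp, #8
     r7 = sp                        ->  .movsp r7
     r7 = sp + 12                   ->  .movsp r7, #12

   The register names are printed with the target's own spelling via %r.  */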
27385
27386
27387 /* Emit unwind directives for the given insn. */
27388
27389 static void
27390 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27391 {
27392 rtx note, pat;
27393 bool handled_one = false;
27394
27395 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27396 return;
27397
27398 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27399 && (TREE_NOTHROW (current_function_decl)
27400 || crtl->all_throwers_are_sibcalls))
27401 return;
27402
27403 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27404 return;
27405
27406 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27407 {
27408 switch (REG_NOTE_KIND (note))
27409 {
27410 case REG_FRAME_RELATED_EXPR:
27411 pat = XEXP (note, 0);
27412 goto found;
27413
27414 case REG_CFA_REGISTER:
27415 pat = XEXP (note, 0);
27416 if (pat == NULL)
27417 {
27418 pat = PATTERN (insn);
27419 if (GET_CODE (pat) == PARALLEL)
27420 pat = XVECEXP (pat, 0, 0);
27421 }
27422
27423 /* Only emitted for IS_STACKALIGN re-alignment. */
27424 {
27425 rtx dest, src;
27426 unsigned reg;
27427
27428 src = SET_SRC (pat);
27429 dest = SET_DEST (pat);
27430
27431 gcc_assert (src == stack_pointer_rtx);
27432 reg = REGNO (dest);
27433 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27434 reg + 0x90, reg);
27435 }
27436 handled_one = true;
27437 break;
27438
27439 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27440 to get correct DWARF information for shrink-wrapping. We should not
27441 emit unwind information for it because these notes are used either for
27442 pretend arguments or to adjust sp and restore registers from the
27443 stack. */
27444 case REG_CFA_DEF_CFA:
27445 case REG_CFA_ADJUST_CFA:
27446 case REG_CFA_RESTORE:
27447 return;
27448
27449 case REG_CFA_EXPRESSION:
27450 case REG_CFA_OFFSET:
27451 /* ??? Only handling here what we actually emit. */
27452 gcc_unreachable ();
27453
27454 default:
27455 break;
27456 }
27457 }
27458 if (handled_one)
27459 return;
27460 pat = PATTERN (insn);
27461 found:
27462
27463 switch (GET_CODE (pat))
27464 {
27465 case SET:
27466 arm_unwind_emit_set (asm_out_file, pat);
27467 break;
27468
27469 case SEQUENCE:
27470 /* Store multiple. */
27471 arm_unwind_emit_sequence (asm_out_file, pat);
27472 break;
27473
27474 default:
27475 abort();
27476 }
27477 }
27478
27479
27480 /* Output a reference from a function exception table to the type_info
27481 object X. The EABI specifies that the symbol should be relocated by
27482 an R_ARM_TARGET2 relocation. */
27483
27484 static bool
27485 arm_output_ttype (rtx x)
27486 {
27487 fputs ("\t.word\t", asm_out_file);
27488 output_addr_const (asm_out_file, x);
27489 /* Use special relocations for symbol references. */
27490 if (!CONST_INT_P (x))
27491 fputs ("(TARGET2)", asm_out_file);
27492 fputc ('\n', asm_out_file);
27493
27494 return TRUE;
27495 }
27496
27497 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27498
27499 static void
27500 arm_asm_emit_except_personality (rtx personality)
27501 {
27502 fputs ("\t.personality\t", asm_out_file);
27503 output_addr_const (asm_out_file, personality);
27504 fputc ('\n', asm_out_file);
27505 }
27506 #endif /* ARM_UNWIND_INFO */
27507
27508 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27509
27510 static void
27511 arm_asm_init_sections (void)
27512 {
27513 #if ARM_UNWIND_INFO
27514 exception_section = get_unnamed_section (0, output_section_asm_op,
27515 "\t.handlerdata");
27516 #endif /* ARM_UNWIND_INFO */
27517
27518 #ifdef OBJECT_FORMAT_ELF
27519 if (target_pure_code)
27520 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27521 #endif
27522 }
27523
27524 /* Output unwind directives for the start/end of a function. */
27525
27526 void
27527 arm_output_fn_unwind (FILE * f, bool prologue)
27528 {
27529 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27530 return;
27531
27532 if (prologue)
27533 fputs ("\t.fnstart\n", f);
27534 else
27535 {
27536 /* If this function will never be unwound, then mark it as such.
27537 The same condition is used in arm_unwind_emit to suppress
27538 the frame annotations. */
27539 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27540 && (TREE_NOTHROW (current_function_decl)
27541 || crtl->all_throwers_are_sibcalls))
27542 fputs("\t.cantunwind\n", f);
27543
27544 fputs ("\t.fnend\n", f);
27545 }
27546 }
27547
27548 static bool
27549 arm_emit_tls_decoration (FILE *fp, rtx x)
27550 {
27551 enum tls_reloc reloc;
27552 rtx val;
27553
27554 val = XVECEXP (x, 0, 0);
27555 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27556
27557 output_addr_const (fp, val);
27558
27559 switch (reloc)
27560 {
27561 case TLS_GD32:
27562 fputs ("(tlsgd)", fp);
27563 break;
27564 case TLS_LDM32:
27565 fputs ("(tlsldm)", fp);
27566 break;
27567 case TLS_LDO32:
27568 fputs ("(tlsldo)", fp);
27569 break;
27570 case TLS_IE32:
27571 fputs ("(gottpoff)", fp);
27572 break;
27573 case TLS_LE32:
27574 fputs ("(tpoff)", fp);
27575 break;
27576 case TLS_DESCSEQ:
27577 fputs ("(tlsdesc)", fp);
27578 break;
27579 default:
27580 gcc_unreachable ();
27581 }
27582
27583 switch (reloc)
27584 {
27585 case TLS_GD32:
27586 case TLS_LDM32:
27587 case TLS_IE32:
27588 case TLS_DESCSEQ:
27589 fputs (" + (. - ", fp);
27590 output_addr_const (fp, XVECEXP (x, 0, 2));
27591 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27592 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27593 output_addr_const (fp, XVECEXP (x, 0, 3));
27594 fputc (')', fp);
27595 break;
27596 default:
27597 break;
27598 }
27599
27600 return TRUE;
27601 }
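/* For example, for a hypothetical symbol "x" with relocation TLS_LE32 this
   prints "x(tpoff)", while for TLS_GD32 with extra operands ".LPIC7" and "8"
   (both made up for illustration) it prints

     x(tlsgd) + (. - .LPIC7 - 8)

   For TLS_DESCSEQ the final operand is added rather than subtracted.  */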
27602
27603 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27604
27605 static void
27606 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27607 {
27608 gcc_assert (size == 4);
27609 fputs ("\t.word\t", file);
27610 output_addr_const (file, x);
27611 fputs ("(tlsldo)", file);
27612 }
27613
27614 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27615
27616 static bool
27617 arm_output_addr_const_extra (FILE *fp, rtx x)
27618 {
27619 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27620 return arm_emit_tls_decoration (fp, x);
27621 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27622 {
27623 char label[256];
27624 int labelno = INTVAL (XVECEXP (x, 0, 0));
27625
27626 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27627 assemble_name_raw (fp, label);
27628
27629 return TRUE;
27630 }
27631 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27632 {
27633 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27634 if (GOT_PCREL)
27635 fputs ("+.", fp);
27636 fputs ("-(", fp);
27637 output_addr_const (fp, XVECEXP (x, 0, 0));
27638 fputc (')', fp);
27639 return TRUE;
27640 }
27641 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27642 {
27643 output_addr_const (fp, XVECEXP (x, 0, 0));
27644 if (GOT_PCREL)
27645 fputs ("+.", fp);
27646 fputs ("-(", fp);
27647 output_addr_const (fp, XVECEXP (x, 0, 1));
27648 fputc (')', fp);
27649 return TRUE;
27650 }
27651 else if (GET_CODE (x) == CONST_VECTOR)
27652 return arm_emit_vector_const (fp, x);
27653
27654 return FALSE;
27655 }
27656
27657 /* Output assembly for a shift instruction.
27658 SET_FLAGS determines how the instruction modifies the condition codes.
27659 0 - Do not set condition codes.
27660 1 - Set condition codes.
27661 2 - Use smallest instruction. */
27662 const char *
27663 arm_output_shift(rtx * operands, int set_flags)
27664 {
27665 char pattern[100];
27666 static const char flag_chars[3] = {'?', '.', '!'};
27667 const char *shift;
27668 HOST_WIDE_INT val;
27669 char c;
27670
27671 c = flag_chars[set_flags];
27672 shift = shift_op(operands[3], &val);
27673 if (shift)
27674 {
27675 if (val != -1)
27676 operands[2] = GEN_INT(val);
27677 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27678 }
27679 else
27680 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27681
27682 output_asm_insn (pattern, operands);
27683 return "";
27684 }
27685
27686 /* Output assembly for a WMMX immediate shift instruction. */
27687 const char *
27688 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27689 {
27690 int shift = INTVAL (operands[2]);
27691 char templ[50];
27692 machine_mode opmode = GET_MODE (operands[0]);
27693
27694 gcc_assert (shift >= 0);
27695
27696 /* Handle shift values that exceed the maximum for the mode: > 63 (for the
27697 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27698 if (((opmode == V4HImode) && (shift > 15))
27699 || ((opmode == V2SImode) && (shift > 31))
27700 || ((opmode == DImode) && (shift > 63)))
27701 {
27702 if (wror_or_wsra)
27703 {
27704 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27705 output_asm_insn (templ, operands);
27706 if (opmode == DImode)
27707 {
27708 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27709 output_asm_insn (templ, operands);
27710 }
27711 }
27712 else
27713 {
27714 /* The destination register will contain all zeros. */
27715 sprintf (templ, "wzero\t%%0");
27716 output_asm_insn (templ, operands);
27717 }
27718 return "";
27719 }
27720
27721 if ((opmode == DImode) && (shift > 32))
27722 {
27723 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27724 output_asm_insn (templ, operands);
27725 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27726 output_asm_insn (templ, operands);
27727 }
27728 else
27729 {
27730 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27731 output_asm_insn (templ, operands);
27732 }
27733 return "";
27734 }
27735
27736 /* Output assembly for a WMMX tinsr instruction. */
27737 const char *
27738 arm_output_iwmmxt_tinsr (rtx *operands)
27739 {
27740 int mask = INTVAL (operands[3]);
27741 int i;
27742 char templ[50];
27743 int units = mode_nunits[GET_MODE (operands[0])];
27744 gcc_assert ((mask & (mask - 1)) == 0);
27745 for (i = 0; i < units; ++i)
27746 {
27747 if ((mask & 0x01) == 1)
27748 {
27749 break;
27750 }
27751 mask >>= 1;
27752 }
27753 gcc_assert (i < units);
27754 {
27755 switch (GET_MODE (operands[0]))
27756 {
27757 case E_V8QImode:
27758 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27759 break;
27760 case E_V4HImode:
27761 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27762 break;
27763 case E_V2SImode:
27764 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27765 break;
27766 default:
27767 gcc_unreachable ();
27768 break;
27769 }
27770 output_asm_insn (templ, operands);
27771 }
27772 return "";
27773 }
27774
27775 /* Output a Thumb-1 casesi dispatch sequence. */
27776 const char *
27777 thumb1_output_casesi (rtx *operands)
27778 {
27779 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27780
27781 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27782
27783 switch (GET_MODE(diff_vec))
27784 {
27785 case E_QImode:
27786 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27787 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27788 case E_HImode:
27789 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27790 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27791 case E_SImode:
27792 return "bl\t%___gnu_thumb1_case_si";
27793 default:
27794 gcc_unreachable ();
27795 }
27796 }
27797
27798 /* Output a Thumb-2 casesi instruction. */
27799 const char *
27800 thumb2_output_casesi (rtx *operands)
27801 {
27802 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27803
27804 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27805
27806 output_asm_insn ("cmp\t%0, %1", operands);
27807 output_asm_insn ("bhi\t%l3", operands);
27808 switch (GET_MODE(diff_vec))
27809 {
27810 case E_QImode:
27811 return "tbb\t[%|pc, %0]";
27812 case E_HImode:
27813 return "tbh\t[%|pc, %0, lsl #1]";
27814 case E_SImode:
27815 if (flag_pic)
27816 {
27817 output_asm_insn ("adr\t%4, %l2", operands);
27818 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27819 output_asm_insn ("add\t%4, %4, %5", operands);
27820 return "bx\t%4";
27821 }
27822 else
27823 {
27824 output_asm_insn ("adr\t%4, %l2", operands);
27825 return "ldr\t%|pc, [%4, %0, lsl #2]";
27826 }
27827 default:
27828 gcc_unreachable ();
27829 }
27830 }
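/* A sketch of the dispatch code this produces, with made-up operand values
   (index register, bound, table and default labels):

     cmp   r0, #41
     bhi   .Ldefault
     tbh   [pc, r0, lsl #1]

   for a HImode table, or for an SImode table without -fpic:

     cmp   r0, #41
     bhi   .Ldefault
     adr   r4, .Ltable
     ldr   pc, [r4, r0, lsl #2]

   The SImode -fpic variant loads the offset and adds it to the table base
   before the indirect branch.  */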
27831
27832 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27833 per-core tuning structs. */
27834 static int
27835 arm_issue_rate (void)
27836 {
27837 return current_tune->issue_rate;
27838 }
27839
27840 /* Return how many instructions the scheduler should look ahead to choose
27841 the best one. */
27842 static int
27843 arm_first_cycle_multipass_dfa_lookahead (void)
27844 {
27845 int issue_rate = arm_issue_rate ();
27846
27847 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27848 }
27849
27850 /* Enable modeling of L2 auto-prefetcher. */
27851 static int
27852 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27853 {
27854 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27855 }
27856
27857 const char *
27858 arm_mangle_type (const_tree type)
27859 {
27860 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27861 has to be mangled as if it is in the "std" namespace. */
27862 if (TARGET_AAPCS_BASED
27863 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27864 return "St9__va_list";
27865
27866 /* Half-precision float. */
27867 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27868 return "Dh";
27869
27870 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27871 builtin type. */
27872 if (TYPE_NAME (type) != NULL)
27873 return arm_mangle_builtin_type (type);
27874
27875 /* Use the default mangling. */
27876 return NULL;
27877 }
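/* For example, on an AAPCS-based target the builtin "__va_list" type mangles
   as "St9__va_list" (as if declared in namespace std), and the half-precision
   "__fp16" type mangles as "Dh"; other types fall back to the default
   mangling.  */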
27878
27879 /* Order of allocation of core registers for Thumb: this allocation is
27880 written over the corresponding initial entries of the array
27881 initialized with REG_ALLOC_ORDER. We allocate all low registers
27882 first. Saving and restoring a low register is usually cheaper than
27883 using a call-clobbered high register. */
27884
27885 static const int thumb_core_reg_alloc_order[] =
27886 {
27887 3, 2, 1, 0, 4, 5, 6, 7,
27888 12, 14, 8, 9, 10, 11
27889 };
27890
27891 /* Adjust register allocation order when compiling for Thumb. */
27892
27893 void
27894 arm_order_regs_for_local_alloc (void)
27895 {
27896 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27897 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27898 if (TARGET_THUMB)
27899 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27900 sizeof (thumb_core_reg_alloc_order));
27901 }
27902
27903 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27904
27905 bool
27906 arm_frame_pointer_required (void)
27907 {
27908 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27909 return true;
27910
27911 /* If the function receives nonlocal gotos, it needs to save the frame
27912 pointer in the nonlocal_goto_save_area object. */
27913 if (cfun->has_nonlocal_label)
27914 return true;
27915
27916 /* The frame pointer is required for non-leaf APCS frames. */
27917 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27918 return true;
27919
27920 /* If we are probing the stack in the prologue, we will have a faulting
27921 instruction prior to the stack adjustment and this requires a frame
27922 pointer if we want to catch the exception using the EABI unwinder. */
27923 if (!IS_INTERRUPT (arm_current_func_type ())
27924 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27925 || flag_stack_clash_protection)
27926 && arm_except_unwind_info (&global_options) == UI_TARGET
27927 && cfun->can_throw_non_call_exceptions)
27928 {
27929 HOST_WIDE_INT size = get_frame_size ();
27930
27931 /* That's irrelevant if there is no stack adjustment. */
27932 if (size <= 0)
27933 return false;
27934
27935 /* That's relevant only if there is a stack probe. */
27936 if (crtl->is_leaf && !cfun->calls_alloca)
27937 {
27938 /* We don't have the final size of the frame so adjust. */
27939 size += 32 * UNITS_PER_WORD;
27940 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27941 return true;
27942 }
27943 else
27944 return true;
27945 }
27946
27947 return false;
27948 }
27949
27950 /* Only Thumb-1 lacks support for conditional execution, so return true if
27951 the target is not Thumb-1. */
27952 static bool
27953 arm_have_conditional_execution (void)
27954 {
27955 return !TARGET_THUMB1;
27956 }
27957
27958 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27959 static HOST_WIDE_INT
27960 arm_vector_alignment (const_tree type)
27961 {
27962 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27963
27964 if (TARGET_AAPCS_BASED)
27965 align = MIN (align, 64);
27966
27967 return align;
27968 }
27969
27970 static unsigned int
27971 arm_autovectorize_vector_sizes (void)
27972 {
27973 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27974 }
27975
27976 static bool
27977 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27978 {
27979 /* Vectors which aren't in packed structures will not be less aligned than
27980 the natural alignment of their element type, so this is safe. */
27981 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27982 return !is_packed;
27983
27984 return default_builtin_vector_alignment_reachable (type, is_packed);
27985 }
27986
27987 static bool
27988 arm_builtin_support_vector_misalignment (machine_mode mode,
27989 const_tree type, int misalignment,
27990 bool is_packed)
27991 {
27992 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27993 {
27994 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27995
27996 if (is_packed)
27997 return align == 1;
27998
27999 /* If the misalignment is unknown, we should be able to handle the access
28000 so long as it is not to a member of a packed data structure. */
28001 if (misalignment == -1)
28002 return true;
28003
28004 /* Return true if the misalignment is a multiple of the natural alignment
28005 of the vector's element type. This is probably always going to be
28006 true in practice, since we've already established that this isn't a
28007 packed access. */
28008 return ((misalignment % align) == 0);
28009 }
28010
28011 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28012 is_packed);
28013 }
28014
28015 static void
28016 arm_conditional_register_usage (void)
28017 {
28018 int regno;
28019
28020 if (TARGET_THUMB1 && optimize_size)
28021 {
28022 /* When optimizing for size on Thumb-1, it's better not
28023 to use the HI regs, because of the overhead of
28024 stacking them. */
28025 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28026 fixed_regs[regno] = call_used_regs[regno] = 1;
28027 }
28028
28029 /* The link register can be clobbered by any branch insn,
28030 but we have no way to track that at present, so mark
28031 it as unavailable. */
28032 if (TARGET_THUMB1)
28033 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28034
28035 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28036 {
28037 /* VFPv3 registers are disabled when earlier VFP
28038 versions are selected due to the definition of
28039 LAST_VFP_REGNUM. */
28040 for (regno = FIRST_VFP_REGNUM;
28041 regno <= LAST_VFP_REGNUM; ++ regno)
28042 {
28043 fixed_regs[regno] = 0;
28044 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28045 || regno >= FIRST_VFP_REGNUM + 32;
28046 }
28047 }
28048
28049 if (TARGET_REALLY_IWMMXT)
28050 {
28051 regno = FIRST_IWMMXT_GR_REGNUM;
28052 /* The 2002/10/09 revision of the XScale ABI has wCG0
28053 and wCG1 as call-preserved registers. The 2002/11/21
28054 revision changed this so that all wCG registers are
28055 scratch registers. */
28056 for (regno = FIRST_IWMMXT_GR_REGNUM;
28057 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28058 fixed_regs[regno] = 0;
28059 /* The XScale ABI has wR0 - wR9 as scratch registers,
28060 the rest as call-preserved registers. */
28061 for (regno = FIRST_IWMMXT_REGNUM;
28062 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28063 {
28064 fixed_regs[regno] = 0;
28065 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28066 }
28067 }
28068
28069 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28070 {
28071 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28072 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28073 }
28074 else if (TARGET_APCS_STACK)
28075 {
28076 fixed_regs[10] = 1;
28077 call_used_regs[10] = 1;
28078 }
28079 /* -mcaller-super-interworking reserves r11 for calls to
28080 _interwork_r11_call_via_rN(). Making the register global
28081 is an easy way of ensuring that it remains valid for all
28082 calls. */
28083 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28084 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28085 {
28086 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28087 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28088 if (TARGET_CALLER_INTERWORKING)
28089 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28090 }
28091 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28092 }
28093
28094 static reg_class_t
28095 arm_preferred_rename_class (reg_class_t rclass)
28096 {
28097 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28098 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
28099 so that code size can be reduced. */
28100 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28101 return LO_REGS;
28102 else
28103 return NO_REGS;
28104 }
28105
28106 /* Compute the attribute "length" of insn "*push_multi".
28107 So this function MUST be kept in sync with that insn pattern. */
28108 int
28109 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28110 {
28111 int i, regno, hi_reg;
28112 int num_saves = XVECLEN (parallel_op, 0);
28113
28114 /* ARM mode. */
28115 if (TARGET_ARM)
28116 return 4;
28117 /* Thumb1 mode. */
28118 if (TARGET_THUMB1)
28119 return 2;
28120
28121 /* Thumb2 mode. */
28122 regno = REGNO (first_op);
28123 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
28124 list fits in 8 bits. Normally this means all registers in the list must be
28125 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use the
28126 32-bit encoding. The one exception is PUSH, where LR (a HI_REG) can still
28127 be used with the 16-bit encoding. */
28128 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28129 for (i = 1; i < num_saves && !hi_reg; i++)
28130 {
28131 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28132 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28133 }
28134
28135 if (!hi_reg)
28136 return 2;
28137 return 4;
28138 }
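/* Examples of the length computation above for Thumb-2 (register lists are
   illustrative):
     push {r4, r5, r6, lr}   -> 2 bytes (all LO_REGS, LR allowed)
     push {r4, r8}           -> 4 bytes (r8 is a HI_REG other than LR)
   In ARM state the answer is always 4 and in Thumb-1 always 2.  */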
28139
28140 /* Compute the attribute "length" of an insn. Currently, this function is used
28141 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28142 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28143 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28144 is true if OPERANDS contains an insn which explicitly updates the base register. */
28145
28146 int
28147 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28148 {
28149 /* ARM mode. */
28150 if (TARGET_ARM)
28151 return 4;
28152 /* Thumb1 mode. */
28153 if (TARGET_THUMB1)
28154 return 2;
28155
28156 rtx parallel_op = operands[0];
28157 /* Initialize to the number of elements in the PARALLEL. */
28158 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28159 /* Initialize to the register number of the base register. */
28160 unsigned regno = REGNO (operands[1]);
28161 /* Skip the return and write-back patterns.
28162 We only need the register pop patterns for the analysis below. */
28163 unsigned first_indx = 0;
28164 first_indx += return_pc ? 1 : 0;
28165 first_indx += write_back_p ? 1 : 0;
28166
28167 /* A pop operation can be done through LDM or POP. If the base register is SP
28168 and write-back is used, then the LDM is an alias of POP. */
28169 bool pop_p = (regno == SP_REGNUM && write_back_p);
28170 bool ldm_p = !pop_p;
28171
28172 /* Check base register for LDM. */
28173 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28174 return 4;
28175
28176 /* Check each register in the list. */
28177 for (; indx >= first_indx; indx--)
28178 {
28179 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28180 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28181 comment in arm_attr_length_push_multi. */
28182 if (REGNO_REG_CLASS (regno) == HI_REGS
28183 && (regno != PC_REGNUM || ldm_p))
28184 return 4;
28185 }
28186
28187 return 2;
28188 }
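/* Examples for Thumb-2, with illustrative register lists:

     pop {r4, r5, pc}     -> 2 bytes (SP base with write-back; PC is allowed
                             in the 16-bit POP encoding)
     ldm r8!, {r4, r5}    -> 4 bytes (high base register forces LDM)
     ldm r0, {r4, r9}     -> 4 bytes (r9 is a HI_REG)

   ARM state always gives 4 and Thumb-1 always gives 2.  */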
28189
28190 /* Compute the number of instructions emitted by output_move_double. */
28191 int
28192 arm_count_output_move_double_insns (rtx *operands)
28193 {
28194 int count;
28195 rtx ops[2];
28196 /* output_move_double may modify the operands array, so call it
28197 here on a copy of the array. */
28198 ops[0] = operands[0];
28199 ops[1] = operands[1];
28200 output_move_double (ops, false, &count);
28201 return count;
28202 }
28203
28204 int
28205 vfp3_const_double_for_fract_bits (rtx operand)
28206 {
28207 REAL_VALUE_TYPE r0;
28208
28209 if (!CONST_DOUBLE_P (operand))
28210 return 0;
28211
28212 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28213 if (exact_real_inverse (DFmode, &r0)
28214 && !REAL_VALUE_NEGATIVE (r0))
28215 {
28216 if (exact_real_truncate (DFmode, &r0))
28217 {
28218 HOST_WIDE_INT value = real_to_integer (&r0);
28219 value = value & 0xffffffff;
28220 if ((value != 0) && ( (value & (value - 1)) == 0))
28221 {
28222 int ret = exact_log2 (value);
28223 gcc_assert (IN_RANGE (ret, 0, 31));
28224 return ret;
28225 }
28226 }
28227 }
28228 return 0;
28229 }
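/* A worked example: for the constant 0.25 the exact inverse is 4.0, which is
   non-negative, truncates exactly and is a power of two, so the function
   returns log2 (4) = 2.  A constant such as 0.3 has no exact power-of-two
   inverse and yields 0.  */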
28230
28231 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28232 log2 is in [1, 32], return that log2. Otherwise return -1.
28233 This is used in the patterns for vcvt.s32.f32 floating-point to
28234 fixed-point conversions. */
28235
28236 int
28237 vfp3_const_double_for_bits (rtx x)
28238 {
28239 const REAL_VALUE_TYPE *r;
28240
28241 if (!CONST_DOUBLE_P (x))
28242 return -1;
28243
28244 r = CONST_DOUBLE_REAL_VALUE (x);
28245
28246 if (REAL_VALUE_NEGATIVE (*r)
28247 || REAL_VALUE_ISNAN (*r)
28248 || REAL_VALUE_ISINF (*r)
28249 || !real_isinteger (r, SFmode))
28250 return -1;
28251
28252 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28253
28254 /* The exact_log2 above will have returned -1 if this is
28255 not an exact log2. */
28256 if (!IN_RANGE (hwint, 1, 32))
28257 return -1;
28258
28259 return hwint;
28260 }
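/* Worked examples: 256.0 returns 8; 1.0 gives log2 = 0, which is outside the
   accepted range [1, 32], so the result is -1; 3.0 is not a power of two and
   also returns -1.  */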
28261
28262 \f
28263 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28264
28265 static void
28266 arm_pre_atomic_barrier (enum memmodel model)
28267 {
28268 if (need_atomic_barrier_p (model, true))
28269 emit_insn (gen_memory_barrier ());
28270 }
28271
28272 static void
28273 arm_post_atomic_barrier (enum memmodel model)
28274 {
28275 if (need_atomic_barrier_p (model, false))
28276 emit_insn (gen_memory_barrier ());
28277 }
28278
28279 /* Emit the load-exclusive and store-exclusive instructions.
28280 Use acquire and release versions if necessary. */
28281
28282 static void
28283 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28284 {
28285 rtx (*gen) (rtx, rtx);
28286
28287 if (acq)
28288 {
28289 switch (mode)
28290 {
28291 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28292 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28293 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28294 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28295 default:
28296 gcc_unreachable ();
28297 }
28298 }
28299 else
28300 {
28301 switch (mode)
28302 {
28303 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28304 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28305 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28306 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28307 default:
28308 gcc_unreachable ();
28309 }
28310 }
28311
28312 emit_insn (gen (rval, mem));
28313 }
28314
28315 static void
28316 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28317 rtx mem, bool rel)
28318 {
28319 rtx (*gen) (rtx, rtx, rtx);
28320
28321 if (rel)
28322 {
28323 switch (mode)
28324 {
28325 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28326 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28327 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28328 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28329 default:
28330 gcc_unreachable ();
28331 }
28332 }
28333 else
28334 {
28335 switch (mode)
28336 {
28337 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28338 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28339 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28340 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28341 default:
28342 gcc_unreachable ();
28343 }
28344 }
28345
28346 emit_insn (gen (bval, rval, mem));
28347 }
28348
28349 /* Mark the previous jump instruction as unlikely. */
28350
28351 static void
28352 emit_unlikely_jump (rtx insn)
28353 {
28354 rtx_insn *jump = emit_jump_insn (insn);
28355 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28356 }
28357
28358 /* Expand a compare and swap pattern. */
28359
28360 void
28361 arm_expand_compare_and_swap (rtx operands[])
28362 {
28363 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28364 machine_mode mode;
28365 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28366
28367 bval = operands[0];
28368 rval = operands[1];
28369 mem = operands[2];
28370 oldval = operands[3];
28371 newval = operands[4];
28372 is_weak = operands[5];
28373 mod_s = operands[6];
28374 mod_f = operands[7];
28375 mode = GET_MODE (mem);
28376
28377 /* Normally the succ memory model must be stronger than fail, but in the
28378 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28379 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28380
28381 if (TARGET_HAVE_LDACQ
28382 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28383 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28384 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28385
28386 switch (mode)
28387 {
28388 case E_QImode:
28389 case E_HImode:
28390 /* For narrow modes, we're going to perform the comparison in SImode,
28391 so do the zero-extension now. */
28392 rval = gen_reg_rtx (SImode);
28393 oldval = convert_modes (SImode, mode, oldval, true);
28394 /* FALLTHRU */
28395
28396 case E_SImode:
28397 /* Force the value into a register if needed. We waited until after
28398 the zero-extension above to do this properly. */
28399 if (!arm_add_operand (oldval, SImode))
28400 oldval = force_reg (SImode, oldval);
28401 break;
28402
28403 case E_DImode:
28404 if (!cmpdi_operand (oldval, mode))
28405 oldval = force_reg (mode, oldval);
28406 break;
28407
28408 default:
28409 gcc_unreachable ();
28410 }
28411
28412 if (TARGET_THUMB1)
28413 {
28414 switch (mode)
28415 {
28416 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28417 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28418 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28419 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28420 default:
28421 gcc_unreachable ();
28422 }
28423 }
28424 else
28425 {
28426 switch (mode)
28427 {
28428 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28429 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28430 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28431 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28432 default:
28433 gcc_unreachable ();
28434 }
28435 }
28436
28437 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28438 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28439
28440 if (mode == QImode || mode == HImode)
28441 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28442
28443 /* In all cases, we arrange for success to be signaled by Z set.
28444 This arrangement allows for the boolean result to be used directly
28445 in a subsequent branch, post optimization. For Thumb-1 targets, the
28446 boolean negation of the result is also stored in bval because the Thumb-1
28447 backend lacks dependency tracking for the CC flag, as flag-setting is not
28448 represented at the RTL level. */
28449 if (TARGET_THUMB1)
28450 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28451 else
28452 {
28453 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28454 emit_insn (gen_rtx_SET (bval, x));
28455 }
28456 }
28457
28458 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28459 another memory store between the load-exclusive and store-exclusive can
28460 reset the monitor from Exclusive to Open state. This means we must wait
28461 until after reload to split the pattern, lest we get a register spill in
28462 the middle of the atomic sequence. Success of the compare and swap is
28463 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28464 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28465 by the atomic_compare_and_swapmode standard pattern in operand 0). */
28466
28467 void
28468 arm_split_compare_and_swap (rtx operands[])
28469 {
28470 rtx rval, mem, oldval, newval, neg_bval;
28471 machine_mode mode;
28472 enum memmodel mod_s, mod_f;
28473 bool is_weak;
28474 rtx_code_label *label1, *label2;
28475 rtx x, cond;
28476
28477 rval = operands[1];
28478 mem = operands[2];
28479 oldval = operands[3];
28480 newval = operands[4];
28481 is_weak = (operands[5] != const0_rtx);
28482 mod_s = memmodel_from_int (INTVAL (operands[6]));
28483 mod_f = memmodel_from_int (INTVAL (operands[7]));
28484 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28485 mode = GET_MODE (mem);
28486
28487 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28488
28489 bool use_acquire = TARGET_HAVE_LDACQ
28490 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28491 || is_mm_release (mod_s));
28492
28493 bool use_release = TARGET_HAVE_LDACQ
28494 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28495 || is_mm_acquire (mod_s));
28496
28497 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28498 a full barrier is emitted after the store-release. */
28499 if (is_armv8_sync)
28500 use_acquire = false;
28501
28502 /* Checks whether a barrier is needed and emits one accordingly. */
28503 if (!(use_acquire || use_release))
28504 arm_pre_atomic_barrier (mod_s);
28505
28506 label1 = NULL;
28507 if (!is_weak)
28508 {
28509 label1 = gen_label_rtx ();
28510 emit_label (label1);
28511 }
28512 label2 = gen_label_rtx ();
28513
28514 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28515
28516 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28517 as required to communicate with arm_expand_compare_and_swap. */
28518 if (TARGET_32BIT)
28519 {
28520 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28521 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28522 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28523 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28524 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28525 }
28526 else
28527 {
28528 emit_move_insn (neg_bval, const1_rtx);
28529 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28530 if (thumb1_cmpneg_operand (oldval, SImode))
28531 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28532 label2, cond));
28533 else
28534 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28535 }
28536
28537 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28538
28539 /* Weak or strong, we want EQ to be true for success, so that we
28540 match the flags that we got from the compare above. */
28541 if (TARGET_32BIT)
28542 {
28543 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28544 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28545 emit_insn (gen_rtx_SET (cond, x));
28546 }
28547
28548 if (!is_weak)
28549 {
28550 /* Z is set to boolean value of !neg_bval, as required to communicate
28551 with arm_expand_compare_and_swap. */
28552 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28553 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28554 }
28555
28556 if (!is_mm_relaxed (mod_f))
28557 emit_label (label2);
28558
28559 /* Checks whether a barrier is needed and emits one accordingly. */
28560 if (is_armv8_sync
28561 || !(use_acquire || use_release))
28562 arm_post_atomic_barrier (mod_s);
28563
28564 if (is_mm_relaxed (mod_f))
28565 emit_label (label2);
28566 }
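/* A rough sketch of the code produced for a strong SImode compare and swap
   on a 32-bit target without acquire/release instructions (registers and
   labels are purely illustrative):

       dmb                      @ pre-barrier, if the memory model needs one
     1:
       ldrex   r0, [r2]         @ rval = *mem
       cmp     r0, r3           @ rval == oldval ?
       bne     2f
       strex   r1, r4, [r2]     @ try to store newval
       cmp     r1, #0
       bne     1b               @ retry if the store-exclusive failed
     2:
       dmb                      @ post-barrier, if the memory model needs one

   When acquire/release instructions are used instead, the explicit barriers
   are normally omitted.  */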
28567
28568 /* Split an atomic operation pattern. Operation is given by CODE and is one
28569 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28570 operation). Operation is performed on the content at MEM and on VALUE
28571 following the memory model MODEL_RTX. The content at MEM before and after
28572 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28573 success of the operation is returned in COND. Using a scratch register or
28574 an operand register for these determines what result is returned for that
28575 pattern. */
28576
28577 void
28578 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28579 rtx value, rtx model_rtx, rtx cond)
28580 {
28581 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28582 machine_mode mode = GET_MODE (mem);
28583 machine_mode wmode = (mode == DImode ? DImode : SImode);
28584 rtx_code_label *label;
28585 bool all_low_regs, bind_old_new;
28586 rtx x;
28587
28588 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28589
28590 bool use_acquire = TARGET_HAVE_LDACQ
28591 && !(is_mm_relaxed (model) || is_mm_consume (model)
28592 || is_mm_release (model));
28593
28594 bool use_release = TARGET_HAVE_LDACQ
28595 && !(is_mm_relaxed (model) || is_mm_consume (model)
28596 || is_mm_acquire (model));
28597
28598 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28599 a full barrier is emitted after the store-release. */
28600 if (is_armv8_sync)
28601 use_acquire = false;
28602
28603 /* Checks whether a barrier is needed and emits one accordingly. */
28604 if (!(use_acquire || use_release))
28605 arm_pre_atomic_barrier (model);
28606
28607 label = gen_label_rtx ();
28608 emit_label (label);
28609
28610 if (new_out)
28611 new_out = gen_lowpart (wmode, new_out);
28612 if (old_out)
28613 old_out = gen_lowpart (wmode, old_out);
28614 else
28615 old_out = new_out;
28616 value = simplify_gen_subreg (wmode, value, mode, 0);
28617
28618 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28619
28620 /* Does the operation require the destination and first operand to use the
28621 same register? This is decided by the register constraints of the relevant
28622 insn patterns in thumb1.md. */
28623 gcc_assert (!new_out || REG_P (new_out));
28624 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28625 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28626 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28627 bind_old_new =
28628 (TARGET_THUMB1
28629 && code != SET
28630 && code != MINUS
28631 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28632
28633 /* We want to return the old value while putting the result of the operation
28634 in the same register as the old value, so copy the old value over to the
28635 destination register and use that register for the operation. */
28636 if (old_out && bind_old_new)
28637 {
28638 emit_move_insn (new_out, old_out);
28639 old_out = new_out;
28640 }
28641
28642 switch (code)
28643 {
28644 case SET:
28645 new_out = value;
28646 break;
28647
28648 case NOT:
28649 x = gen_rtx_AND (wmode, old_out, value);
28650 emit_insn (gen_rtx_SET (new_out, x));
28651 x = gen_rtx_NOT (wmode, new_out);
28652 emit_insn (gen_rtx_SET (new_out, x));
28653 break;
28654
28655 case MINUS:
28656 if (CONST_INT_P (value))
28657 {
28658 value = GEN_INT (-INTVAL (value));
28659 code = PLUS;
28660 }
28661 /* FALLTHRU */
28662
28663 case PLUS:
28664 if (mode == DImode)
28665 {
28666 /* DImode plus/minus need to clobber flags. */
28667 /* The adddi3 and subdi3 patterns are incorrectly written so that
28668 they require matching operands, even when we could easily support
28669 three operands. Thankfully, this can be fixed up post-splitting,
28670 as the individual add+adc patterns do accept three operands and
28671 post-reload cprop can make these moves go away. */
28672 emit_move_insn (new_out, old_out);
28673 if (code == PLUS)
28674 x = gen_adddi3 (new_out, new_out, value);
28675 else
28676 x = gen_subdi3 (new_out, new_out, value);
28677 emit_insn (x);
28678 break;
28679 }
28680 /* FALLTHRU */
28681
28682 default:
28683 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28684 emit_insn (gen_rtx_SET (new_out, x));
28685 break;
28686 }
28687
28688 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28689 use_release);
28690
28691 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28692 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28693
28694 /* Checks whether a barrier is needed and emits one accordingly. */
28695 if (is_armv8_sync
28696 || !(use_acquire || use_release))
28697 arm_post_atomic_barrier (model);
28698 }
28699 \f
28700 #define MAX_VECT_LEN 16
28701
28702 struct expand_vec_perm_d
28703 {
28704 rtx target, op0, op1;
28705 auto_vec_perm_indices perm;
28706 machine_mode vmode;
28707 bool one_vector_p;
28708 bool testing_p;
28709 };
28710
28711 /* Generate a variable permutation. */
28712
28713 static void
28714 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28715 {
28716 machine_mode vmode = GET_MODE (target);
28717 bool one_vector_p = rtx_equal_p (op0, op1);
28718
28719 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28720 gcc_checking_assert (GET_MODE (op0) == vmode);
28721 gcc_checking_assert (GET_MODE (op1) == vmode);
28722 gcc_checking_assert (GET_MODE (sel) == vmode);
28723 gcc_checking_assert (TARGET_NEON);
28724
28725 if (one_vector_p)
28726 {
28727 if (vmode == V8QImode)
28728 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28729 else
28730 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28731 }
28732 else
28733 {
28734 rtx pair;
28735
28736 if (vmode == V8QImode)
28737 {
28738 pair = gen_reg_rtx (V16QImode);
28739 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28740 pair = gen_lowpart (TImode, pair);
28741 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28742 }
28743 else
28744 {
28745 pair = gen_reg_rtx (OImode);
28746 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28747 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28748 }
28749 }
28750 }
28751
28752 void
28753 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28754 {
28755 machine_mode vmode = GET_MODE (target);
28756 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28757 bool one_vector_p = rtx_equal_p (op0, op1);
28758 rtx rmask[MAX_VECT_LEN], mask;
28759
28760 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28761 numbering of elements for big-endian, we must reverse the order. */
28762 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28763
28764 /* The VTBL instruction does not use a modulo index, so we must take care
28765 of that ourselves. */
28766 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28767 for (i = 0; i < nelt; ++i)
28768 rmask[i] = mask;
28769 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28770 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28771
28772 arm_expand_vec_perm_1 (target, op0, op1, sel);
28773 }
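
/* As an illustration of the masking above (not part of the generated code):
   for a two-operand V8QImode permutation the selector is ANDed with
   2 * 8 - 1 == 15, so an out-of-range index such as 19 selects the same byte
   as index 3; in the one-operand case the mask is 7.  This reproduces the
   modulo behaviour VEC_PERM_EXPR requires but VTBL does not provide.  */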
28774
28775 /* Map lane ordering between architectural lane order, and GCC lane order,
28776 taking into account ABI. See comment above output_move_neon for details. */
28777
28778 static int
28779 neon_endian_lane_map (machine_mode mode, int lane)
28780 {
28781 if (BYTES_BIG_ENDIAN)
28782 {
28783 int nelems = GET_MODE_NUNITS (mode);
28784 /* Reverse lane order. */
28785 lane = (nelems - 1 - lane);
28786 /* Reverse D register order, to match ABI. */
28787 if (GET_MODE_SIZE (mode) == 16)
28788 lane = lane ^ (nelems / 2);
28789 }
28790 return lane;
28791 }
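
/* Worked example of the mapping above: for V4SImode on a big-endian target,
   nelems is 4 and the mode is 16 bytes wide, so architectural lanes
   0, 1, 2, 3 map to GCC lanes 1, 0, 3, 2 (reverse the lane order, then swap
   the two D registers).  On little-endian targets the mapping is the
   identity.  */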
28792
28793 /* Some permutations index into pairs of vectors; this is a helper function
28794 to map indices into those pairs of vectors. */
28795
28796 static int
28797 neon_pair_endian_lane_map (machine_mode mode, int lane)
28798 {
28799 int nelem = GET_MODE_NUNITS (mode);
28800 if (BYTES_BIG_ENDIAN)
28801 lane =
28802 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28803 return lane;
28804 }
28805
28806 /* Generate or test for an insn that supports a constant permutation. */
28807
28808 /* Recognize patterns for the VUZP insns. */
28809
28810 static bool
28811 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28812 {
28813 unsigned int i, odd, mask, nelt = d->perm.length ();
28814 rtx out0, out1, in0, in1;
28815 rtx (*gen)(rtx, rtx, rtx, rtx);
28816 int first_elem;
28817 int swap_nelt;
28818
28819 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28820 return false;
28821
28822 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28823 big-endian pattern on 64-bit vectors, so we correct for that. */
28824 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28825 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28826
28827 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28828
28829 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28830 odd = 0;
28831 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28832 odd = 1;
28833 else
28834 return false;
28835 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28836
28837 for (i = 0; i < nelt; i++)
28838 {
28839 unsigned elt =
28840 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28841 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28842 return false;
28843 }
28844
28845 /* Success! */
28846 if (d->testing_p)
28847 return true;
28848
28849 switch (d->vmode)
28850 {
28851 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28852 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28853 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28854 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28855 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28856 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28857 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28858 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28859 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28860 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28861 default:
28862 gcc_unreachable ();
28863 }
28864
28865 in0 = d->op0;
28866 in1 = d->op1;
28867 if (swap_nelt != 0)
28868 std::swap (in0, in1);
28869
28870 out0 = d->target;
28871 out1 = gen_reg_rtx (d->vmode);
28872 if (odd)
28873 std::swap (out0, out1);
28874
28875 emit_insn (gen (out0, in0, in1, out1));
28876 return true;
28877 }
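
/* Example selectors accepted above (little-endian, V4SImode, two operands):
   { 0, 2, 4, 6 } picks the even-indexed elements (odd == 0) and
   { 1, 3, 5, 7 } picks the odd-indexed elements (odd == 1); each maps onto a
   single VUZP, with the unused output going to a scratch register.  */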
28878
28879 /* Recognize patterns for the VZIP insns. */
28880
28881 static bool
28882 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28883 {
28884 unsigned int i, high, mask, nelt = d->perm.length ();
28885 rtx out0, out1, in0, in1;
28886 rtx (*gen)(rtx, rtx, rtx, rtx);
28887 int first_elem;
28888 bool is_swapped;
28889
28890 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28891 return false;
28892
28893 is_swapped = BYTES_BIG_ENDIAN;
28894
28895 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28896
28897 high = nelt / 2;
28898 if (first_elem == neon_endian_lane_map (d->vmode, high))
28899 ;
28900 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28901 high = 0;
28902 else
28903 return false;
28904 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28905
28906 for (i = 0; i < nelt / 2; i++)
28907 {
28908 unsigned elt =
28909 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28910 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28911 != elt)
28912 return false;
28913 elt =
28914 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28915 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28916 != elt)
28917 return false;
28918 }
28919
28920 /* Success! */
28921 if (d->testing_p)
28922 return true;
28923
28924 switch (d->vmode)
28925 {
28926 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28927 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28928 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28929 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28930 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28931 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28932 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28933 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28934 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28935 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28936 default:
28937 gcc_unreachable ();
28938 }
28939
28940 in0 = d->op0;
28941 in1 = d->op1;
28942 if (is_swapped)
28943 std::swap (in0, in1);
28944
28945 out0 = d->target;
28946 out1 = gen_reg_rtx (d->vmode);
28947 if (high)
28948 std::swap (out0, out1);
28949
28950 emit_insn (gen (out0, in0, in1, out1));
28951 return true;
28952 }
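
/* Example selectors accepted above (little-endian, V4SImode, two operands):
   { 0, 4, 1, 5 } interleaves the low halves of the two inputs and
   { 2, 6, 3, 7 } interleaves the high halves; the other half of the VZIP
   result is written to a scratch register.  */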
28953
28954 /* Recognize patterns for the VREV insns. */
28955
28956 static bool
28957 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28958 {
28959 unsigned int i, j, diff, nelt = d->perm.length ();
28960 rtx (*gen)(rtx, rtx);
28961
28962 if (!d->one_vector_p)
28963 return false;
28964
28965 diff = d->perm[0];
28966 switch (diff)
28967 {
28968 case 7:
28969 switch (d->vmode)
28970 {
28971 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28972 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28973 default:
28974 return false;
28975 }
28976 break;
28977 case 3:
28978 switch (d->vmode)
28979 {
28980 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28981 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28982 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28983 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28984 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28985 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28986 default:
28987 return false;
28988 }
28989 break;
28990 case 1:
28991 switch (d->vmode)
28992 {
28993 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28994 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28995 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28996 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28997 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28998 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28999 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29000 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29001 default:
29002 return false;
29003 }
29004 break;
29005 default:
29006 return false;
29007 }
29008
29009 for (i = 0; i < nelt ; i += diff + 1)
29010 for (j = 0; j <= diff; j += 1)
29011 {
29012 /* This is guaranteed to hold: diff is 7, 3 or 1, so the
29013 vector always has enough elements for this access.
29014 Reaching this point with any other value of diff in the
29015 selector would mean something has already gone wrong. */
29017 gcc_assert (i + j < nelt);
29018 if (d->perm[i + j] != i + diff - j)
29019 return false;
29020 }
29021
29022 /* Success! */
29023 if (d->testing_p)
29024 return true;
29025
29026 emit_insn (gen (d->target, d->op0));
29027 return true;
29028 }
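
/* Example: for a one-operand V16QImode permutation, the selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 } has diff == 3 and
   reverses the bytes within each 32-bit group, so it is emitted as a single
   VREV32.8 instruction.  */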
29029
29030 /* Recognize patterns for the VTRN insns. */
29031
29032 static bool
29033 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29034 {
29035 unsigned int i, odd, mask, nelt = d->perm.length ();
29036 rtx out0, out1, in0, in1;
29037 rtx (*gen)(rtx, rtx, rtx, rtx);
29038
29039 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29040 return false;
29041
29042 /* Note that these are little-endian tests. Adjust for big-endian later. */
29043 if (d->perm[0] == 0)
29044 odd = 0;
29045 else if (d->perm[0] == 1)
29046 odd = 1;
29047 else
29048 return false;
29049 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29050
29051 for (i = 0; i < nelt; i += 2)
29052 {
29053 if (d->perm[i] != i + odd)
29054 return false;
29055 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29056 return false;
29057 }
29058
29059 /* Success! */
29060 if (d->testing_p)
29061 return true;
29062
29063 switch (d->vmode)
29064 {
29065 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29066 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29067 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29068 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29069 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29070 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29071 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29072 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29073 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29074 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29075 default:
29076 gcc_unreachable ();
29077 }
29078
29079 in0 = d->op0;
29080 in1 = d->op1;
29081 if (BYTES_BIG_ENDIAN)
29082 {
29083 std::swap (in0, in1);
29084 odd = !odd;
29085 }
29086
29087 out0 = d->target;
29088 out1 = gen_reg_rtx (d->vmode);
29089 if (odd)
29090 std::swap (out0, out1);
29091
29092 emit_insn (gen (out0, in0, in1, out1));
29093 return true;
29094 }
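
/* Example selectors accepted above (little-endian, V4SImode, two operands):
   { 0, 4, 2, 6 } (odd == 0) and { 1, 5, 3, 7 } (odd == 1), i.e. the usual
   matrix-transpose interleaving; the second VTRN output goes to a scratch
   register.  */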
29095
29096 /* Recognize patterns for the VEXT insns. */
29097
29098 static bool
29099 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29100 {
29101 unsigned int i, nelt = d->perm.length ();
29102 rtx (*gen) (rtx, rtx, rtx, rtx);
29103 rtx offset;
29104
29105 unsigned int location;
29106
29107 unsigned int next = d->perm[0] + 1;
29108
29109 /* TODO: Handle GCC's numbering of elements for big-endian. */
29110 if (BYTES_BIG_ENDIAN)
29111 return false;
29112
29113 /* Check if the extracted indexes are increasing by one. */
29114 for (i = 1; i < nelt; next++, i++)
29115 {
29116 /* If we hit the most significant element of the 2nd vector in
29117 the previous iteration, no need to test further. */
29118 if (next == 2 * nelt)
29119 return false;
29120
29121 /* If we are operating on only one vector, it could be a
29122 rotation. If there are only two elements of size < 64, let
29123 arm_evpc_neon_vrev catch it. */
29124 if (d->one_vector_p && (next == nelt))
29125 {
29126 if ((nelt == 2) && (d->vmode != V2DImode))
29127 return false;
29128 else
29129 next = 0;
29130 }
29131
29132 if (d->perm[i] != next)
29133 return false;
29134 }
29135
29136 location = d->perm[0];
29137
29138 switch (d->vmode)
29139 {
29140 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29141 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29142 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29143 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29144 case E_V2SImode: gen = gen_neon_vextv2si; break;
29145 case E_V4SImode: gen = gen_neon_vextv4si; break;
29146 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29147 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29148 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29149 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29150 case E_V2DImode: gen = gen_neon_vextv2di; break;
29151 default:
29152 return false;
29153 }
29154
29155 /* Success! */
29156 if (d->testing_p)
29157 return true;
29158
29159 offset = GEN_INT (location);
29160 emit_insn (gen (d->target, d->op0, d->op1, offset));
29161 return true;
29162 }
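
/* Example: for V4SImode with two operands, the selector { 1, 2, 3, 4 }
   increases by one starting at element 1, so it is emitted as VEXT with an
   offset of 1, taking the top three elements of op0 followed by the first
   element of op1.  */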
29163
29164 /* The NEON VTBL instruction is a fully variable permutation that's even
29165 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29166 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29167 can do slightly better by expanding this as a constant where we don't
29168 have to apply a mask. */
29169
29170 static bool
29171 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29172 {
29173 rtx rperm[MAX_VECT_LEN], sel;
29174 machine_mode vmode = d->vmode;
29175 unsigned int i, nelt = d->perm.length ();
29176
29177 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29178 numbering of elements for big-endian, we must reverse the order. */
29179 if (BYTES_BIG_ENDIAN)
29180 return false;
29181
29182 if (d->testing_p)
29183 return true;
29184
29185 /* Generic code will try constant permutation twice: once with the
29186 original mode and again with the elements lowered to QImode.
29187 So wait and don't do the selector expansion ourselves. */
29188 if (vmode != V8QImode && vmode != V16QImode)
29189 return false;
29190
29191 for (i = 0; i < nelt; ++i)
29192 rperm[i] = GEN_INT (d->perm[i]);
29193 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29194 sel = force_reg (vmode, sel);
29195
29196 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29197 return true;
29198 }
29199
29200 static bool
29201 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29202 {
29203 /* Check if the input mask matches vext before reordering the
29204 operands. */
29205 if (TARGET_NEON)
29206 if (arm_evpc_neon_vext (d))
29207 return true;
29208
29209 /* The pattern matching functions above are written to look for a small
29210 number to begin the sequence (0, 1, N/2). If we begin with an index
29211 from the second operand, we can swap the operands. */
29212 unsigned int nelt = d->perm.length ();
29213 if (d->perm[0] >= nelt)
29214 {
29215 for (unsigned int i = 0; i < nelt; ++i)
29216 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29217
29218 std::swap (d->op0, d->op1);
29219 }
29220
29221 if (TARGET_NEON)
29222 {
29223 if (arm_evpc_neon_vuzp (d))
29224 return true;
29225 if (arm_evpc_neon_vzip (d))
29226 return true;
29227 if (arm_evpc_neon_vrev (d))
29228 return true;
29229 if (arm_evpc_neon_vtrn (d))
29230 return true;
29231 return arm_evpc_neon_vtbl (d);
29232 }
29233 return false;
29234 }
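
/* For instance, with V4SImode and the selector { 4, 0, 5, 1 } the first index
   comes from the second operand, so the code above rewrites the selector to
   { 0, 4, 1, 5 } and swaps op0 and op1; the result is then recognized by
   arm_evpc_neon_vzip.  */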
29235
29236 /* Expand a vec_perm_const pattern. */
29237
29238 bool
29239 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29240 {
29241 struct expand_vec_perm_d d;
29242 int i, nelt, which;
29243
29244 d.target = target;
29245 d.op0 = op0;
29246 d.op1 = op1;
29247
29248 d.vmode = GET_MODE (target);
29249 gcc_assert (VECTOR_MODE_P (d.vmode));
29250 d.testing_p = false;
29251
29252 nelt = GET_MODE_NUNITS (d.vmode);
29253 d.perm.reserve (nelt);
29254 for (i = which = 0; i < nelt; ++i)
29255 {
29256 rtx e = XVECEXP (sel, 0, i);
29257 int ei = INTVAL (e) & (2 * nelt - 1);
29258 which |= (ei < nelt ? 1 : 2);
29259 d.perm.quick_push (ei);
29260 }
29261
29262 switch (which)
29263 {
29264 default:
29265 gcc_unreachable();
29266
29267 case 3:
29268 d.one_vector_p = false;
29269 if (!rtx_equal_p (op0, op1))
29270 break;
29271
29272 /* The elements of PERM do not suggest that only the first operand
29273 is used, but both operands are identical. Allow easier matching
29274 of the permutation by folding the permutation into the single
29275 input vector. */
29276 /* FALLTHRU */
29277 case 2:
29278 for (i = 0; i < nelt; ++i)
29279 d.perm[i] &= nelt - 1;
29280 d.op0 = op1;
29281 d.one_vector_p = true;
29282 break;
29283
29284 case 1:
29285 d.op1 = op0;
29286 d.one_vector_p = true;
29287 break;
29288 }
29289
29290 return arm_expand_vec_perm_const_1 (&d);
29291 }
29292
29293 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29294
29295 static bool
29296 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29297 {
29298 struct expand_vec_perm_d d;
29299 unsigned int i, nelt, which;
29300 bool ret;
29301
29302 d.vmode = vmode;
29303 d.testing_p = true;
29304 d.perm.safe_splice (sel);
29305
29306 /* Categorize the set of elements in the selector. */
29307 nelt = GET_MODE_NUNITS (d.vmode);
29308 for (i = which = 0; i < nelt; ++i)
29309 {
29310 unsigned int e = d.perm[i];
29311 gcc_assert (e < 2 * nelt);
29312 which |= (e < nelt ? 1 : 2);
29313 }
29314
29315 /* If all elements come from the second vector, fold them onto the first. */
29316 if (which == 2)
29317 for (i = 0; i < nelt; ++i)
29318 d.perm[i] -= nelt;
29319
29320 /* Check whether the mask can be applied to the vector type. */
29321 d.one_vector_p = (which != 3);
29322
29323 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29324 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29325 if (!d.one_vector_p)
29326 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29327
29328 start_sequence ();
29329 ret = arm_expand_vec_perm_const_1 (&d);
29330 end_sequence ();
29331
29332 return ret;
29333 }
29334
29335 bool
29336 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29337 {
29338 /* If we are soft float and either have ldrd or the mode fits in a
29339 single word, then all auto-increment forms are ok. */
29340 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29341 return true;
29342
29343 switch (code)
29344 {
29345 /* Post-increment is supported for all instruction forms;
29346 pre-decrement is supported for all but vector forms. */
29347 case ARM_POST_INC:
29348 case ARM_PRE_DEC:
29349 if (VECTOR_MODE_P (mode))
29350 {
29351 if (code != ARM_PRE_DEC)
29352 return true;
29353 else
29354 return false;
29355 }
29356
29357 return true;
29358
29359 case ARM_POST_DEC:
29360 case ARM_PRE_INC:
29361 /* Without LDRD, when the mode size is greater than the
29362 word size there is no point in auto-incrementing
29363 because ldm and stm will not have these forms. */
29364 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29365 return false;
29366
29367 /* Vector and floating point modes do not support
29368 these auto increment forms. */
29369 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29370 return false;
29371
29372 return true;
29373
29374 default:
29375 return false;
29376
29377 }
29378
29379 return false;
29380 }
29381
29382 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29383 on ARM, since we know that shifts by negative amounts are no-ops.
29384 Additionally, the default expansion code is not available or suitable
29385 for post-reload insn splits (this can occur when the register allocator
29386 chooses not to do a shift in NEON).
29387
29388 This function is used in both initial expand and post-reload splits, and
29389 handles all kinds of 64-bit shifts.
29390
29391 Input requirements:
29392 - It is safe for the input and output to be the same register, but
29393 early-clobber rules apply for the shift amount and scratch registers.
29394 - Shift by register requires both scratch registers. In all other cases
29395 the scratch registers may be NULL.
29396 - Ashiftrt by a register also clobbers the CC register. */
29397 void
29398 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29399 rtx amount, rtx scratch1, rtx scratch2)
29400 {
29401 rtx out_high = gen_highpart (SImode, out);
29402 rtx out_low = gen_lowpart (SImode, out);
29403 rtx in_high = gen_highpart (SImode, in);
29404 rtx in_low = gen_lowpart (SImode, in);
29405
29406 /* Terminology:
29407 in = the register pair containing the input value.
29408 out = the destination register pair.
29409 up = the high- or low-part of each pair.
29410 down = the opposite part to "up".
29411 In a shift, we can consider bits to shift from "up"-stream to
29412 "down"-stream, so in a left-shift "up" is the low-part and "down"
29413 is the high-part of each register pair. */
29414
29415 rtx out_up = code == ASHIFT ? out_low : out_high;
29416 rtx out_down = code == ASHIFT ? out_high : out_low;
29417 rtx in_up = code == ASHIFT ? in_low : in_high;
29418 rtx in_down = code == ASHIFT ? in_high : in_low;
29419
29420 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29421 gcc_assert (out
29422 && (REG_P (out) || GET_CODE (out) == SUBREG)
29423 && GET_MODE (out) == DImode);
29424 gcc_assert (in
29425 && (REG_P (in) || GET_CODE (in) == SUBREG)
29426 && GET_MODE (in) == DImode);
29427 gcc_assert (amount
29428 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29429 && GET_MODE (amount) == SImode)
29430 || CONST_INT_P (amount)));
29431 gcc_assert (scratch1 == NULL
29432 || (GET_CODE (scratch1) == SCRATCH)
29433 || (GET_MODE (scratch1) == SImode
29434 && REG_P (scratch1)));
29435 gcc_assert (scratch2 == NULL
29436 || (GET_CODE (scratch2) == SCRATCH)
29437 || (GET_MODE (scratch2) == SImode
29438 && REG_P (scratch2)));
29439 gcc_assert (!REG_P (out) || !REG_P (amount)
29440 || !HARD_REGISTER_P (out)
29441 || (REGNO (out) != REGNO (amount)
29442 && REGNO (out) + 1 != REGNO (amount)));
29443
29444 /* Macros to make following code more readable. */
29445 #define SUB_32(DEST,SRC) \
29446 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29447 #define RSB_32(DEST,SRC) \
29448 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29449 #define SUB_S_32(DEST,SRC) \
29450 gen_addsi3_compare0 ((DEST), (SRC), \
29451 GEN_INT (-32))
29452 #define SET(DEST,SRC) \
29453 gen_rtx_SET ((DEST), (SRC))
29454 #define SHIFT(CODE,SRC,AMOUNT) \
29455 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29456 #define LSHIFT(CODE,SRC,AMOUNT) \
29457 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29458 SImode, (SRC), (AMOUNT))
29459 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29460 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29461 SImode, (SRC), (AMOUNT))
29462 #define ORR(A,B) \
29463 gen_rtx_IOR (SImode, (A), (B))
29464 #define BRANCH(COND,LABEL) \
29465 gen_arm_cond_branch ((LABEL), \
29466 gen_rtx_ ## COND (CCmode, cc_reg, \
29467 const0_rtx), \
29468 cc_reg)
29469
29470 /* Shifts by register and shifts by constant are handled separately. */
29471 if (CONST_INT_P (amount))
29472 {
29473 /* We have a shift-by-constant. */
29474
29475 /* First, handle out-of-range shift amounts.
29476 In both cases we try to match the result that an ARM instruction in a
29477 shift-by-register would give. This helps reduce execution
29478 differences between optimization levels, but it won't stop other
29479 parts of the compiler doing different things. This is "undefined
29480 behavior", in any case. */
29481 if (INTVAL (amount) <= 0)
29482 emit_insn (gen_movdi (out, in));
29483 else if (INTVAL (amount) >= 64)
29484 {
29485 if (code == ASHIFTRT)
29486 {
29487 rtx const31_rtx = GEN_INT (31);
29488 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29489 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29490 }
29491 else
29492 emit_insn (gen_movdi (out, const0_rtx));
29493 }
29494
29495 /* Now handle valid shifts. */
29496 else if (INTVAL (amount) < 32)
29497 {
29498 /* Shifts by a constant less than 32. */
29499 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29500
29501 /* Clearing the out register in DImode first avoids lots
29502 of spilling and results in less stack usage.
29503 Later this redundant insn is completely removed.
29504 Do that only if "in" and "out" are different registers. */
29505 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29506 emit_insn (SET (out, const0_rtx));
29507 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29508 emit_insn (SET (out_down,
29509 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29510 out_down)));
29511 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29512 }
29513 else
29514 {
29515 /* Shifts by a constant greater than 31. */
29516 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29517
29518 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29519 emit_insn (SET (out, const0_rtx));
29520 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29521 if (code == ASHIFTRT)
29522 emit_insn (gen_ashrsi3 (out_up, in_up,
29523 GEN_INT (31)));
29524 else
29525 emit_insn (SET (out_up, const0_rtx));
29526 }
29527 }
29528 else
29529 {
29530 /* We have a shift-by-register. */
29531 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29532
29533 /* This alternative requires the scratch registers. */
29534 gcc_assert (scratch1 && REG_P (scratch1));
29535 gcc_assert (scratch2 && REG_P (scratch2));
29536
29537 /* We will need the values "amount-32" and "32-amount" later.
29538 Swapping them around now allows the later code to be more general. */
29539 switch (code)
29540 {
29541 case ASHIFT:
29542 emit_insn (SUB_32 (scratch1, amount));
29543 emit_insn (RSB_32 (scratch2, amount));
29544 break;
29545 case ASHIFTRT:
29546 emit_insn (RSB_32 (scratch1, amount));
29547 /* Also set CC = amount > 32. */
29548 emit_insn (SUB_S_32 (scratch2, amount));
29549 break;
29550 case LSHIFTRT:
29551 emit_insn (RSB_32 (scratch1, amount));
29552 emit_insn (SUB_32 (scratch2, amount));
29553 break;
29554 default:
29555 gcc_unreachable ();
29556 }
29557
29558 /* Emit code like this:
29559
29560 arithmetic-left:
29561 out_down = in_down << amount;
29562 out_down = (in_up << (amount - 32)) | out_down;
29563 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29564 out_up = in_up << amount;
29565
29566 arithmetic-right:
29567 out_down = in_down >> amount;
29568 out_down = (in_up << (32 - amount)) | out_down;
29569 if (amount < 32)
29570 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29571 out_up = in_up << amount;
29572
29573 logical-right:
29574 out_down = in_down >> amount;
29575 out_down = (in_up << (32 - amount)) | out_down;
29576 if (amount < 32)
29577 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29578 out_up = in_up << amount;
29579
29580 The ARM and Thumb2 variants are the same but implemented slightly
29581 differently. If this were only called during expand we could just
29582 use the Thumb2 case and let combine do the right thing, but this
29583 can also be called from post-reload splitters. */
29584
29585 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29586
29587 if (!TARGET_THUMB2)
29588 {
29589 /* Emit code for ARM mode. */
29590 emit_insn (SET (out_down,
29591 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29592 if (code == ASHIFTRT)
29593 {
29594 rtx_code_label *done_label = gen_label_rtx ();
29595 emit_jump_insn (BRANCH (LT, done_label));
29596 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29597 out_down)));
29598 emit_label (done_label);
29599 }
29600 else
29601 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29602 out_down)));
29603 }
29604 else
29605 {
29606 /* Emit code for Thumb2 mode.
29607 Thumb2 can't do shift and or in one insn. */
29608 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29609 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29610
29611 if (code == ASHIFTRT)
29612 {
29613 rtx_code_label *done_label = gen_label_rtx ();
29614 emit_jump_insn (BRANCH (LT, done_label));
29615 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29616 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29617 emit_label (done_label);
29618 }
29619 else
29620 {
29621 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29622 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29623 }
29624 }
29625
29626 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29627 }
29628
29629 #undef SUB_32
29630 #undef RSB_32
29631 #undef SUB_S_32
29632 #undef SET
29633 #undef SHIFT
29634 #undef LSHIFT
29635 #undef REV_LSHIFT
29636 #undef ORR
29637 #undef BRANCH
29638 }
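
/* Worked example of the constant case above (ignoring the optional clearing
   of OUT): a DImode left shift by 10 expands to
     out_high = (in_high << 10) | ((unsigned) in_low >> 22);
     out_low  = in_low << 10;
   while an arithmetic right shift by 40 uses the adjusted amount 40 - 32:
     out_low  = in_high >> 8;
     out_high = in_high >> 31;   (sign fill)  */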
29639
29640 /* Returns true if the pattern is a valid symbolic address, which is either a
29641 symbol_ref or (symbol_ref + addend).
29642
29643 According to the ARM ELF ABI, the initial addend of REL-type relocations
29644 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29645 literal field of the instruction as a 16-bit signed value in the range
29646 -32768 <= A < 32768. */
29647
29648 bool
29649 arm_valid_symbolic_address_p (rtx addr)
29650 {
29651 rtx xop0, xop1 = NULL_RTX;
29652 rtx tmp = addr;
29653
29654 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29655 return true;
29656
29657 /* (const (plus: symbol_ref const_int)) */
29658 if (GET_CODE (addr) == CONST)
29659 tmp = XEXP (addr, 0);
29660
29661 if (GET_CODE (tmp) == PLUS)
29662 {
29663 xop0 = XEXP (tmp, 0);
29664 xop1 = XEXP (tmp, 1);
29665
29666 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29667 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29668 }
29669
29670 return false;
29671 }
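
/* Examples: (symbol_ref "SYM"), a label_ref, and
   (const (plus (symbol_ref "SYM") (const_int 16))) are accepted, whereas an
   addend such as 0x10000 is rejected because it cannot be represented as a
   16-bit signed value (-32768 <= A < 32768).  */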
29672
29673 /* Returns true if *COMPARISON is a comparison operation we can handle, and
29674 puts the operands into a form that is valid for that comparison. */
29675 bool
29676 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29677 {
29678 enum rtx_code code = GET_CODE (*comparison);
29679 int code_int;
29680 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29681 ? GET_MODE (*op2) : GET_MODE (*op1);
29682
29683 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29684
29685 if (code == UNEQ || code == LTGT)
29686 return false;
29687
29688 code_int = (int)code;
29689 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29690 PUT_CODE (*comparison, (enum rtx_code)code_int);
29691
29692 switch (mode)
29693 {
29694 case E_SImode:
29695 if (!arm_add_operand (*op1, mode))
29696 *op1 = force_reg (mode, *op1);
29697 if (!arm_add_operand (*op2, mode))
29698 *op2 = force_reg (mode, *op2);
29699 return true;
29700
29701 case E_DImode:
29702 if (!cmpdi_operand (*op1, mode))
29703 *op1 = force_reg (mode, *op1);
29704 if (!cmpdi_operand (*op2, mode))
29705 *op2 = force_reg (mode, *op2);
29706 return true;
29707
29708 case E_HFmode:
29709 if (!TARGET_VFP_FP16INST)
29710 break;
29711 /* FP16 comparisons are done in SF mode. */
29712 mode = SFmode;
29713 *op1 = convert_to_mode (mode, *op1, 1);
29714 *op2 = convert_to_mode (mode, *op2, 1);
29715 /* Fall through. */
29716 case E_SFmode:
29717 case E_DFmode:
29718 if (!vfp_compare_operand (*op1, mode))
29719 *op1 = force_reg (mode, *op1);
29720 if (!vfp_compare_operand (*op2, mode))
29721 *op2 = force_reg (mode, *op2);
29722 return true;
29723 default:
29724 break;
29725 }
29726
29727 return false;
29728
29729 }
29730
29731 /* Maximum number of instructions to use when setting a block of memory. */
29732 static int
29733 arm_block_set_max_insns (void)
29734 {
29735 if (optimize_function_for_size_p (cfun))
29736 return 4;
29737 else
29738 return current_tune->max_insns_inline_memset;
29739 }
29740
29741 /* Return TRUE if it's profitable to set block of memory for
29742 non-vectorized case. VAL is the value to set the memory
29743 with. LENGTH is the number of bytes to set. ALIGN is the
29744 alignment of the destination memory in bytes. UNALIGNED_P
29745 is TRUE if we can only set the memory with instructions
29746 meeting alignment requirements. USE_STRD_P is TRUE if we
29747 can use strd to set the memory. */
29748 static bool
29749 arm_block_set_non_vect_profit_p (rtx val,
29750 unsigned HOST_WIDE_INT length,
29751 unsigned HOST_WIDE_INT align,
29752 bool unaligned_p, bool use_strd_p)
29753 {
29754 int num = 0;
29755 /* For 0-7 leftover bytes, this table gives the minimum number of
29756 strb/strh/str instructions needed to store them. */
29757 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29758
29759 if (unaligned_p)
29760 {
29761 num = arm_const_inline_cost (SET, val);
29762 num += length / align + length % align;
29763 }
29764 else if (use_strd_p)
29765 {
29766 num = arm_const_double_inline_cost (val);
29767 num += (length >> 3) + leftover[length & 7];
29768 }
29769 else
29770 {
29771 num = arm_const_inline_cost (SET, val);
29772 num += (length >> 2) + leftover[length & 3];
29773 }
29774
29775 /* We may be able to combine the last STRH/STRB pair into a single STR
29776 by shifting one byte back. */
29777 if (unaligned_access && length > 3 && (length & 3) == 3)
29778 num--;
29779
29780 return (num <= arm_block_set_max_insns ());
29781 }
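
/* Worked example of the estimate above, assuming a 4-byte aligned destination
   and no strd: for LENGTH == 15 the word loop needs 15 >> 2 == 3 stores plus
   leftover[3] == 2 stores (strh + strb), on top of the cost of loading the
   constant; when unaligned access is available the trailing strh/strb pair is
   counted as a single str, saving one instruction.  */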
29782
29783 /* Return TRUE if it's profitable to set block of memory for
29784 vectorized case. LENGTH is the number of bytes to set.
29785 ALIGN is the alignment of destination memory in bytes.
29786 MODE is the vector mode used to set the memory. */
29787 static bool
29788 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29789 unsigned HOST_WIDE_INT align,
29790 machine_mode mode)
29791 {
29792 int num;
29793 bool unaligned_p = ((align & 3) != 0);
29794 unsigned int nelt = GET_MODE_NUNITS (mode);
29795
29796 /* Instruction loading constant value. */
29797 num = 1;
29798 /* Instructions storing the memory. */
29799 num += (length + nelt - 1) / nelt;
29800 /* Instructions adjusting the address expression. We only need to
29801 adjust it if the destination is 4-byte aligned and the leftover
29802 bytes can only be stored with a misaligned store instruction. */
29803 if (!unaligned_p && (length & 3) != 0)
29804 num++;
29805
29806 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29807 if (!unaligned_p && mode == V16QImode)
29808 num--;
29809
29810 return (num <= arm_block_set_max_insns ());
29811 }
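
/* Worked example of the estimate above: for LENGTH == 16 with a 4-byte
   aligned destination and V16QImode, num is 1 (loading the constant vector)
   + 1 (one 16-byte store) - 1 (the aligned vst1:v16qi credit), i.e. 1 in
   total, so the expansion is considered profitable.  */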
29812
29813 /* Set a block of memory using vectorization instructions for the
29814 unaligned case. We fill the first LENGTH bytes of the memory
29815 area starting from DSTBASE with byte constant VALUE. ALIGN is
29816 the alignment requirement of memory. Return TRUE if succeeded. */
29817 static bool
29818 arm_block_set_unaligned_vect (rtx dstbase,
29819 unsigned HOST_WIDE_INT length,
29820 unsigned HOST_WIDE_INT value,
29821 unsigned HOST_WIDE_INT align)
29822 {
29823 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29824 rtx dst, mem;
29825 rtx val_elt, val_vec, reg;
29826 rtx rval[MAX_VECT_LEN];
29827 rtx (*gen_func) (rtx, rtx);
29828 machine_mode mode;
29829 unsigned HOST_WIDE_INT v = value;
29830 unsigned int offset = 0;
29831 gcc_assert ((align & 0x3) != 0);
29832 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29833 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29834 if (length >= nelt_v16)
29835 {
29836 mode = V16QImode;
29837 gen_func = gen_movmisalignv16qi;
29838 }
29839 else
29840 {
29841 mode = V8QImode;
29842 gen_func = gen_movmisalignv8qi;
29843 }
29844 nelt_mode = GET_MODE_NUNITS (mode);
29845 gcc_assert (length >= nelt_mode);
29846 /* Skip if it isn't profitable. */
29847 if (!arm_block_set_vect_profit_p (length, align, mode))
29848 return false;
29849
29850 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29851 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29852
29853 v = sext_hwi (v, BITS_PER_WORD);
29854 val_elt = GEN_INT (v);
29855 for (j = 0; j < nelt_mode; j++)
29856 rval[j] = val_elt;
29857
29858 reg = gen_reg_rtx (mode);
29859 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29860 /* Emit instruction loading the constant value. */
29861 emit_move_insn (reg, val_vec);
29862
29863 /* Handle nelt_mode bytes in a vector. */
29864 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29865 {
29866 emit_insn ((*gen_func) (mem, reg));
29867 if (i + 2 * nelt_mode <= length)
29868 {
29869 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29870 offset += nelt_mode;
29871 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29872 }
29873 }
29874
29875 /* If at least nelt_v8 bytes are left over, we must be in
29876 V16QI mode. */
29877 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29878
29879 /* Handle (8, 16) bytes leftover. */
29880 if (i + nelt_v8 < length)
29881 {
29882 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29883 offset += length - i;
29884 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29885
29886 /* We are shifting bytes back, set the alignment accordingly. */
29887 if ((length & 1) != 0 && align >= 2)
29888 set_mem_align (mem, BITS_PER_UNIT);
29889
29890 emit_insn (gen_movmisalignv16qi (mem, reg));
29891 }
29892 /* Handle (0, 8] bytes leftover. */
29893 else if (i < length && i + nelt_v8 >= length)
29894 {
29895 if (mode == V16QImode)
29896 reg = gen_lowpart (V8QImode, reg);
29897
29898 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29899 + (nelt_mode - nelt_v8))));
29900 offset += (length - i) + (nelt_mode - nelt_v8);
29901 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29902
29903 /* We are shifting bytes back, set the alignment accordingly. */
29904 if ((length & 1) != 0 && align >= 2)
29905 set_mem_align (mem, BITS_PER_UNIT);
29906
29907 emit_insn (gen_movmisalignv8qi (mem, reg));
29908 }
29909
29910 return true;
29911 }
29912
29913 /* Set a block of memory using vectorization instructions for the
29914 aligned case. We fill the first LENGTH bytes of the memory area
29915 starting from DSTBASE with byte constant VALUE. ALIGN is the
29916 alignment requirement of memory. Return TRUE if succeeded. */
29917 static bool
29918 arm_block_set_aligned_vect (rtx dstbase,
29919 unsigned HOST_WIDE_INT length,
29920 unsigned HOST_WIDE_INT value,
29921 unsigned HOST_WIDE_INT align)
29922 {
29923 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29924 rtx dst, addr, mem;
29925 rtx val_elt, val_vec, reg;
29926 rtx rval[MAX_VECT_LEN];
29927 machine_mode mode;
29928 unsigned HOST_WIDE_INT v = value;
29929 unsigned int offset = 0;
29930
29931 gcc_assert ((align & 0x3) == 0);
29932 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29933 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29934 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29935 mode = V16QImode;
29936 else
29937 mode = V8QImode;
29938
29939 nelt_mode = GET_MODE_NUNITS (mode);
29940 gcc_assert (length >= nelt_mode);
29941 /* Skip if it isn't profitable. */
29942 if (!arm_block_set_vect_profit_p (length, align, mode))
29943 return false;
29944
29945 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29946
29947 v = sext_hwi (v, BITS_PER_WORD);
29948 val_elt = GEN_INT (v);
29949 for (j = 0; j < nelt_mode; j++)
29950 rval[j] = val_elt;
29951
29952 reg = gen_reg_rtx (mode);
29953 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29954 /* Emit instruction loading the constant value. */
29955 emit_move_insn (reg, val_vec);
29956
29957 i = 0;
29958 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29959 if (mode == V16QImode)
29960 {
29961 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29962 emit_insn (gen_movmisalignv16qi (mem, reg));
29963 i += nelt_mode;
29964 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29965 if (i + nelt_v8 < length && i + nelt_v16 > length)
29966 {
29967 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29968 offset += length - nelt_mode;
29969 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29970 /* We are shifting bytes back, set the alignment accordingly. */
29971 if ((length & 0x3) == 0)
29972 set_mem_align (mem, BITS_PER_UNIT * 4);
29973 else if ((length & 0x1) == 0)
29974 set_mem_align (mem, BITS_PER_UNIT * 2);
29975 else
29976 set_mem_align (mem, BITS_PER_UNIT);
29977
29978 emit_insn (gen_movmisalignv16qi (mem, reg));
29979 return true;
29980 }
29981 /* Fall through for bytes leftover. */
29982 mode = V8QImode;
29983 nelt_mode = GET_MODE_NUNITS (mode);
29984 reg = gen_lowpart (V8QImode, reg);
29985 }
29986
29987 /* Handle 8 bytes in a vector. */
29988 for (; (i + nelt_mode <= length); i += nelt_mode)
29989 {
29990 addr = plus_constant (Pmode, dst, i);
29991 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29992 emit_move_insn (mem, reg);
29993 }
29994
29995 /* Handle single word leftover by shifting 4 bytes back. We can
29996 use aligned access for this case. */
29997 if (i + UNITS_PER_WORD == length)
29998 {
29999 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30000 offset += i - UNITS_PER_WORD;
30001 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30002 /* We are shifting 4 bytes back, set the alignment accordingly. */
30003 if (align > UNITS_PER_WORD)
30004 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30005
30006 emit_move_insn (mem, reg);
30007 }
30008 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30009 We have to use unaligned access for this case. */
30010 else if (i < length)
30011 {
30012 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30013 offset += length - nelt_mode;
30014 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30015 /* We are shifting bytes back, set the alignment accordingly. */
30016 if ((length & 1) == 0)
30017 set_mem_align (mem, BITS_PER_UNIT * 2);
30018 else
30019 set_mem_align (mem, BITS_PER_UNIT);
30020
30021 emit_insn (gen_movmisalignv8qi (mem, reg));
30022 }
30023
30024 return true;
30025 }
30026
30027 /* Set a block of memory using plain strh/strb instructions, only
30028 using instructions allowed by ALIGN on the processor. We fill the
30029 first LENGTH bytes of the memory area starting from DSTBASE
30030 with byte constant VALUE. ALIGN is the alignment requirement
30031 of memory. */
30032 static bool
30033 arm_block_set_unaligned_non_vect (rtx dstbase,
30034 unsigned HOST_WIDE_INT length,
30035 unsigned HOST_WIDE_INT value,
30036 unsigned HOST_WIDE_INT align)
30037 {
30038 unsigned int i;
30039 rtx dst, addr, mem;
30040 rtx val_exp, val_reg, reg;
30041 machine_mode mode;
30042 HOST_WIDE_INT v = value;
30043
30044 gcc_assert (align == 1 || align == 2);
30045
30046 if (align == 2)
30047 v |= (value << BITS_PER_UNIT);
30048
30049 v = sext_hwi (v, BITS_PER_WORD);
30050 val_exp = GEN_INT (v);
30051 /* Skip if it isn't profitable. */
30052 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30053 align, true, false))
30054 return false;
30055
30056 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30057 mode = (align == 2 ? HImode : QImode);
30058 val_reg = force_reg (SImode, val_exp);
30059 reg = gen_lowpart (mode, val_reg);
30060
30061 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30062 {
30063 addr = plus_constant (Pmode, dst, i);
30064 mem = adjust_automodify_address (dstbase, mode, addr, i);
30065 emit_move_insn (mem, reg);
30066 }
30067
30068 /* Handle single byte leftover. */
30069 if (i + 1 == length)
30070 {
30071 reg = gen_lowpart (QImode, val_reg);
30072 addr = plus_constant (Pmode, dst, i);
30073 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30074 emit_move_insn (mem, reg);
30075 i++;
30076 }
30077
30078 gcc_assert (i == length);
30079 return true;
30080 }
30081
30082 /* Set a block of memory using plain strd/str/strh/strb instructions,
30083 to permit unaligned stores on processors which support unaligned
30084 semantics for those instructions. We fill the first LENGTH bytes
30085 of the memory area starting from DSTBASE with byte constant VALUE.
30086 ALIGN is the alignment requirement of memory. */
30087 static bool
30088 arm_block_set_aligned_non_vect (rtx dstbase,
30089 unsigned HOST_WIDE_INT length,
30090 unsigned HOST_WIDE_INT value,
30091 unsigned HOST_WIDE_INT align)
30092 {
30093 unsigned int i;
30094 rtx dst, addr, mem;
30095 rtx val_exp, val_reg, reg;
30096 unsigned HOST_WIDE_INT v;
30097 bool use_strd_p;
30098
30099 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30100 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30101
30102 v = (value | (value << 8) | (value << 16) | (value << 24));
30103 if (length < UNITS_PER_WORD)
30104 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30105
30106 if (use_strd_p)
30107 v |= (v << BITS_PER_WORD);
30108 else
30109 v = sext_hwi (v, BITS_PER_WORD);
30110
30111 val_exp = GEN_INT (v);
30112 /* Skip if it isn't profitable. */
30113 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30114 align, false, use_strd_p))
30115 {
30116 if (!use_strd_p)
30117 return false;
30118
30119 /* Try without strd. */
30120 v = (v >> BITS_PER_WORD);
30121 v = sext_hwi (v, BITS_PER_WORD);
30122 val_exp = GEN_INT (v);
30123 use_strd_p = false;
30124 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30125 align, false, use_strd_p))
30126 return false;
30127 }
30128
30129 i = 0;
30130 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30131 /* Handle double words using strd if possible. */
30132 if (use_strd_p)
30133 {
30134 val_reg = force_reg (DImode, val_exp);
30135 reg = val_reg;
30136 for (; (i + 8 <= length); i += 8)
30137 {
30138 addr = plus_constant (Pmode, dst, i);
30139 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30140 emit_move_insn (mem, reg);
30141 }
30142 }
30143 else
30144 val_reg = force_reg (SImode, val_exp);
30145
30146 /* Handle words. */
30147 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30148 for (; (i + 4 <= length); i += 4)
30149 {
30150 addr = plus_constant (Pmode, dst, i);
30151 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30152 if ((align & 3) == 0)
30153 emit_move_insn (mem, reg);
30154 else
30155 emit_insn (gen_unaligned_storesi (mem, reg));
30156 }
30157
30158 /* Merge last pair of STRH and STRB into a STR if possible. */
30159 if (unaligned_access && i > 0 && (i + 3) == length)
30160 {
30161 addr = plus_constant (Pmode, dst, i - 1);
30162 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30163 /* We are shifting one byte back, set the alignment accordingly. */
30164 if ((align & 1) == 0)
30165 set_mem_align (mem, BITS_PER_UNIT);
30166
30167 /* Most likely this is an unaligned access, and we can't tell at
30168 compilation time. */
30169 emit_insn (gen_unaligned_storesi (mem, reg));
30170 return true;
30171 }
30172
30173 /* Handle half word leftover. */
30174 if (i + 2 <= length)
30175 {
30176 reg = gen_lowpart (HImode, val_reg);
30177 addr = plus_constant (Pmode, dst, i);
30178 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30179 if ((align & 1) == 0)
30180 emit_move_insn (mem, reg);
30181 else
30182 emit_insn (gen_unaligned_storehi (mem, reg));
30183
30184 i += 2;
30185 }
30186
30187 /* Handle single byte leftover. */
30188 if (i + 1 == length)
30189 {
30190 reg = gen_lowpart (QImode, val_reg);
30191 addr = plus_constant (Pmode, dst, i);
30192 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30193 emit_move_insn (mem, reg);
30194 }
30195
30196 return true;
30197 }
30198
30199 /* Set a block of memory using vectorization instructions for both
30200 aligned and unaligned cases. We fill the first LENGTH bytes of
30201 the memory area starting from DSTBASE with byte constant VALUE.
30202 ALIGN is the alignment requirement of memory. */
30203 static bool
30204 arm_block_set_vect (rtx dstbase,
30205 unsigned HOST_WIDE_INT length,
30206 unsigned HOST_WIDE_INT value,
30207 unsigned HOST_WIDE_INT align)
30208 {
30209 /* Check whether we need to use unaligned store instruction. */
30210 if (((align & 3) != 0 || (length & 3) != 0)
30211 /* Check whether unaligned store instruction is available. */
30212 && (!unaligned_access || BYTES_BIG_ENDIAN))
30213 return false;
30214
30215 if ((align & 3) == 0)
30216 return arm_block_set_aligned_vect (dstbase, length, value, align);
30217 else
30218 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30219 }
30220
30221 /* Expand a string store operation. First we try to do it using
30222 vectorization instructions, then fall back to ARM unaligned access and
30223 double-word stores if profitable. OPERANDS[0] is the destination,
30224 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30225 initialize the memory with, OPERANDS[3] is the known alignment of the
30226 destination. */
30227 bool
30228 arm_gen_setmem (rtx *operands)
30229 {
30230 rtx dstbase = operands[0];
30231 unsigned HOST_WIDE_INT length;
30232 unsigned HOST_WIDE_INT value;
30233 unsigned HOST_WIDE_INT align;
30234
30235 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30236 return false;
30237
30238 length = UINTVAL (operands[1]);
30239 if (length > 64)
30240 return false;
30241
30242 value = (UINTVAL (operands[2]) & 0xFF);
30243 align = UINTVAL (operands[3]);
30244 if (TARGET_NEON && length >= 8
30245 && current_tune->string_ops_prefer_neon
30246 && arm_block_set_vect (dstbase, length, value, align))
30247 return true;
30248
30249 if (!unaligned_access && (align & 3) != 0)
30250 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30251
30252 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30253 }
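
/* Usage sketch (the caller is assumed to be the setmem expander in arm.md):
   for a memset of 15 bytes of value 0xAB into a 4-byte aligned destination,
   OPERANDS[1] is (const_int 15), OPERANDS[2] is (const_int 0xAB) and
   OPERANDS[3] is (const_int 4); with NEON and a tuning that prefers it the
   vector path is tried first, otherwise the str/strh/strb path is used.  */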
30254
30255
30256 static bool
30257 arm_macro_fusion_p (void)
30258 {
30259 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30260 }
30261
30262 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30263 for MOVW / MOVT macro fusion. */
30264
30265 static bool
30266 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30267 {
30268 /* We are trying to fuse
30269 movw imm / movt imm
30270 instructions as a group that gets scheduled together. */
30271
30272 rtx set_dest = SET_DEST (curr_set);
30273
30274 if (GET_MODE (set_dest) != SImode)
30275 return false;
30276
30277 /* We are trying to match:
30278 prev (movw) == (set (reg r0) (const_int imm16))
30279 curr (movt) == (set (zero_extract (reg r0)
30280 (const_int 16)
30281 (const_int 16))
30282 (const_int imm16_1))
30283 or
30284 prev (movw) == (set (reg r1)
30285 (high (symbol_ref ("SYM"))))
30286 curr (movt) == (set (reg r0)
30287 (lo_sum (reg r1)
30288 (symbol_ref ("SYM")))) */
30289
30290 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30291 {
30292 if (CONST_INT_P (SET_SRC (curr_set))
30293 && CONST_INT_P (SET_SRC (prev_set))
30294 && REG_P (XEXP (set_dest, 0))
30295 && REG_P (SET_DEST (prev_set))
30296 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30297 return true;
30298
30299 }
30300 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30301 && REG_P (SET_DEST (curr_set))
30302 && REG_P (SET_DEST (prev_set))
30303 && GET_CODE (SET_SRC (prev_set)) == HIGH
30304 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30305 return true;
30306
30307 return false;
30308 }
30309
30310 static bool
30311 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30312 {
30313 rtx prev_set = single_set (prev);
30314 rtx curr_set = single_set (curr);
30315
30316 if (!prev_set
30317 || !curr_set)
30318 return false;
30319
30320 if (any_condjump_p (curr))
30321 return false;
30322
30323 if (!arm_macro_fusion_p ())
30324 return false;
30325
30326 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30327 && aarch_crypto_can_dual_issue (prev, curr))
30328 return true;
30329
30330 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30331 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30332 return true;
30333
30334 return false;
30335 }
30336
30337 /* Return true iff the instruction fusion described by OP is enabled. */
30338 bool
30339 arm_fusion_enabled_p (tune_params::fuse_ops op)
30340 {
30341 return current_tune->fusible_ops & op;
30342 }
30343
30344 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30345 scheduled for speculative execution. Reject the long-running division
30346 and square-root instructions. */
30347
30348 static bool
30349 arm_sched_can_speculate_insn (rtx_insn *insn)
30350 {
30351 switch (get_attr_type (insn))
30352 {
30353 case TYPE_SDIV:
30354 case TYPE_UDIV:
30355 case TYPE_FDIVS:
30356 case TYPE_FDIVD:
30357 case TYPE_FSQRTS:
30358 case TYPE_FSQRTD:
30359 case TYPE_NEON_FP_SQRT_S:
30360 case TYPE_NEON_FP_SQRT_D:
30361 case TYPE_NEON_FP_SQRT_S_Q:
30362 case TYPE_NEON_FP_SQRT_D_Q:
30363 case TYPE_NEON_FP_DIV_S:
30364 case TYPE_NEON_FP_DIV_D:
30365 case TYPE_NEON_FP_DIV_S_Q:
30366 case TYPE_NEON_FP_DIV_D_Q:
30367 return false;
30368 default:
30369 return true;
30370 }
30371 }
30372
30373 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30374
30375 static unsigned HOST_WIDE_INT
30376 arm_asan_shadow_offset (void)
30377 {
30378 return HOST_WIDE_INT_1U << 29;
30379 }
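
/* With the usual shadow scale of 3, a byte at ADDR is shadowed at
   (ADDR >> 3) + (1 << 29); 1 << 29 (0x20000000) is the conventional
   AddressSanitizer shadow offset for 32-bit targets.  */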
30380
30381
30382 /* This is a temporary fix for PR60655. Ideally we need
30383 to handle most of these cases in the generic part but
30384 currently we reject minus (..) (sym_ref). We try to
30385 ameliorate the case with minus (sym_ref1) (sym_ref2)
30386 where they are in the same section. */
30387
30388 static bool
30389 arm_const_not_ok_for_debug_p (rtx p)
30390 {
30391 tree decl_op0 = NULL;
30392 tree decl_op1 = NULL;
30393
30394 if (GET_CODE (p) == MINUS)
30395 {
30396 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30397 {
30398 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30399 if (decl_op1
30400 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30401 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30402 {
30403 if ((VAR_P (decl_op1)
30404 || TREE_CODE (decl_op1) == CONST_DECL)
30405 && (VAR_P (decl_op0)
30406 || TREE_CODE (decl_op0) == CONST_DECL))
30407 return (get_variable_section (decl_op1, false)
30408 != get_variable_section (decl_op0, false));
30409
30410 if (TREE_CODE (decl_op1) == LABEL_DECL
30411 && TREE_CODE (decl_op0) == LABEL_DECL)
30412 return (DECL_CONTEXT (decl_op1)
30413 != DECL_CONTEXT (decl_op0));
30414 }
30415
30416 return true;
30417 }
30418 }
30419
30420 return false;
30421 }
30422
30423 /* Return TRUE if X is a reference to a value in a constant pool. */
30424 extern bool
30425 arm_is_constant_pool_ref (rtx x)
30426 {
30427 return (MEM_P (x)
30428 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30429 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30430 }
30431
30432 /* Remember the last target of arm_set_current_function. */
30433 static GTY(()) tree arm_previous_fndecl;
30434
30435 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30436
30437 void
30438 save_restore_target_globals (tree new_tree)
30439 {
30440 /* If we have a previous state, use it. */
30441 if (TREE_TARGET_GLOBALS (new_tree))
30442 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30443 else if (new_tree == target_option_default_node)
30444 restore_target_globals (&default_target_globals);
30445 else
30446 {
30447 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30448 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30449 }
30450
30451 arm_option_params_internal ();
30452 }
30453
30454 /* Invalidate arm_previous_fndecl. */
30455
30456 void
30457 arm_reset_previous_fndecl (void)
30458 {
30459 arm_previous_fndecl = NULL_TREE;
30460 }
30461
30462 /* Establish appropriate back-end context for processing the function
30463 FNDECL. The argument might be NULL to indicate processing at top
30464 level, outside of any function scope. */
30465
30466 static void
30467 arm_set_current_function (tree fndecl)
30468 {
30469 if (!fndecl || fndecl == arm_previous_fndecl)
30470 return;
30471
30472 tree old_tree = (arm_previous_fndecl
30473 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30474 : NULL_TREE);
30475
30476 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30477
30478 /* If the current function has no attributes but the previous one did,
30479 use the default node. */
30480 if (! new_tree && old_tree)
30481 new_tree = target_option_default_node;
30482
30483 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
30484 to the default has already been handled by save_restore_target_globals from
30485 arm_pragma_target_parse. */
30486 if (old_tree == new_tree)
30487 return;
30488
30489 arm_previous_fndecl = fndecl;
30490
30491 /* First set the target options. */
30492 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30493
30494 save_restore_target_globals (new_tree);
30495 }
30496
30497 /* Implement TARGET_OPTION_PRINT. */
30498
30499 static void
30500 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30501 {
30502 int flags = ptr->x_target_flags;
30503 const char *fpu_name;
30504
30505 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30506 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30507
30508 fprintf (file, "%*sselected isa %s\n", indent, "",
30509 TARGET_THUMB2_P (flags) ? "thumb2" :
30510 TARGET_THUMB_P (flags) ? "thumb1" :
30511 "arm");
30512
30513 if (ptr->x_arm_arch_string)
30514 fprintf (file, "%*sselected architecture %s\n", indent, "",
30515 ptr->x_arm_arch_string);
30516
30517 if (ptr->x_arm_cpu_string)
30518 fprintf (file, "%*sselected CPU %s\n", indent, "",
30519 ptr->x_arm_cpu_string);
30520
30521 if (ptr->x_arm_tune_string)
30522 fprintf (file, "%*sselected tune %s\n", indent, "",
30523 ptr->x_arm_tune_string);
30524
30525 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30526 }
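/* A minimal sketch of the dump this produces (example values only,
   indentation omitted):
     selected isa thumb2
     selected architecture armv7-a
     selected CPU cortex-a9
     selected tune cortex-a9
     selected fpu auto
   The architecture, CPU and tune lines are only printed when the
   corresponding strings are set.  */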
30527
30528 /* Hook to determine if one function can safely inline another. */
30529
30530 static bool
30531 arm_can_inline_p (tree caller, tree callee)
30532 {
30533 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30534 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30535 bool can_inline = true;
30536
30537 struct cl_target_option *caller_opts
30538 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30539 : target_option_default_node);
30540
30541 struct cl_target_option *callee_opts
30542 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30543 : target_option_default_node);
30544
30545 if (callee_opts == caller_opts)
30546 return true;
30547
30548 /* Callee's ISA features should be a subset of the caller's. */
30549 struct arm_build_target caller_target;
30550 struct arm_build_target callee_target;
30551 caller_target.isa = sbitmap_alloc (isa_num_bits);
30552 callee_target.isa = sbitmap_alloc (isa_num_bits);
30553
30554 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30555 false);
30556 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30557 false);
30558 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30559 can_inline = false;
30560
30561 sbitmap_free (caller_target.isa);
30562 sbitmap_free (callee_target.isa);
30563
30564 /* It is OK to inline between different modes.
30565 Functions with mode-specific instructions, e.g. using asm,
30566 must be explicitly protected with noinline.  */
30567 return can_inline;
30568 }
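/* Illustrative user-level sketch (assumed source, not part of this file):
   because the callee's ISA bits must be a subset of the caller's,
     __attribute__ ((target ("fpu=neon")))
     static int callee (int x) { return x + 1; }
     int caller (int x) { return callee (x); }
   would not be inlined into a caller whose configuration lacks the NEON
   feature bits, while two functions built with identical target options
   always pass this check.  */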
30569
30570 /* Hook to fix function's alignment affected by target attribute. */
30571
30572 static void
30573 arm_relayout_function (tree fndecl)
30574 {
30575 if (DECL_USER_ALIGN (fndecl))
30576 return;
30577
30578 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30579
30580 if (!callee_tree)
30581 callee_tree = target_option_default_node;
30582
30583 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30584 SET_DECL_ALIGN
30585 (fndecl,
30586 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30587 }
30588
30589 /* Inner function to process the attribute ((target (...))): take an argument
30590 and set the current options from that argument.  If we have a list,
30591 recursively process each element of the list.  */
30592
30593 static bool
30594 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30595 {
30596 if (TREE_CODE (args) == TREE_LIST)
30597 {
30598 bool ret = true;
30599
30600 for (; args; args = TREE_CHAIN (args))
30601 if (TREE_VALUE (args)
30602 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30603 ret = false;
30604 return ret;
30605 }
30606
30607 else if (TREE_CODE (args) != STRING_CST)
30608 {
30609 error ("attribute %<target%> argument not a string");
30610 return false;
30611 }
30612
30613 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30614 char *q;
30615
30616 while ((q = strtok (argstr, ",")) != NULL)
30617 {
30618 while (ISSPACE (*q)) ++q;
30619
30620 argstr = NULL;
30621 if (!strncmp (q, "thumb", 5))
30622 opts->x_target_flags |= MASK_THUMB;
30623
30624 else if (!strncmp (q, "arm", 3))
30625 opts->x_target_flags &= ~MASK_THUMB;
30626
30627 else if (!strncmp (q, "fpu=", 4))
30628 {
30629 int fpu_index;
30630 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30631 &fpu_index, CL_TARGET))
30632 {
30633 error ("invalid fpu for attribute(target(\"%s\"))", q);
30634 return false;
30635 }
30636 if (fpu_index == TARGET_FPU_auto)
30637 {
30638 /* This doesn't really make sense until we support
30639 general dynamic selection of the architecture and all
30640 sub-features. */
30641 sorry ("auto fpu selection not currently permitted here");
30642 return false;
30643 }
30644 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30645 }
30646 else
30647 {
30648 error ("attribute(target(\"%s\")) is unknown", q);
30649 return false;
30650 }
30651 }
30652
30653 return true;
30654 }
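/* Illustrative user-level sketch (assumed source, not part of this file):
   the strings accepted above are "thumb", "arm" and "fpu=<name>", comma
   separated, e.g.
     __attribute__ ((target ("thumb,fpu=vfpv4")))
     int f (int x) { return x * 2; }
   Any other string, for example target ("arch=armv7-a"), is reported as
   unknown by the final else clause.  */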
30655
30656 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30657
30658 tree
30659 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30660 struct gcc_options *opts_set)
30661 {
30662 struct cl_target_option cl_opts;
30663
30664 if (!arm_valid_target_attribute_rec (args, opts))
30665 return NULL_TREE;
30666
30667 cl_target_option_save (&cl_opts, opts);
30668 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30669 arm_option_check_internal (opts);
30670 /* Do any overrides, such as global options arch=xxx. */
30671 arm_option_override_internal (opts, opts_set);
30672
30673 return build_target_option_node (opts);
30674 }
30675
30676 static void
30677 add_attribute (const char * mode, tree *attributes)
30678 {
30679 size_t len = strlen (mode);
30680 tree value = build_string (len, mode);
30681
30682 TREE_TYPE (value) = build_array_type (char_type_node,
30683 build_index_type (size_int (len)));
30684
30685 *attributes = tree_cons (get_identifier ("target"),
30686 build_tree_list (NULL_TREE, value),
30687 *attributes);
30688 }
30689
30690 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30691
30692 static void
30693 arm_insert_attributes (tree fndecl, tree * attributes)
30694 {
30695 const char *mode;
30696
30697 if (! TARGET_FLIP_THUMB)
30698 return;
30699
30700 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30701 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30702 return;
30703
30704 /* Nested definitions must inherit mode. */
30705 if (current_function_decl)
30706 {
30707 mode = TARGET_THUMB ? "thumb" : "arm";
30708 add_attribute (mode, attributes);
30709 return;
30710 }
30711
30712 /* If there is already a setting don't change it. */
30713 if (lookup_attribute ("target", *attributes) != NULL)
30714 return;
30715
30716 mode = thumb_flipper ? "thumb" : "arm";
30717 add_attribute (mode, attributes);
30718
30719 thumb_flipper = !thumb_flipper;
30720 }
30721
30722 /* Hook to validate attribute((target("string"))). */
30723
30724 static bool
30725 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30726 tree args, int ARG_UNUSED (flags))
30727 {
30728 bool ret = true;
30729 struct gcc_options func_options;
30730 tree cur_tree, new_optimize;
30731 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30732
30733 /* Get the optimization options of the current function. */
30734 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30735
30736 /* If the function changed the optimization levels as well as setting target
30737 options, start with the optimizations specified. */
30738 if (!func_optimize)
30739 func_optimize = optimization_default_node;
30740
30741 /* Init func_options. */
30742 memset (&func_options, 0, sizeof (func_options));
30743 init_options_struct (&func_options, NULL);
30744 lang_hooks.init_options_struct (&func_options);
30745
30746 /* Initialize func_options to the defaults. */
30747 cl_optimization_restore (&func_options,
30748 TREE_OPTIMIZATION (func_optimize));
30749
30750 cl_target_option_restore (&func_options,
30751 TREE_TARGET_OPTION (target_option_default_node));
30752
30753 /* Set func_options flags with new target mode. */
30754 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30755 &global_options_set);
30756
30757 if (cur_tree == NULL_TREE)
30758 ret = false;
30759
30760 new_optimize = build_optimization_node (&func_options);
30761
30762 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30763
30764 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30765
30766 finalize_options_struct (&func_options);
30767
30768 return ret;
30769 }
30770
30771 /* Match an ISA feature bitmap to a named FPU. We always use the
30772 first entry that exactly matches the feature set, so that we
30773 effectively canonicalize the FPU name for the assembler. */
30774 static const char*
30775 arm_identify_fpu_from_isa (sbitmap isa)
30776 {
30777 auto_sbitmap fpubits (isa_num_bits);
30778 auto_sbitmap cand_fpubits (isa_num_bits);
30779
30780 bitmap_and (fpubits, isa, isa_all_fpubits);
30781
30782 /* If there are no ISA feature bits relating to the FPU, we must be
30783 doing soft-float. */
30784 if (bitmap_empty_p (fpubits))
30785 return "softvfp";
30786
30787 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30788 {
30789 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30790 if (bitmap_equal_p (fpubits, cand_fpubits))
30791 return all_fpus[i].name;
30792 }
30793 /* We must find an entry, or things have gone wrong. */
30794 gcc_unreachable ();
30795 }
30796
30797 void
30798 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30799 {
30800
30801 fprintf (stream, "\t.syntax unified\n");
30802
30803 if (TARGET_THUMB)
30804 {
30805 if (is_called_in_ARM_mode (decl)
30806 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30807 && cfun->is_thunk))
30808 fprintf (stream, "\t.code 32\n");
30809 else if (TARGET_THUMB1)
30810 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30811 else
30812 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30813 }
30814 else
30815 fprintf (stream, "\t.arm\n");
30816
30817 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30818 (TARGET_SOFT_FLOAT
30819 ? "softvfp"
30820 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30821
30822 if (TARGET_POKE_FUNCTION_NAME)
30823 arm_poke_function_name (stream, (const char *) name);
30824 }
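/* A minimal sketch of the directives this emits for a Thumb-2, soft-float
   function (the exact .fpu name depends on the active target):
       .syntax unified
       .thumb
       .thumb_func
       .fpu softvfp
   followed by the poked function name when TARGET_POKE_FUNCTION_NAME.  */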
30825
30826 /* If MEM is in the form of [base+offset], extract the two parts of the
30827 address, store them in BASE and OFFSET, and return true; otherwise
30828 return false after clearing BASE and OFFSET.  */
30829
30830 static bool
30831 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30832 {
30833 rtx addr;
30834
30835 gcc_assert (MEM_P (mem));
30836
30837 addr = XEXP (mem, 0);
30838
30839 /* Strip off const from addresses like (const (addr)). */
30840 if (GET_CODE (addr) == CONST)
30841 addr = XEXP (addr, 0);
30842
30843 if (GET_CODE (addr) == REG)
30844 {
30845 *base = addr;
30846 *offset = const0_rtx;
30847 return true;
30848 }
30849
30850 if (GET_CODE (addr) == PLUS
30851 && GET_CODE (XEXP (addr, 0)) == REG
30852 && CONST_INT_P (XEXP (addr, 1)))
30853 {
30854 *base = XEXP (addr, 0);
30855 *offset = XEXP (addr, 1);
30856 return true;
30857 }
30858
30859 *base = NULL_RTX;
30860 *offset = NULL_RTX;
30861
30862 return false;
30863 }
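/* For instance (illustrative RTL, not part of the build):
     (mem (reg r3))                       gives BASE = r3, OFFSET = 0
     (mem (plus (reg r3) (const_int 8)))  gives BASE = r3, OFFSET = 8
   while a register-plus-register address is rejected and both outputs
   are cleared.  */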
30864
30865 /* If INSN is a load or store whose address has the form [base+offset],
30866 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
30867 set to TRUE if it is a load.  Return TRUE if INSN is such an
30868 instruction, otherwise return FALSE.  */
30869
30870 static bool
30871 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30872 {
30873 rtx x, dest, src;
30874
30875 gcc_assert (INSN_P (insn));
30876 x = PATTERN (insn);
30877 if (GET_CODE (x) != SET)
30878 return false;
30879
30880 src = SET_SRC (x);
30881 dest = SET_DEST (x);
30882 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30883 {
30884 *is_load = false;
30885 extract_base_offset_in_addr (dest, base, offset);
30886 }
30887 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30888 {
30889 *is_load = true;
30890 extract_base_offset_in_addr (src, base, offset);
30891 }
30892 else
30893 return false;
30894
30895 return (*base != NULL_RTX && *offset != NULL_RTX);
30896 }
30897
30898 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30899
30900 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30901 and PRI are only calculated for those instructions.  For other instructions,
30902 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
30903 instruction fusion can be supported by returning different priorities.
30904
30905 It's important that irrelevant instructions get the largest FUSION_PRI. */
30906
30907 static void
30908 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30909 int *fusion_pri, int *pri)
30910 {
30911 int tmp, off_val;
30912 bool is_load;
30913 rtx base, offset;
30914
30915 gcc_assert (INSN_P (insn));
30916
30917 tmp = max_pri - 1;
30918 if (!fusion_load_store (insn, &base, &offset, &is_load))
30919 {
30920 *pri = tmp;
30921 *fusion_pri = tmp;
30922 return;
30923 }
30924
30925 /* Load goes first. */
30926 if (is_load)
30927 *fusion_pri = tmp - 1;
30928 else
30929 *fusion_pri = tmp - 2;
30930
30931 tmp /= 2;
30932
30933 /* INSN with smaller base register goes first. */
30934 tmp -= ((REGNO (base) & 0xff) << 20);
30935
30936 /* INSN with smaller offset goes first. */
30937 off_val = (int)(INTVAL (offset));
30938 if (off_val >= 0)
30939 tmp -= (off_val & 0xfffff);
30940 else
30941 tmp += ((- off_val) & 0xfffff);
30942
30943 *pri = tmp;
30944 return;
30945 }
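/* As an illustrative sketch (not part of the build), two loads such as
     ldr r0, [r3]
     ldr r1, [r3, #4]
   receive the same FUSION_PRI (max_pri - 2, since both are loads) and PRI
   values that differ only through their offsets, so the scheduler keeps
   them adjacent and orders the smaller offset first; an unrelated
   instruction gets the larger priority max_pri - 1 and is not drawn into
   the pair.  */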
30946
30947
30948 /* Construct and return a PARALLEL RTX vector with elements numbering the
30949 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30950 the vector - from the perspective of the architecture. This does not
30951 line up with GCC's perspective on lane numbers, so we end up with
30952 different masks depending on our target endian-ness. The diagram
30953 below may help. We must draw the distinction when building masks
30954 which select one half of the vector. An instruction selecting
30955 architectural low-lanes for a big-endian target must be described using
30956 a mask selecting GCC high-lanes.
30957
30958 Big-Endian Little-Endian
30959
30960 GCC 0 1 2 3 3 2 1 0
30961 | x | x | x | x | | x | x | x | x |
30962 Architecture 3 2 1 0 3 2 1 0
30963
30964 Low Mask: { 2, 3 } { 0, 1 }
30965 High Mask: { 0, 1 } { 2, 3 }
30966 */
30967
30968 rtx
30969 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30970 {
30971 int nunits = GET_MODE_NUNITS (mode);
30972 rtvec v = rtvec_alloc (nunits / 2);
30973 int high_base = nunits / 2;
30974 int low_base = 0;
30975 int base;
30976 rtx t1;
30977 int i;
30978
30979 if (BYTES_BIG_ENDIAN)
30980 base = high ? low_base : high_base;
30981 else
30982 base = high ? high_base : low_base;
30983
30984 for (i = 0; i < nunits / 2; i++)
30985 RTVEC_ELT (v, i) = GEN_INT (base + i);
30986
30987 t1 = gen_rtx_PARALLEL (mode, v);
30988 return t1;
30989 }
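/* For example (illustrative), for V4SImode with HIGH == false this returns
     little-endian:  (parallel [(const_int 0) (const_int 1)])
     big-endian:     (parallel [(const_int 2) (const_int 3)])
   matching the "Low Mask" row of the diagram above.  */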
30990
30991 /* Check OP for validity as a PARALLEL RTX vector with elements
30992 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30993 from the perspective of the architecture. See the diagram above
30994 arm_simd_vect_par_cnst_half for more details.  */
30995
30996 bool
30997 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30998 bool high)
30999 {
31000 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31001 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31002 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31003 int i = 0;
31004
31005 if (!VECTOR_MODE_P (mode))
31006 return false;
31007
31008 if (count_op != count_ideal)
31009 return false;
31010
31011 for (i = 0; i < count_ideal; i++)
31012 {
31013 rtx elt_op = XVECEXP (op, 0, i);
31014 rtx elt_ideal = XVECEXP (ideal, 0, i);
31015
31016 if (!CONST_INT_P (elt_op)
31017 || INTVAL (elt_ideal) != INTVAL (elt_op))
31018 return false;
31019 }
31020 return true;
31021 }
31022
31023 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31024 in Thumb1. */
31025 static bool
31026 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31027 const_tree)
31028 {
31029 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31030 if (vcall_offset && TARGET_THUMB1)
31031 return false;
31032
31033 /* Otherwise ok. */
31034 return true;
31035 }
31036
31037 /* Generate RTL for a conditional branch with rtx comparison CODE in
31038 mode CC_MODE. The destination of the unlikely conditional branch
31039 is LABEL_REF. */
31040
31041 void
31042 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31043 rtx label_ref)
31044 {
31045 rtx x;
31046 x = gen_rtx_fmt_ee (code, VOIDmode,
31047 gen_rtx_REG (cc_mode, CC_REGNUM),
31048 const0_rtx);
31049
31050 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31051 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31052 pc_rtx);
31053 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31054 }
31055
31056 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31057
31058 For pure-code sections there is no letter code for this attribute, so
31059 output all the section flags numerically when this is needed. */
31060
31061 static bool
31062 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31063 {
31064
31065 if (flags & SECTION_ARM_PURECODE)
31066 {
31067 *num = 0x20000000;
31068
31069 if (!(flags & SECTION_DEBUG))
31070 *num |= 0x2;
31071 if (flags & SECTION_EXCLUDE)
31072 *num |= 0x80000000;
31073 if (flags & SECTION_WRITE)
31074 *num |= 0x1;
31075 if (flags & SECTION_CODE)
31076 *num |= 0x4;
31077 if (flags & SECTION_MERGE)
31078 *num |= 0x10;
31079 if (flags & SECTION_STRINGS)
31080 *num |= 0x20;
31081 if (flags & SECTION_TLS)
31082 *num |= 0x400;
31083 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31084 *num |= 0x200;
31085
31086 return true;
31087 }
31088
31089 return false;
31090 }
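/* As a worked example, a plain executable pure-code section (SECTION_CODE
   set, neither SECTION_DEBUG nor SECTION_WRITE) comes out as
     0x20000000 | 0x2 | 0x4 == 0x20000006
   and that numeric value is emitted for the section's flags in place of
   the usual letter codes.  */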
31091
31092 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31093
31094 If pure-code is passed as an option, make sure all functions are in
31095 sections that have the SHF_ARM_PURECODE attribute. */
31096
31097 static section *
31098 arm_function_section (tree decl, enum node_frequency freq,
31099 bool startup, bool exit)
31100 {
31101 const char * section_name;
31102 section * sec;
31103
31104 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31105 return default_function_section (decl, freq, startup, exit);
31106
31107 if (!target_pure_code)
31108 return default_function_section (decl, freq, startup, exit);
31109
31110
31111 section_name = DECL_SECTION_NAME (decl);
31112
31113 /* If a function is not in a named section then it falls under the 'default'
31114 text section, also known as '.text'. We can preserve previous behavior as
31115 the default text section already has the SHF_ARM_PURECODE section
31116 attribute. */
31117 if (!section_name)
31118 {
31119 section *default_sec = default_function_section (decl, freq, startup,
31120 exit);
31121
31122 /* If default_sec is not null, then it must be a special section like for
31123 example .text.startup. We set the pure-code attribute and return the
31124 same section to preserve existing behavior. */
31125 if (default_sec)
31126 default_sec->common.flags |= SECTION_ARM_PURECODE;
31127 return default_sec;
31128 }
31129
31130 /* Otherwise look whether a section has already been created with
31131 'section_name'. */
31132 sec = get_named_section (decl, section_name, 0);
31133 if (!sec)
31134 /* If that is not the case, passing NULL as the section's name to
31135 'get_named_section' will create a section with the declaration's
31136 section name.  */
31137 sec = get_named_section (decl, NULL, 0);
31138
31139 /* Set the SHF_ARM_PURECODE attribute. */
31140 sec->common.flags |= SECTION_ARM_PURECODE;
31141
31142 return sec;
31143 }
31144
31145 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31146
31147 If DECL is a function declaration and pure-code is passed as an option
31148 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31149 section's name and RELOC indicates whether the declaration's initializer
31150 may contain runtime relocations.  */
31151
31152 static unsigned int
31153 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31154 {
31155 unsigned int flags = default_section_type_flags (decl, name, reloc);
31156
31157 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31158 flags |= SECTION_ARM_PURECODE;
31159
31160 return flags;
31161 }
31162
31163 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31164
31165 static void
31166 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31167 rtx op0, rtx op1,
31168 rtx *quot_p, rtx *rem_p)
31169 {
31170 if (mode == SImode)
31171 gcc_assert (!TARGET_IDIV);
31172
31173 scalar_int_mode libval_mode
31174 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31175
31176 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31177 libval_mode,
31178 op0, GET_MODE (op0),
31179 op1, GET_MODE (op1));
31180
31181 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31182 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31183 GET_MODE_SIZE (mode));
31184
31185 gcc_assert (quotient);
31186 gcc_assert (remainder);
31187
31188 *quot_p = quotient;
31189 *rem_p = remainder;
31190 }
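/* As an illustrative sketch, for MODE == SImode the library call (e.g.
   __aeabi_idivmod) returns a single DImode value; the quotient is taken
   from the subreg at byte offset 0 and the remainder from the subreg at
   byte offset GET_MODE_SIZE (SImode) == 4, which is what the two
   simplify_gen_subreg calls above extract.  */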
31191
31192 /* This function checks for the availability of the coprocessor builtin passed
31193 in BUILTIN for the current target. Returns true if it is available and
31194 false otherwise. If a BUILTIN is passed for which this function has not
31195 been implemented it will cause an exception. */
31196
31197 bool
31198 arm_coproc_builtin_available (enum unspecv builtin)
31199 {
31200 /* None of these builtins are available in Thumb mode if the target only
31201 supports Thumb-1. */
31202 if (TARGET_THUMB1)
31203 return false;
31204
31205 switch (builtin)
31206 {
31207 case VUNSPEC_CDP:
31208 case VUNSPEC_LDC:
31209 case VUNSPEC_LDCL:
31210 case VUNSPEC_STC:
31211 case VUNSPEC_STCL:
31212 case VUNSPEC_MCR:
31213 case VUNSPEC_MRC:
31214 if (arm_arch4)
31215 return true;
31216 break;
31217 case VUNSPEC_CDP2:
31218 case VUNSPEC_LDC2:
31219 case VUNSPEC_LDC2L:
31220 case VUNSPEC_STC2:
31221 case VUNSPEC_STC2L:
31222 case VUNSPEC_MCR2:
31223 case VUNSPEC_MRC2:
31224 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31225 ARMv8-{A,M}. */
31226 if (arm_arch5)
31227 return true;
31228 break;
31229 case VUNSPEC_MCRR:
31230 case VUNSPEC_MRRC:
31231 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31232 ARMv8-{A,M}. */
31233 if (arm_arch6 || arm_arch5te)
31234 return true;
31235 break;
31236 case VUNSPEC_MCRR2:
31237 case VUNSPEC_MRRC2:
31238 if (arm_arch6)
31239 return true;
31240 break;
31241 default:
31242 gcc_unreachable ();
31243 }
31244 return false;
31245 }
31246
31247 /* This function returns true if OP is a valid memory operand for the ldc and
31248 stc coprocessor instructions and false otherwise. */
31249
31250 bool
31251 arm_coproc_ldc_stc_legitimate_address (rtx op)
31252 {
31253 HOST_WIDE_INT range;
31254 /* Has to be a memory operand. */
31255 if (!MEM_P (op))
31256 return false;
31257
31258 op = XEXP (op, 0);
31259
31260 /* We accept registers. */
31261 if (REG_P (op))
31262 return true;
31263
31264 switch (GET_CODE (op))
31265 {
31266 case PLUS:
31267 {
31268 /* Or registers with an offset. */
31269 if (!REG_P (XEXP (op, 0)))
31270 return false;
31271
31272 op = XEXP (op, 1);
31273
31274 /* The offset must be an immediate though. */
31275 if (!CONST_INT_P (op))
31276 return false;
31277
31278 range = INTVAL (op);
31279
31280 /* Within the range of [-1020,1020]. */
31281 if (!IN_RANGE (range, -1020, 1020))
31282 return false;
31283
31284 /* And a multiple of 4. */
31285 return (range % 4) == 0;
31286 }
31287 case PRE_INC:
31288 case POST_INC:
31289 case PRE_DEC:
31290 case POST_DEC:
31291 return REG_P (XEXP (op, 0));
31292 default:
31293 gcc_unreachable ();
31294 }
31295 return false;
31296 }
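/* For instance (illustrative), addresses such as [r0], [r1, #8] and
   [r2, #-1020] are accepted, while [r1, #6] (not a multiple of 4) and
   [r1, #1024] (outside [-1020, 1020]) are rejected; pre/post
   increment and decrement forms are accepted as long as the inner
   address is a plain register.  */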
31297
31298 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31299
31300 In VFPv1, VFP registers could only be accessed in the mode they were
31301 set, so subregs would be invalid there. However, we don't support
31302 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31303
31304 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31305 VFP registers in little-endian order. We can't describe that accurately to
31306 GCC, so avoid taking subregs of such values.
31307
31308 The only exception is going from a 128-bit to a 64-bit type. In that
31309 case the data layout happens to be consistent for big-endian, so we
31310 explicitly allow that case. */
31311
31312 static bool
31313 arm_can_change_mode_class (machine_mode from, machine_mode to,
31314 reg_class_t rclass)
31315 {
31316 if (TARGET_BIG_END
31317 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31318 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31319 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31320 && reg_classes_intersect_p (VFP_REGS, rclass))
31321 return false;
31322 return true;
31323 }
31324
31325 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31326 strcpy from constants will be faster. */
31327
31328 static HOST_WIDE_INT
31329 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31330 {
31331 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31332 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31333 return MAX (align, BITS_PER_WORD * factor);
31334 return align;
31335 }
31336
31337 #if CHECKING_P
31338 namespace selftest {
31339
31340 /* Scan the static data tables generated by parsecpu.awk looking for
31341 potential issues with the data. We primarily check for
31342 inconsistencies in the option extensions at present (extensions
31343 that duplicate others but aren't marked as aliases). Furthermore,
31344 for correct canonicalization later options must never be a subset
31345 of an earlier option. Any extension should also only specify other
31346 feature bits and never an architecture bit. The architecture is inferred
31347 from the declaration of the extension. */
31348 static void
31349 arm_test_cpu_arch_data (void)
31350 {
31351 const arch_option *arch;
31352 const cpu_option *cpu;
31353 auto_sbitmap target_isa (isa_num_bits);
31354 auto_sbitmap isa1 (isa_num_bits);
31355 auto_sbitmap isa2 (isa_num_bits);
31356
31357 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31358 {
31359 const cpu_arch_extension *ext1, *ext2;
31360
31361 if (arch->common.extensions == NULL)
31362 continue;
31363
31364 arm_initialize_isa (target_isa, arch->common.isa_bits);
31365
31366 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31367 {
31368 if (ext1->alias)
31369 continue;
31370
31371 arm_initialize_isa (isa1, ext1->isa_bits);
31372 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31373 {
31374 if (ext2->alias || ext1->remove != ext2->remove)
31375 continue;
31376
31377 arm_initialize_isa (isa2, ext2->isa_bits);
31378 /* If the option is a subset of the parent option, it doesn't
31379 add anything and so isn't useful. */
31380 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31381
31382 /* If the extension specifies any architectural bits then
31383 disallow it. Extensions should only specify feature bits. */
31384 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31385 }
31386 }
31387 }
31388
31389 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31390 {
31391 const cpu_arch_extension *ext1, *ext2;
31392
31393 if (cpu->common.extensions == NULL)
31394 continue;
31395
31396 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31397
31398 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31399 {
31400 if (ext1->alias)
31401 continue;
31402
31403 arm_initialize_isa (isa1, ext1->isa_bits);
31404 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31405 {
31406 if (ext2->alias || ext1->remove != ext2->remove)
31407 continue;
31408
31409 arm_initialize_isa (isa2, ext2->isa_bits);
31410 /* If the option is a subset of the parent option, it doesn't
31411 add anything and so isn't useful. */
31412 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31413
31414 /* If the extension specifies any architectural bits then
31415 disallow it. Extensions should only specify feature bits. */
31416 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31417 }
31418 }
31419 }
31420 }
31421
31422 /* Scan the static data tables generated by parsecpu.awk looking for
31423 potential issues with the data. Here we check for consistency between the
31424 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31425 a feature bit that is not defined by any FPU flag. */
31426 static void
31427 arm_test_fpu_data (void)
31428 {
31429 auto_sbitmap isa_all_fpubits (isa_num_bits);
31430 auto_sbitmap fpubits (isa_num_bits);
31431 auto_sbitmap tmpset (isa_num_bits);
31432
31433 static const enum isa_feature fpu_bitlist[]
31434 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31435 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31436
31437 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31438 {
31439 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31440 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31441 bitmap_clear (isa_all_fpubits);
31442 bitmap_copy (isa_all_fpubits, tmpset);
31443 }
31444
31445 if (!bitmap_empty_p (isa_all_fpubits))
31446 {
31447 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31448 " group that are not defined by any FPU.\n"
31449 " Check your arm-cpus.in.\n");
31450 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31451 }
31452 }
31453
31454 static void
31455 arm_run_selftests (void)
31456 {
31457 arm_test_cpu_arch_data ();
31458 arm_test_fpu_data ();
31459 }
31460 } /* Namespace selftest. */
31461
31462 #undef TARGET_RUN_TARGET_SELFTESTS
31463 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31464 #endif /* CHECKING_P */
31465
31466 struct gcc_target targetm = TARGET_INITIALIZER;
31467
31468 #include "gt-arm.h"