gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
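/* Editorial note, not part of the upstream source: the attributes in the
   table above are applied from user code with the usual GNU __attribute__
   syntax.  A minimal illustration (function names are placeholders,
   assuming a bare-metal ARM target):

     void far_helper (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     int  vfp_callback (int) __attribute__ ((pcs ("aapcs-vfp")));
     void gateway (void) __attribute__ ((cmse_nonsecure_entry));

   The handlers named in the table (arm_handle_isr_attribute,
   arm_handle_pcs_attribute, ...) validate such uses and attach the
   attribute to the declaration or type; cmse_nonsecure_entry additionally
   requires compiling with -mcmse.  */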
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
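/* Editorial note, not part of the upstream source: this is the upper bound
   on the number of conditional instructions grouped into one Thumb-2 IT
   block: a single instruction when arm_restrict_it (-mrestrict-it) is in
   effect, otherwise the architectural maximum of four.  */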
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
914 /* Nonzero if tuning for XScale. */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack; it's intended to help work around
927 problems in GLD, which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the ARMv8-M security extensions. */
975 int arm_arch_cmse = 0;
976
977 /* Nonzero if the core has a very small, high-latency multiply unit. */
978 int arm_m_profile_small_mul = 0;
979
980 /* The condition codes of the ARM, and the inverse function. */
981 static const char * const arm_condition_codes[] =
982 {
983 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
984 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
985 };
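/* Editorial note, not part of the upstream source: the order above matches
   the arm_cond_code enumeration, and the codes are laid out in inverse
   pairs (eq/ne, cs/cc, mi/pl, ...), so the "inverse function" mentioned in
   the comment is simply XOR-ing a condition code with 1.  */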
986
987 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
988 int arm_regs_in_sequence[] =
989 {
990 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
991 };
992
993 #define ARM_LSL_NAME "lsl"
994 #define streq(string1, string2) (strcmp (string1, string2) == 0)
995
996 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
997 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
998 | (1 << PIC_OFFSET_TABLE_REGNUM)))
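/* Editorial note, not part of the upstream source: THUMB2_WORK_REGS is the
   mask of low core registers (r0-r7) usable as scratch registers, with the
   Thumb hard frame pointer, stack pointer, program counter and PIC
   register explicitly excluded.  */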
999 \f
1000 /* Initialization code. */
1001
1002 struct cpu_tune
1003 {
1004 enum processor_type scheduler;
1005 unsigned int tune_flags;
1006 const struct tune_params *tune;
1007 };
1008
1009 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1010 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1011 { \
1012 num_slots, \
1013 l1_size, \
1014 l1_line_size \
1015 }
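/* Editorial note, not part of the upstream source: these two macros build
   the { num_slots, l1_size, l1_line_size } prefetch triple used in the
   tuning tables, e.g. ARM_PREFETCH_BENEFICIAL (4, -1, 64) expands to
   { 4, -1, 64 }; the numbers in that expansion are purely illustrative
   and not taken from any particular core.  */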
1016
1017 /* arm generic vectorizer costs. */
1018 static const
1019 struct cpu_vec_costs arm_default_vec_cost = {
1020 1, /* scalar_stmt_cost. */
1021 1, /* scalar_load_cost. */
1022 1, /* scalar_store_cost. */
1023 1, /* vec_stmt_cost. */
1024 1, /* vec_to_scalar_cost. */
1025 1, /* scalar_to_vec_cost. */
1026 1, /* vec_align_load_cost. */
1027 1, /* vec_unalign_load_cost. */
1028 1, /* vec_unalign_store_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1035 #include "aarch-cost-tables.h"
1036
1037
1038
1039 const struct cpu_cost_table cortexa9_extra_costs =
1040 {
1041 /* ALU */
1042 {
1043 0, /* arith. */
1044 0, /* logical. */
1045 0, /* shift. */
1046 COSTS_N_INSNS (1), /* shift_reg. */
1047 COSTS_N_INSNS (1), /* arith_shift. */
1048 COSTS_N_INSNS (2), /* arith_shift_reg. */
1049 0, /* log_shift. */
1050 COSTS_N_INSNS (1), /* log_shift_reg. */
1051 COSTS_N_INSNS (1), /* extend. */
1052 COSTS_N_INSNS (2), /* extend_arith. */
1053 COSTS_N_INSNS (1), /* bfi. */
1054 COSTS_N_INSNS (1), /* bfx. */
1055 0, /* clz. */
1056 0, /* rev. */
1057 0, /* non_exec. */
1058 true /* non_exec_costs_exec. */
1059 },
1060 {
1061 /* MULT SImode */
1062 {
1063 COSTS_N_INSNS (3), /* simple. */
1064 COSTS_N_INSNS (3), /* flag_setting. */
1065 COSTS_N_INSNS (2), /* extend. */
1066 COSTS_N_INSNS (3), /* add. */
1067 COSTS_N_INSNS (2), /* extend_add. */
1068 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1069 },
1070 /* MULT DImode */
1071 {
1072 0, /* simple (N/A). */
1073 0, /* flag_setting (N/A). */
1074 COSTS_N_INSNS (4), /* extend. */
1075 0, /* add (N/A). */
1076 COSTS_N_INSNS (4), /* extend_add. */
1077 0 /* idiv (N/A). */
1078 }
1079 },
1080 /* LD/ST */
1081 {
1082 COSTS_N_INSNS (2), /* load. */
1083 COSTS_N_INSNS (2), /* load_sign_extend. */
1084 COSTS_N_INSNS (2), /* ldrd. */
1085 COSTS_N_INSNS (2), /* ldm_1st. */
1086 1, /* ldm_regs_per_insn_1st. */
1087 2, /* ldm_regs_per_insn_subsequent. */
1088 COSTS_N_INSNS (5), /* loadf. */
1089 COSTS_N_INSNS (5), /* loadd. */
1090 COSTS_N_INSNS (1), /* load_unaligned. */
1091 COSTS_N_INSNS (2), /* store. */
1092 COSTS_N_INSNS (2), /* strd. */
1093 COSTS_N_INSNS (2), /* stm_1st. */
1094 1, /* stm_regs_per_insn_1st. */
1095 2, /* stm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (1), /* storef. */
1097 COSTS_N_INSNS (1), /* stored. */
1098 COSTS_N_INSNS (1), /* store_unaligned. */
1099 COSTS_N_INSNS (1), /* loadv. */
1100 COSTS_N_INSNS (1) /* storev. */
1101 },
1102 {
1103 /* FP SFmode */
1104 {
1105 COSTS_N_INSNS (14), /* div. */
1106 COSTS_N_INSNS (4), /* mult. */
1107 COSTS_N_INSNS (7), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (3), /* addsub. */
1110 COSTS_N_INSNS (1), /* fpconst. */
1111 COSTS_N_INSNS (1), /* neg. */
1112 COSTS_N_INSNS (3), /* compare. */
1113 COSTS_N_INSNS (3), /* widen. */
1114 COSTS_N_INSNS (3), /* narrow. */
1115 COSTS_N_INSNS (3), /* toint. */
1116 COSTS_N_INSNS (3), /* fromint. */
1117 COSTS_N_INSNS (3) /* roundint. */
1118 },
1119 /* FP DFmode */
1120 {
1121 COSTS_N_INSNS (24), /* div. */
1122 COSTS_N_INSNS (5), /* mult. */
1123 COSTS_N_INSNS (8), /* mult_addsub. */
1124 COSTS_N_INSNS (30), /* fma. */
1125 COSTS_N_INSNS (3), /* addsub. */
1126 COSTS_N_INSNS (1), /* fpconst. */
1127 COSTS_N_INSNS (1), /* neg. */
1128 COSTS_N_INSNS (3), /* compare. */
1129 COSTS_N_INSNS (3), /* widen. */
1130 COSTS_N_INSNS (3), /* narrow. */
1131 COSTS_N_INSNS (3), /* toint. */
1132 COSTS_N_INSNS (3), /* fromint. */
1133 COSTS_N_INSNS (3) /* roundint. */
1134 }
1135 },
1136 /* Vector */
1137 {
1138 COSTS_N_INSNS (1) /* alu. */
1139 }
1140 };
1141
1142 const struct cpu_cost_table cortexa8_extra_costs =
1143 {
1144 /* ALU */
1145 {
1146 0, /* arith. */
1147 0, /* logical. */
1148 COSTS_N_INSNS (1), /* shift. */
1149 0, /* shift_reg. */
1150 COSTS_N_INSNS (1), /* arith_shift. */
1151 0, /* arith_shift_reg. */
1152 COSTS_N_INSNS (1), /* log_shift. */
1153 0, /* log_shift_reg. */
1154 0, /* extend. */
1155 0, /* extend_arith. */
1156 0, /* bfi. */
1157 0, /* bfx. */
1158 0, /* clz. */
1159 0, /* rev. */
1160 0, /* non_exec. */
1161 true /* non_exec_costs_exec. */
1162 },
1163 {
1164 /* MULT SImode */
1165 {
1166 COSTS_N_INSNS (1), /* simple. */
1167 COSTS_N_INSNS (1), /* flag_setting. */
1168 COSTS_N_INSNS (1), /* extend. */
1169 COSTS_N_INSNS (1), /* add. */
1170 COSTS_N_INSNS (1), /* extend_add. */
1171 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1172 },
1173 /* MULT DImode */
1174 {
1175 0, /* simple (N/A). */
1176 0, /* flag_setting (N/A). */
1177 COSTS_N_INSNS (2), /* extend. */
1178 0, /* add (N/A). */
1179 COSTS_N_INSNS (2), /* extend_add. */
1180 0 /* idiv (N/A). */
1181 }
1182 },
1183 /* LD/ST */
1184 {
1185 COSTS_N_INSNS (1), /* load. */
1186 COSTS_N_INSNS (1), /* load_sign_extend. */
1187 COSTS_N_INSNS (1), /* ldrd. */
1188 COSTS_N_INSNS (1), /* ldm_1st. */
1189 1, /* ldm_regs_per_insn_1st. */
1190 2, /* ldm_regs_per_insn_subsequent. */
1191 COSTS_N_INSNS (1), /* loadf. */
1192 COSTS_N_INSNS (1), /* loadd. */
1193 COSTS_N_INSNS (1), /* load_unaligned. */
1194 COSTS_N_INSNS (1), /* store. */
1195 COSTS_N_INSNS (1), /* strd. */
1196 COSTS_N_INSNS (1), /* stm_1st. */
1197 1, /* stm_regs_per_insn_1st. */
1198 2, /* stm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* storef. */
1200 COSTS_N_INSNS (1), /* stored. */
1201 COSTS_N_INSNS (1), /* store_unaligned. */
1202 COSTS_N_INSNS (1), /* loadv. */
1203 COSTS_N_INSNS (1) /* storev. */
1204 },
1205 {
1206 /* FP SFmode */
1207 {
1208 COSTS_N_INSNS (36), /* div. */
1209 COSTS_N_INSNS (11), /* mult. */
1210 COSTS_N_INSNS (20), /* mult_addsub. */
1211 COSTS_N_INSNS (30), /* fma. */
1212 COSTS_N_INSNS (9), /* addsub. */
1213 COSTS_N_INSNS (3), /* fpconst. */
1214 COSTS_N_INSNS (3), /* neg. */
1215 COSTS_N_INSNS (6), /* compare. */
1216 COSTS_N_INSNS (4), /* widen. */
1217 COSTS_N_INSNS (4), /* narrow. */
1218 COSTS_N_INSNS (8), /* toint. */
1219 COSTS_N_INSNS (8), /* fromint. */
1220 COSTS_N_INSNS (8) /* roundint. */
1221 },
1222 /* FP DFmode */
1223 {
1224 COSTS_N_INSNS (64), /* div. */
1225 COSTS_N_INSNS (16), /* mult. */
1226 COSTS_N_INSNS (25), /* mult_addsub. */
1227 COSTS_N_INSNS (30), /* fma. */
1228 COSTS_N_INSNS (9), /* addsub. */
1229 COSTS_N_INSNS (3), /* fpconst. */
1230 COSTS_N_INSNS (3), /* neg. */
1231 COSTS_N_INSNS (6), /* compare. */
1232 COSTS_N_INSNS (6), /* widen. */
1233 COSTS_N_INSNS (6), /* narrow. */
1234 COSTS_N_INSNS (8), /* toint. */
1235 COSTS_N_INSNS (8), /* fromint. */
1236 COSTS_N_INSNS (8) /* roundint. */
1237 }
1238 },
1239 /* Vector */
1240 {
1241 COSTS_N_INSNS (1) /* alu. */
1242 }
1243 };
1244
1245 const struct cpu_cost_table cortexa5_extra_costs =
1246 {
1247 /* ALU */
1248 {
1249 0, /* arith. */
1250 0, /* logical. */
1251 COSTS_N_INSNS (1), /* shift. */
1252 COSTS_N_INSNS (1), /* shift_reg. */
1253 COSTS_N_INSNS (1), /* arith_shift. */
1254 COSTS_N_INSNS (1), /* arith_shift_reg. */
1255 COSTS_N_INSNS (1), /* log_shift. */
1256 COSTS_N_INSNS (1), /* log_shift_reg. */
1257 COSTS_N_INSNS (1), /* extend. */
1258 COSTS_N_INSNS (1), /* extend_arith. */
1259 COSTS_N_INSNS (1), /* bfi. */
1260 COSTS_N_INSNS (1), /* bfx. */
1261 COSTS_N_INSNS (1), /* clz. */
1262 COSTS_N_INSNS (1), /* rev. */
1263 0, /* non_exec. */
1264 true /* non_exec_costs_exec. */
1265 },
1266
1267 {
1268 /* MULT SImode */
1269 {
1270 0, /* simple. */
1271 COSTS_N_INSNS (1), /* flag_setting. */
1272 COSTS_N_INSNS (1), /* extend. */
1273 COSTS_N_INSNS (1), /* add. */
1274 COSTS_N_INSNS (1), /* extend_add. */
1275 COSTS_N_INSNS (7) /* idiv. */
1276 },
1277 /* MULT DImode */
1278 {
1279 0, /* simple (N/A). */
1280 0, /* flag_setting (N/A). */
1281 COSTS_N_INSNS (1), /* extend. */
1282 0, /* add. */
1283 COSTS_N_INSNS (2), /* extend_add. */
1284 0 /* idiv (N/A). */
1285 }
1286 },
1287 /* LD/ST */
1288 {
1289 COSTS_N_INSNS (1), /* load. */
1290 COSTS_N_INSNS (1), /* load_sign_extend. */
1291 COSTS_N_INSNS (6), /* ldrd. */
1292 COSTS_N_INSNS (1), /* ldm_1st. */
1293 1, /* ldm_regs_per_insn_1st. */
1294 2, /* ldm_regs_per_insn_subsequent. */
1295 COSTS_N_INSNS (2), /* loadf. */
1296 COSTS_N_INSNS (4), /* loadd. */
1297 COSTS_N_INSNS (1), /* load_unaligned. */
1298 COSTS_N_INSNS (1), /* store. */
1299 COSTS_N_INSNS (3), /* strd. */
1300 COSTS_N_INSNS (1), /* stm_1st. */
1301 1, /* stm_regs_per_insn_1st. */
1302 2, /* stm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* storef. */
1304 COSTS_N_INSNS (2), /* stored. */
1305 COSTS_N_INSNS (1), /* store_unaligned. */
1306 COSTS_N_INSNS (1), /* loadv. */
1307 COSTS_N_INSNS (1) /* storev. */
1308 },
1309 {
1310 /* FP SFmode */
1311 {
1312 COSTS_N_INSNS (15), /* div. */
1313 COSTS_N_INSNS (3), /* mult. */
1314 COSTS_N_INSNS (7), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1325 },
1326 /* FP DFmode */
1327 {
1328 COSTS_N_INSNS (30), /* div. */
1329 COSTS_N_INSNS (6), /* mult. */
1330 COSTS_N_INSNS (10), /* mult_addsub. */
1331 COSTS_N_INSNS (7), /* fma. */
1332 COSTS_N_INSNS (3), /* addsub. */
1333 COSTS_N_INSNS (3), /* fpconst. */
1334 COSTS_N_INSNS (3), /* neg. */
1335 COSTS_N_INSNS (3), /* compare. */
1336 COSTS_N_INSNS (3), /* widen. */
1337 COSTS_N_INSNS (3), /* narrow. */
1338 COSTS_N_INSNS (3), /* toint. */
1339 COSTS_N_INSNS (3), /* fromint. */
1340 COSTS_N_INSNS (3) /* roundint. */
1341 }
1342 },
1343 /* Vector */
1344 {
1345 COSTS_N_INSNS (1) /* alu. */
1346 }
1347 };
1348
1349
1350 const struct cpu_cost_table cortexa7_extra_costs =
1351 {
1352 /* ALU */
1353 {
1354 0, /* arith. */
1355 0, /* logical. */
1356 COSTS_N_INSNS (1), /* shift. */
1357 COSTS_N_INSNS (1), /* shift_reg. */
1358 COSTS_N_INSNS (1), /* arith_shift. */
1359 COSTS_N_INSNS (1), /* arith_shift_reg. */
1360 COSTS_N_INSNS (1), /* log_shift. */
1361 COSTS_N_INSNS (1), /* log_shift_reg. */
1362 COSTS_N_INSNS (1), /* extend. */
1363 COSTS_N_INSNS (1), /* extend_arith. */
1364 COSTS_N_INSNS (1), /* bfi. */
1365 COSTS_N_INSNS (1), /* bfx. */
1366 COSTS_N_INSNS (1), /* clz. */
1367 COSTS_N_INSNS (1), /* rev. */
1368 0, /* non_exec. */
1369 true /* non_exec_costs_exec. */
1370 },
1371
1372 {
1373 /* MULT SImode */
1374 {
1375 0, /* simple. */
1376 COSTS_N_INSNS (1), /* flag_setting. */
1377 COSTS_N_INSNS (1), /* extend. */
1378 COSTS_N_INSNS (1), /* add. */
1379 COSTS_N_INSNS (1), /* extend_add. */
1380 COSTS_N_INSNS (7) /* idiv. */
1381 },
1382 /* MULT DImode */
1383 {
1384 0, /* simple (N/A). */
1385 0, /* flag_setting (N/A). */
1386 COSTS_N_INSNS (1), /* extend. */
1387 0, /* add. */
1388 COSTS_N_INSNS (2), /* extend_add. */
1389 0 /* idiv (N/A). */
1390 }
1391 },
1392 /* LD/ST */
1393 {
1394 COSTS_N_INSNS (1), /* load. */
1395 COSTS_N_INSNS (1), /* load_sign_extend. */
1396 COSTS_N_INSNS (3), /* ldrd. */
1397 COSTS_N_INSNS (1), /* ldm_1st. */
1398 1, /* ldm_regs_per_insn_1st. */
1399 2, /* ldm_regs_per_insn_subsequent. */
1400 COSTS_N_INSNS (2), /* loadf. */
1401 COSTS_N_INSNS (2), /* loadd. */
1402 COSTS_N_INSNS (1), /* load_unaligned. */
1403 COSTS_N_INSNS (1), /* store. */
1404 COSTS_N_INSNS (3), /* strd. */
1405 COSTS_N_INSNS (1), /* stm_1st. */
1406 1, /* stm_regs_per_insn_1st. */
1407 2, /* stm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* storef. */
1409 COSTS_N_INSNS (2), /* stored. */
1410 COSTS_N_INSNS (1), /* store_unaligned. */
1411 COSTS_N_INSNS (1), /* loadv. */
1412 COSTS_N_INSNS (1) /* storev. */
1413 },
1414 {
1415 /* FP SFmode */
1416 {
1417 COSTS_N_INSNS (15), /* div. */
1418 COSTS_N_INSNS (3), /* mult. */
1419 COSTS_N_INSNS (7), /* mult_addsub. */
1420 COSTS_N_INSNS (7), /* fma. */
1421 COSTS_N_INSNS (3), /* addsub. */
1422 COSTS_N_INSNS (3), /* fpconst. */
1423 COSTS_N_INSNS (3), /* neg. */
1424 COSTS_N_INSNS (3), /* compare. */
1425 COSTS_N_INSNS (3), /* widen. */
1426 COSTS_N_INSNS (3), /* narrow. */
1427 COSTS_N_INSNS (3), /* toint. */
1428 COSTS_N_INSNS (3), /* fromint. */
1429 COSTS_N_INSNS (3) /* roundint. */
1430 },
1431 /* FP DFmode */
1432 {
1433 COSTS_N_INSNS (30), /* div. */
1434 COSTS_N_INSNS (6), /* mult. */
1435 COSTS_N_INSNS (10), /* mult_addsub. */
1436 COSTS_N_INSNS (7), /* fma. */
1437 COSTS_N_INSNS (3), /* addsub. */
1438 COSTS_N_INSNS (3), /* fpconst. */
1439 COSTS_N_INSNS (3), /* neg. */
1440 COSTS_N_INSNS (3), /* compare. */
1441 COSTS_N_INSNS (3), /* widen. */
1442 COSTS_N_INSNS (3), /* narrow. */
1443 COSTS_N_INSNS (3), /* toint. */
1444 COSTS_N_INSNS (3), /* fromint. */
1445 COSTS_N_INSNS (3) /* roundint. */
1446 }
1447 },
1448 /* Vector */
1449 {
1450 COSTS_N_INSNS (1) /* alu. */
1451 }
1452 };
1453
1454 const struct cpu_cost_table cortexa12_extra_costs =
1455 {
1456 /* ALU */
1457 {
1458 0, /* arith. */
1459 0, /* logical. */
1460 0, /* shift. */
1461 COSTS_N_INSNS (1), /* shift_reg. */
1462 COSTS_N_INSNS (1), /* arith_shift. */
1463 COSTS_N_INSNS (1), /* arith_shift_reg. */
1464 COSTS_N_INSNS (1), /* log_shift. */
1465 COSTS_N_INSNS (1), /* log_shift_reg. */
1466 0, /* extend. */
1467 COSTS_N_INSNS (1), /* extend_arith. */
1468 0, /* bfi. */
1469 COSTS_N_INSNS (1), /* bfx. */
1470 COSTS_N_INSNS (1), /* clz. */
1471 COSTS_N_INSNS (1), /* rev. */
1472 0, /* non_exec. */
1473 true /* non_exec_costs_exec. */
1474 },
1475 /* MULT SImode */
1476 {
1477 {
1478 COSTS_N_INSNS (2), /* simple. */
1479 COSTS_N_INSNS (3), /* flag_setting. */
1480 COSTS_N_INSNS (2), /* extend. */
1481 COSTS_N_INSNS (3), /* add. */
1482 COSTS_N_INSNS (2), /* extend_add. */
1483 COSTS_N_INSNS (18) /* idiv. */
1484 },
1485 /* MULT DImode */
1486 {
1487 0, /* simple (N/A). */
1488 0, /* flag_setting (N/A). */
1489 COSTS_N_INSNS (3), /* extend. */
1490 0, /* add (N/A). */
1491 COSTS_N_INSNS (3), /* extend_add. */
1492 0 /* idiv (N/A). */
1493 }
1494 },
1495 /* LD/ST */
1496 {
1497 COSTS_N_INSNS (3), /* load. */
1498 COSTS_N_INSNS (3), /* load_sign_extend. */
1499 COSTS_N_INSNS (3), /* ldrd. */
1500 COSTS_N_INSNS (3), /* ldm_1st. */
1501 1, /* ldm_regs_per_insn_1st. */
1502 2, /* ldm_regs_per_insn_subsequent. */
1503 COSTS_N_INSNS (3), /* loadf. */
1504 COSTS_N_INSNS (3), /* loadd. */
1505 0, /* load_unaligned. */
1506 0, /* store. */
1507 0, /* strd. */
1508 0, /* stm_1st. */
1509 1, /* stm_regs_per_insn_1st. */
1510 2, /* stm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (2), /* storef. */
1512 COSTS_N_INSNS (2), /* stored. */
1513 0, /* store_unaligned. */
1514 COSTS_N_INSNS (1), /* loadv. */
1515 COSTS_N_INSNS (1) /* storev. */
1516 },
1517 {
1518 /* FP SFmode */
1519 {
1520 COSTS_N_INSNS (17), /* div. */
1521 COSTS_N_INSNS (4), /* mult. */
1522 COSTS_N_INSNS (8), /* mult_addsub. */
1523 COSTS_N_INSNS (8), /* fma. */
1524 COSTS_N_INSNS (4), /* addsub. */
1525 COSTS_N_INSNS (2), /* fpconst. */
1526 COSTS_N_INSNS (2), /* neg. */
1527 COSTS_N_INSNS (2), /* compare. */
1528 COSTS_N_INSNS (4), /* widen. */
1529 COSTS_N_INSNS (4), /* narrow. */
1530 COSTS_N_INSNS (4), /* toint. */
1531 COSTS_N_INSNS (4), /* fromint. */
1532 COSTS_N_INSNS (4) /* roundint. */
1533 },
1534 /* FP DFmode */
1535 {
1536 COSTS_N_INSNS (31), /* div. */
1537 COSTS_N_INSNS (4), /* mult. */
1538 COSTS_N_INSNS (8), /* mult_addsub. */
1539 COSTS_N_INSNS (8), /* fma. */
1540 COSTS_N_INSNS (4), /* addsub. */
1541 COSTS_N_INSNS (2), /* fpconst. */
1542 COSTS_N_INSNS (2), /* neg. */
1543 COSTS_N_INSNS (2), /* compare. */
1544 COSTS_N_INSNS (4), /* widen. */
1545 COSTS_N_INSNS (4), /* narrow. */
1546 COSTS_N_INSNS (4), /* toint. */
1547 COSTS_N_INSNS (4), /* fromint. */
1548 COSTS_N_INSNS (4) /* roundint. */
1549 }
1550 },
1551 /* Vector */
1552 {
1553 COSTS_N_INSNS (1) /* alu. */
1554 }
1555 };
1556
1557 const struct cpu_cost_table cortexa15_extra_costs =
1558 {
1559 /* ALU */
1560 {
1561 0, /* arith. */
1562 0, /* logical. */
1563 0, /* shift. */
1564 0, /* shift_reg. */
1565 COSTS_N_INSNS (1), /* arith_shift. */
1566 COSTS_N_INSNS (1), /* arith_shift_reg. */
1567 COSTS_N_INSNS (1), /* log_shift. */
1568 COSTS_N_INSNS (1), /* log_shift_reg. */
1569 0, /* extend. */
1570 COSTS_N_INSNS (1), /* extend_arith. */
1571 COSTS_N_INSNS (1), /* bfi. */
1572 0, /* bfx. */
1573 0, /* clz. */
1574 0, /* rev. */
1575 0, /* non_exec. */
1576 true /* non_exec_costs_exec. */
1577 },
1578 /* MULT SImode */
1579 {
1580 {
1581 COSTS_N_INSNS (2), /* simple. */
1582 COSTS_N_INSNS (3), /* flag_setting. */
1583 COSTS_N_INSNS (2), /* extend. */
1584 COSTS_N_INSNS (2), /* add. */
1585 COSTS_N_INSNS (2), /* extend_add. */
1586 COSTS_N_INSNS (18) /* idiv. */
1587 },
1588 /* MULT DImode */
1589 {
1590 0, /* simple (N/A). */
1591 0, /* flag_setting (N/A). */
1592 COSTS_N_INSNS (3), /* extend. */
1593 0, /* add (N/A). */
1594 COSTS_N_INSNS (3), /* extend_add. */
1595 0 /* idiv (N/A). */
1596 }
1597 },
1598 /* LD/ST */
1599 {
1600 COSTS_N_INSNS (3), /* load. */
1601 COSTS_N_INSNS (3), /* load_sign_extend. */
1602 COSTS_N_INSNS (3), /* ldrd. */
1603 COSTS_N_INSNS (4), /* ldm_1st. */
1604 1, /* ldm_regs_per_insn_1st. */
1605 2, /* ldm_regs_per_insn_subsequent. */
1606 COSTS_N_INSNS (4), /* loadf. */
1607 COSTS_N_INSNS (4), /* loadd. */
1608 0, /* load_unaligned. */
1609 0, /* store. */
1610 0, /* strd. */
1611 COSTS_N_INSNS (1), /* stm_1st. */
1612 1, /* stm_regs_per_insn_1st. */
1613 2, /* stm_regs_per_insn_subsequent. */
1614 0, /* storef. */
1615 0, /* stored. */
1616 0, /* store_unaligned. */
1617 COSTS_N_INSNS (1), /* loadv. */
1618 COSTS_N_INSNS (1) /* storev. */
1619 },
1620 {
1621 /* FP SFmode */
1622 {
1623 COSTS_N_INSNS (17), /* div. */
1624 COSTS_N_INSNS (4), /* mult. */
1625 COSTS_N_INSNS (8), /* mult_addsub. */
1626 COSTS_N_INSNS (8), /* fma. */
1627 COSTS_N_INSNS (4), /* addsub. */
1628 COSTS_N_INSNS (2), /* fpconst. */
1629 COSTS_N_INSNS (2), /* neg. */
1630 COSTS_N_INSNS (5), /* compare. */
1631 COSTS_N_INSNS (4), /* widen. */
1632 COSTS_N_INSNS (4), /* narrow. */
1633 COSTS_N_INSNS (4), /* toint. */
1634 COSTS_N_INSNS (4), /* fromint. */
1635 COSTS_N_INSNS (4) /* roundint. */
1636 },
1637 /* FP DFmode */
1638 {
1639 COSTS_N_INSNS (31), /* div. */
1640 COSTS_N_INSNS (4), /* mult. */
1641 COSTS_N_INSNS (8), /* mult_addsub. */
1642 COSTS_N_INSNS (8), /* fma. */
1643 COSTS_N_INSNS (4), /* addsub. */
1644 COSTS_N_INSNS (2), /* fpconst. */
1645 COSTS_N_INSNS (2), /* neg. */
1646 COSTS_N_INSNS (2), /* compare. */
1647 COSTS_N_INSNS (4), /* widen. */
1648 COSTS_N_INSNS (4), /* narrow. */
1649 COSTS_N_INSNS (4), /* toint. */
1650 COSTS_N_INSNS (4), /* fromint. */
1651 COSTS_N_INSNS (4) /* roundint. */
1652 }
1653 },
1654 /* Vector */
1655 {
1656 COSTS_N_INSNS (1) /* alu. */
1657 }
1658 };
1659
1660 const struct cpu_cost_table v7m_extra_costs =
1661 {
1662 /* ALU */
1663 {
1664 0, /* arith. */
1665 0, /* logical. */
1666 0, /* shift. */
1667 0, /* shift_reg. */
1668 0, /* arith_shift. */
1669 COSTS_N_INSNS (1), /* arith_shift_reg. */
1670 0, /* log_shift. */
1671 COSTS_N_INSNS (1), /* log_shift_reg. */
1672 0, /* extend. */
1673 COSTS_N_INSNS (1), /* extend_arith. */
1674 0, /* bfi. */
1675 0, /* bfx. */
1676 0, /* clz. */
1677 0, /* rev. */
1678 COSTS_N_INSNS (1), /* non_exec. */
1679 false /* non_exec_costs_exec. */
1680 },
1681 {
1682 /* MULT SImode */
1683 {
1684 COSTS_N_INSNS (1), /* simple. */
1685 COSTS_N_INSNS (1), /* flag_setting. */
1686 COSTS_N_INSNS (2), /* extend. */
1687 COSTS_N_INSNS (1), /* add. */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 COSTS_N_INSNS (8) /* idiv. */
1690 },
1691 /* MULT DImode */
1692 {
1693 0, /* simple (N/A). */
1694 0, /* flag_setting (N/A). */
1695 COSTS_N_INSNS (2), /* extend. */
1696 0, /* add (N/A). */
1697 COSTS_N_INSNS (3), /* extend_add. */
1698 0 /* idiv (N/A). */
1699 }
1700 },
1701 /* LD/ST */
1702 {
1703 COSTS_N_INSNS (2), /* load. */
1704 0, /* load_sign_extend. */
1705 COSTS_N_INSNS (3), /* ldrd. */
1706 COSTS_N_INSNS (2), /* ldm_1st. */
1707 1, /* ldm_regs_per_insn_1st. */
1708 1, /* ldm_regs_per_insn_subsequent. */
1709 COSTS_N_INSNS (2), /* loadf. */
1710 COSTS_N_INSNS (3), /* loadd. */
1711 COSTS_N_INSNS (1), /* load_unaligned. */
1712 COSTS_N_INSNS (2), /* store. */
1713 COSTS_N_INSNS (3), /* strd. */
1714 COSTS_N_INSNS (2), /* stm_1st. */
1715 1, /* stm_regs_per_insn_1st. */
1716 1, /* stm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* storef. */
1718 COSTS_N_INSNS (3), /* stored. */
1719 COSTS_N_INSNS (1), /* store_unaligned. */
1720 COSTS_N_INSNS (1), /* loadv. */
1721 COSTS_N_INSNS (1) /* storev. */
1722 },
1723 {
1724 /* FP SFmode */
1725 {
1726 COSTS_N_INSNS (7), /* div. */
1727 COSTS_N_INSNS (2), /* mult. */
1728 COSTS_N_INSNS (5), /* mult_addsub. */
1729 COSTS_N_INSNS (3), /* fma. */
1730 COSTS_N_INSNS (1), /* addsub. */
1731 0, /* fpconst. */
1732 0, /* neg. */
1733 0, /* compare. */
1734 0, /* widen. */
1735 0, /* narrow. */
1736 0, /* toint. */
1737 0, /* fromint. */
1738 0 /* roundint. */
1739 },
1740 /* FP DFmode */
1741 {
1742 COSTS_N_INSNS (15), /* div. */
1743 COSTS_N_INSNS (5), /* mult. */
1744 COSTS_N_INSNS (7), /* mult_addsub. */
1745 COSTS_N_INSNS (7), /* fma. */
1746 COSTS_N_INSNS (3), /* addsub. */
1747 0, /* fpconst. */
1748 0, /* neg. */
1749 0, /* compare. */
1750 0, /* widen. */
1751 0, /* narrow. */
1752 0, /* toint. */
1753 0, /* fromint. */
1754 0 /* roundint. */
1755 }
1756 },
1757 /* Vector */
1758 {
1759 COSTS_N_INSNS (1) /* alu. */
1760 }
1761 };
1762
1763 const struct tune_params arm_slowmul_tune =
1764 {
1765 &generic_extra_costs, /* Insn extra costs. */
1766 NULL, /* Sched adj cost. */
1767 arm_default_branch_cost,
1768 &arm_default_vec_cost,
1769 3, /* Constant limit. */
1770 5, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 tune_params::PREF_CONST_POOL_TRUE,
1775 tune_params::PREF_LDRD_FALSE,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER,
1779 tune_params::PREF_NEON_64_FALSE,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE,
1781 tune_params::FUSE_NOTHING,
1782 tune_params::SCHED_AUTOPREF_OFF
1783 };
1784
1785 const struct tune_params arm_fastmul_tune =
1786 {
1787 &generic_extra_costs, /* Insn extra costs. */
1788 NULL, /* Sched adj cost. */
1789 arm_default_branch_cost,
1790 &arm_default_vec_cost,
1791 1, /* Constant limit. */
1792 5, /* Max cond insns. */
1793 8, /* Memset max inline. */
1794 1, /* Issue rate. */
1795 ARM_PREFETCH_NOT_BENEFICIAL,
1796 tune_params::PREF_CONST_POOL_TRUE,
1797 tune_params::PREF_LDRD_FALSE,
1798 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1800 tune_params::DISPARAGE_FLAGS_NEITHER,
1801 tune_params::PREF_NEON_64_FALSE,
1802 tune_params::PREF_NEON_STRINGOPS_FALSE,
1803 tune_params::FUSE_NOTHING,
1804 tune_params::SCHED_AUTOPREF_OFF
1805 };
1806
1807 /* StrongARM has early execution of branches, so a sequence that is worth
1808 skipping is shorter. Set max_insns_skipped to a lower value. */
1809
1810 const struct tune_params arm_strongarm_tune =
1811 {
1812 &generic_extra_costs, /* Insn extra costs. */
1813 NULL, /* Sched adj cost. */
1814 arm_default_branch_cost,
1815 &arm_default_vec_cost,
1816 1, /* Constant limit. */
1817 3, /* Max cond insns. */
1818 8, /* Memset max inline. */
1819 1, /* Issue rate. */
1820 ARM_PREFETCH_NOT_BENEFICIAL,
1821 tune_params::PREF_CONST_POOL_TRUE,
1822 tune_params::PREF_LDRD_FALSE,
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1825 tune_params::DISPARAGE_FLAGS_NEITHER,
1826 tune_params::PREF_NEON_64_FALSE,
1827 tune_params::PREF_NEON_STRINGOPS_FALSE,
1828 tune_params::FUSE_NOTHING,
1829 tune_params::SCHED_AUTOPREF_OFF
1830 };
1831
1832 const struct tune_params arm_xscale_tune =
1833 {
1834 &generic_extra_costs, /* Insn extra costs. */
1835 xscale_sched_adjust_cost,
1836 arm_default_branch_cost,
1837 &arm_default_vec_cost,
1838 2, /* Constant limit. */
1839 3, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 tune_params::PREF_CONST_POOL_TRUE,
1844 tune_params::PREF_LDRD_FALSE,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER,
1848 tune_params::PREF_NEON_64_FALSE,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE,
1850 tune_params::FUSE_NOTHING,
1851 tune_params::SCHED_AUTOPREF_OFF
1852 };
1853
1854 const struct tune_params arm_9e_tune =
1855 {
1856 &generic_extra_costs, /* Insn extra costs. */
1857 NULL, /* Sched adj cost. */
1858 arm_default_branch_cost,
1859 &arm_default_vec_cost,
1860 1, /* Constant limit. */
1861 5, /* Max cond insns. */
1862 8, /* Memset max inline. */
1863 1, /* Issue rate. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 tune_params::PREF_CONST_POOL_TRUE,
1866 tune_params::PREF_LDRD_FALSE,
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1868 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1869 tune_params::DISPARAGE_FLAGS_NEITHER,
1870 tune_params::PREF_NEON_64_FALSE,
1871 tune_params::PREF_NEON_STRINGOPS_FALSE,
1872 tune_params::FUSE_NOTHING,
1873 tune_params::SCHED_AUTOPREF_OFF
1874 };
1875
1876 const struct tune_params arm_marvell_pj4_tune =
1877 {
1878 &generic_extra_costs, /* Insn extra costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 1, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 2, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_64_FALSE,
1893 tune_params::PREF_NEON_STRINGOPS_FALSE,
1894 tune_params::FUSE_NOTHING,
1895 tune_params::SCHED_AUTOPREF_OFF
1896 };
1897
1898 const struct tune_params arm_v6t2_tune =
1899 {
1900 &generic_extra_costs, /* Insn extra costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_FALSE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_64_FALSE,
1915 tune_params::PREF_NEON_STRINGOPS_FALSE,
1916 tune_params::FUSE_NOTHING,
1917 tune_params::SCHED_AUTOPREF_OFF
1918 };
1919
1920
1921 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1922 const struct tune_params arm_cortex_tune =
1923 {
1924 &generic_extra_costs,
1925 NULL, /* Sched adj cost. */
1926 arm_default_branch_cost,
1927 &arm_default_vec_cost,
1928 1, /* Constant limit. */
1929 5, /* Max cond insns. */
1930 8, /* Memset max inline. */
1931 2, /* Issue rate. */
1932 ARM_PREFETCH_NOT_BENEFICIAL,
1933 tune_params::PREF_CONST_POOL_FALSE,
1934 tune_params::PREF_LDRD_FALSE,
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1937 tune_params::DISPARAGE_FLAGS_NEITHER,
1938 tune_params::PREF_NEON_64_FALSE,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_cortex_a8_tune =
1945 {
1946 &cortexa8_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 arm_default_branch_cost,
1949 &arm_default_vec_cost,
1950 1, /* Constant limit. */
1951 5, /* Max cond insns. */
1952 8, /* Memset max inline. */
1953 2, /* Issue rate. */
1954 ARM_PREFETCH_NOT_BENEFICIAL,
1955 tune_params::PREF_CONST_POOL_FALSE,
1956 tune_params::PREF_LDRD_FALSE,
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1959 tune_params::DISPARAGE_FLAGS_NEITHER,
1960 tune_params::PREF_NEON_64_FALSE,
1961 tune_params::PREF_NEON_STRINGOPS_TRUE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_cortex_a7_tune =
1967 {
1968 &cortexa7_extra_costs,
1969 NULL, /* Sched adj cost. */
1970 arm_default_branch_cost,
1971 &arm_default_vec_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 8, /* Memset max inline. */
1975 2, /* Issue rate. */
1976 ARM_PREFETCH_NOT_BENEFICIAL,
1977 tune_params::PREF_CONST_POOL_FALSE,
1978 tune_params::PREF_LDRD_FALSE,
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1981 tune_params::DISPARAGE_FLAGS_NEITHER,
1982 tune_params::PREF_NEON_64_FALSE,
1983 tune_params::PREF_NEON_STRINGOPS_TRUE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_cortex_a15_tune =
1989 {
1990 &cortexa15_extra_costs,
1991 NULL, /* Sched adj cost. */
1992 arm_default_branch_cost,
1993 &arm_default_vec_cost,
1994 1, /* Constant limit. */
1995 2, /* Max cond insns. */
1996 8, /* Memset max inline. */
1997 3, /* Issue rate. */
1998 ARM_PREFETCH_NOT_BENEFICIAL,
1999 tune_params::PREF_CONST_POOL_FALSE,
2000 tune_params::PREF_LDRD_TRUE,
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2003 tune_params::DISPARAGE_FLAGS_ALL,
2004 tune_params::PREF_NEON_64_FALSE,
2005 tune_params::PREF_NEON_STRINGOPS_TRUE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_FULL
2008 };
2009
2010 const struct tune_params arm_cortex_a35_tune =
2011 {
2012 &cortexa53_extra_costs,
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 1, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a53_tune =
2033 {
2034 &cortexa53_extra_costs,
2035 NULL, /* Sched adj cost. */
2036 arm_default_branch_cost,
2037 &arm_default_vec_cost,
2038 1, /* Constant limit. */
2039 5, /* Max cond insns. */
2040 8, /* Memset max inline. */
2041 2, /* Issue rate. */
2042 ARM_PREFETCH_NOT_BENEFICIAL,
2043 tune_params::PREF_CONST_POOL_FALSE,
2044 tune_params::PREF_LDRD_FALSE,
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2047 tune_params::DISPARAGE_FLAGS_NEITHER,
2048 tune_params::PREF_NEON_64_FALSE,
2049 tune_params::PREF_NEON_STRINGOPS_TRUE,
2050 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2051 tune_params::SCHED_AUTOPREF_OFF
2052 };
2053
2054 const struct tune_params arm_cortex_a57_tune =
2055 {
2056 &cortexa57_extra_costs,
2057 NULL, /* Sched adj cost. */
2058 arm_default_branch_cost,
2059 &arm_default_vec_cost,
2060 1, /* Constant limit. */
2061 2, /* Max cond insns. */
2062 8, /* Memset max inline. */
2063 3, /* Issue rate. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 tune_params::PREF_CONST_POOL_FALSE,
2066 tune_params::PREF_LDRD_TRUE,
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2069 tune_params::DISPARAGE_FLAGS_ALL,
2070 tune_params::PREF_NEON_64_FALSE,
2071 tune_params::PREF_NEON_STRINGOPS_TRUE,
2072 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2073 tune_params::SCHED_AUTOPREF_FULL
2074 };
2075
2076 const struct tune_params arm_exynosm1_tune =
2077 {
2078 &exynosm1_extra_costs,
2079 NULL, /* Sched adj cost. */
2080 arm_default_branch_cost,
2081 &arm_default_vec_cost,
2082 1, /* Constant limit. */
2083 2, /* Max cond insns. */
2084 8, /* Memset max inline. */
2085 3, /* Issue rate. */
2086 ARM_PREFETCH_NOT_BENEFICIAL,
2087 tune_params::PREF_CONST_POOL_FALSE,
2088 tune_params::PREF_LDRD_TRUE,
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2090 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2091 tune_params::DISPARAGE_FLAGS_ALL,
2092 tune_params::PREF_NEON_64_FALSE,
2093 tune_params::PREF_NEON_STRINGOPS_TRUE,
2094 tune_params::FUSE_NOTHING,
2095 tune_params::SCHED_AUTOPREF_OFF
2096 };
2097
2098 const struct tune_params arm_xgene1_tune =
2099 {
2100 &xgene1_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 32, /* Memset max inline. */
2107 4, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_64_FALSE,
2115 tune_params::PREF_NEON_STRINGOPS_FALSE,
2116 tune_params::FUSE_NOTHING,
2117 tune_params::SCHED_AUTOPREF_OFF
2118 };
2119
2120 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2121 less appealing. Set max_insns_skipped to a low value. */
2122
2123 const struct tune_params arm_cortex_a5_tune =
2124 {
2125 &cortexa5_extra_costs,
2126 NULL, /* Sched adj cost. */
2127 arm_cortex_a5_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 1, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 2, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_64_FALSE,
2140 tune_params::PREF_NEON_STRINGOPS_TRUE,
2141 tune_params::FUSE_NOTHING,
2142 tune_params::SCHED_AUTOPREF_OFF
2143 };
2144
2145 const struct tune_params arm_cortex_a9_tune =
2146 {
2147 &cortexa9_extra_costs,
2148 cortex_a9_sched_adjust_cost,
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_BENEFICIAL(4,32,32),
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_64_FALSE,
2162 tune_params::PREF_NEON_STRINGOPS_FALSE,
2163 tune_params::FUSE_NOTHING,
2164 tune_params::SCHED_AUTOPREF_OFF
2165 };
2166
2167 const struct tune_params arm_cortex_a12_tune =
2168 {
2169 &cortexa12_extra_costs,
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost, /* Vectorizer costs. */
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 2, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_64_FALSE,
2184 tune_params::PREF_NEON_STRINGOPS_TRUE,
2185 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2186 tune_params::SCHED_AUTOPREF_OFF
2187 };
2188
2189 const struct tune_params arm_cortex_a73_tune =
2190 {
2191 &cortexa57_extra_costs,
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost, /* Vectorizer costs. */
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 2, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_64_FALSE,
2206 tune_params::PREF_NEON_STRINGOPS_TRUE,
2207 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2208 tune_params::SCHED_AUTOPREF_FULL
2209 };
2210
2211 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a single
2212    cycle to execute (so two cycles for the pair).  An LDR from the constant pool also takes two cycles
2213 to execute, but mildly increases pipelining opportunity (consecutive
2214 loads/stores can be pipelined together, saving one cycle), and may also
2215 improve icache utilisation. Hence we prefer the constant pool for such
2216 processors. */
2217
2218 const struct tune_params arm_v7m_tune =
2219 {
2220 &v7m_extra_costs,
2221 NULL, /* Sched adj cost. */
2222 arm_cortex_m_branch_cost,
2223 &arm_default_vec_cost,
2224 1, /* Constant limit. */
2225 2, /* Max cond insns. */
2226 8, /* Memset max inline. */
2227 1, /* Issue rate. */
2228 ARM_PREFETCH_NOT_BENEFICIAL,
2229 tune_params::PREF_CONST_POOL_TRUE,
2230 tune_params::PREF_LDRD_FALSE,
2231 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2233 tune_params::DISPARAGE_FLAGS_NEITHER,
2234 tune_params::PREF_NEON_64_FALSE,
2235 tune_params::PREF_NEON_STRINGOPS_FALSE,
2236 tune_params::FUSE_NOTHING,
2237 tune_params::SCHED_AUTOPREF_OFF
2238 };
2239
2240 /* Cortex-M7 tuning. */
2241
2242 const struct tune_params arm_cortex_m7_tune =
2243 {
2244 &v7m_extra_costs,
2245 NULL, /* Sched adj cost. */
2246 arm_cortex_m7_branch_cost,
2247 &arm_default_vec_cost,
2248 0, /* Constant limit. */
2249 1, /* Max cond insns. */
2250 8, /* Memset max inline. */
2251 2, /* Issue rate. */
2252 ARM_PREFETCH_NOT_BENEFICIAL,
2253 tune_params::PREF_CONST_POOL_TRUE,
2254 tune_params::PREF_LDRD_FALSE,
2255 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2257 tune_params::DISPARAGE_FLAGS_NEITHER,
2258 tune_params::PREF_NEON_64_FALSE,
2259 tune_params::PREF_NEON_STRINGOPS_FALSE,
2260 tune_params::FUSE_NOTHING,
2261 tune_params::SCHED_AUTOPREF_OFF
2262 };
2263
2264 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2265 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2266 cortex-m23. */
2267 const struct tune_params arm_v6m_tune =
2268 {
2269 &generic_extra_costs, /* Insn extra costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_default_branch_cost,
2272 &arm_default_vec_cost, /* Vectorizer costs. */
2273 1, /* Constant limit. */
2274 5, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_FALSE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 const struct tune_params arm_fa726te_tune =
2290 {
2291 &generic_extra_costs, /* Insn extra costs. */
2292 fa726te_sched_adjust_cost,
2293 arm_default_branch_cost,
2294 &arm_default_vec_cost,
2295 1, /* Constant limit. */
2296 5, /* Max cond insns. */
2297 8, /* Memset max inline. */
2298 2, /* Issue rate. */
2299 ARM_PREFETCH_NOT_BENEFICIAL,
2300 tune_params::PREF_CONST_POOL_TRUE,
2301 tune_params::PREF_LDRD_FALSE,
2302 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2303 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2304 tune_params::DISPARAGE_FLAGS_NEITHER,
2305 tune_params::PREF_NEON_64_FALSE,
2306 tune_params::PREF_NEON_STRINGOPS_FALSE,
2307 tune_params::FUSE_NOTHING,
2308 tune_params::SCHED_AUTOPREF_OFF
2309 };
2310
2311 /* Auto-generated CPU, FPU and architecture tables. */
2312 #include "arm-cpu-data.h"
2313
2314 /* The name of the preprocessor macro to define for this architecture. PROFILE
2315 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2316 is thus chosen to be big enough to hold the longest architecture name. */
2317
2318 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
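/* Editorial note (illustrative, not part of the original source): with the
   "8A" example from the comment above, the PROFILE placeholder is overwritten
   so that the macro actually defined becomes __ARM_ARCH_8A__; the template
   string itself is what makes the buffer big enough for any architecture
   suffix.  */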
2319
2320 /* Supported TLS relocations. */
2321
2322 enum tls_reloc {
2323 TLS_GD32,
2324 TLS_LDM32,
2325 TLS_LDO32,
2326 TLS_IE32,
2327 TLS_LE32,
2328 TLS_DESCSEQ /* GNU scheme */
2329 };
2330
2331 /* The maximum number of insns to be used when loading a constant. */
2332 inline static int
2333 arm_constant_limit (bool size_p)
2334 {
2335 return size_p ? 1 : current_tune->constant_limit;
2336 }
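/* Editorial note (worked example, not part of the original source): when
   optimizing for size, at most one instruction is allowed for synthesising a
   constant; otherwise the limit comes from the active tuning, e.g. 3 for
   arm_slowmul_tune above and 1 for most of the other tune_params in this
   file.  */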
2337
2338 /* Emit an insn that's a simple single-set. Both the operands must be known
2339 to be valid. */
2340 inline static rtx_insn *
2341 emit_set_insn (rtx x, rtx y)
2342 {
2343 return emit_insn (gen_rtx_SET (x, y));
2344 }
2345
2346 /* Return the number of bits set in VALUE. */
2347 static unsigned
2348 bit_count (unsigned long value)
2349 {
2350 unsigned long count = 0;
2351
2352 while (value)
2353 {
2354 count++;
2355 value &= value - 1; /* Clear the least-significant set bit. */
2356 }
2357
2358 return count;
2359 }
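/* Editorial note (worked example, not part of the original source): for
   value = 0b101100 the loop runs three times:
     0b101100 & 0b101011 = 0b101000
     0b101000 & 0b100111 = 0b100000
     0b100000 & 0b011111 = 0
   so bit_count returns 3, the number of set bits.  */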
2360
2361 /* Return the number of bits set in BMAP. */
2362 static unsigned
2363 bitmap_popcount (const sbitmap bmap)
2364 {
2365 unsigned int count = 0;
2366 unsigned int n = 0;
2367 sbitmap_iterator sbi;
2368
2369 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2370 count++;
2371 return count;
2372 }
2373
2374 typedef struct
2375 {
2376 machine_mode mode;
2377 const char *name;
2378 } arm_fixed_mode_set;
2379
2380 /* A small helper for setting fixed-point library libfuncs. */
2381
2382 static void
2383 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2384 const char *funcname, const char *modename,
2385 int num_suffix)
2386 {
2387 char buffer[50];
2388
2389 if (num_suffix == 0)
2390 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2391 else
2392 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2393
2394 set_optab_libfunc (optable, mode, buffer);
2395 }
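/* Editorial note (illustrative, not part of the original source): a call such
   as
     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);
   (as issued by the loop further down) registers the library function name
   "__gnu_addqq3" for QQmode addition; with num_suffix == 0 the trailing digit
   is simply omitted.  */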
2396
2397 static void
2398 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2399 machine_mode from, const char *funcname,
2400 const char *toname, const char *fromname)
2401 {
2402 char buffer[50];
2403 const char *maybe_suffix_2 = "";
2404
2405 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2406 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2407 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2408 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2409 maybe_suffix_2 = "2";
2410
2411 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2412 maybe_suffix_2);
2413
2414 set_conv_libfunc (optable, to, from, buffer);
2415 }
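/* Editorial note (illustrative, not part of the original source): the name is
   built as "__gnu_" + funcname + fromname + toname, with a trailing "2" when
   source and destination are both fixed-point modes of the same signedness
   and the same fract/accum class.  For instance, a signed fract-to-fract
   conversion from QQmode to DQmode registers "__gnu_fractqqdq2", while a
   conversion from QQmode to SImode registers "__gnu_fractqqsi" (no
   suffix).  */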
2416
2417 /* Set up library functions unique to ARM. */
2418
2419 static void
2420 arm_init_libfuncs (void)
2421 {
2422 /* For Linux, we have access to kernel support for atomic operations. */
2423 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2424 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2425
2426 /* There are no special library functions unless we are using the
2427 ARM BPABI. */
2428 if (!TARGET_BPABI)
2429 return;
2430
2431 /* The functions below are described in Section 4 of the "Run-Time
2432 ABI for the ARM architecture", Version 1.0. */
2433
2434 /* Double-precision floating-point arithmetic. Table 2. */
2435 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2436 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2437 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2438 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2439 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2440
2441 /* Double-precision comparisons. Table 3. */
2442 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2443 set_optab_libfunc (ne_optab, DFmode, NULL);
2444 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2445 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2446 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2447 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2448 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2449
2450 /* Single-precision floating-point arithmetic. Table 4. */
2451 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2452 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2453 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2454 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2455 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2456
2457 /* Single-precision comparisons. Table 5. */
2458 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2459 set_optab_libfunc (ne_optab, SFmode, NULL);
2460 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2461 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2462 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2463 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2464 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2465
2466 /* Floating-point to integer conversions. Table 6. */
2467 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2468 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2469 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2470 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2471 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2472 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2473 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2474 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2475
2476 /* Conversions between floating types. Table 7. */
2477 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2478 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2479
2480 /* Integer to floating-point conversions. Table 8. */
2481 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2482 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2483 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2484 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2485 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2486 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2487 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2488 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2489
2490 /* Long long. Table 9. */
2491 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2492 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2493 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2494 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2495 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2496 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2497 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2498 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2499
2500 /* Integer (32/32->32) division. \S 4.3.1. */
2501 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2502 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2503
2504 /* The divmod functions are designed so that they can be used for
2505 plain division, even though they return both the quotient and the
2506 remainder. The quotient is returned in the usual location (i.e.,
2507 r0 for SImode, {r0, r1} for DImode), just as would be expected
2508 for an ordinary division routine. Because the AAPCS calling
2509 conventions specify that all of { r0, r1, r2, r3 } are
2510 call-clobbered registers, there is no need to tell the compiler
2511 explicitly that those registers are clobbered by these
2512 routines. */
2513 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2514 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
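/* Editorial note (per the Run-time ABI for the ARM architecture, not part of
   the original source): __aeabi_idivmod returns the quotient in r0 and the
   remainder in r1, and __aeabi_ldivmod returns the quotient in {r0, r1} and
   the remainder in {r2, r3}, which is why the plain-division optabs above can
   safely point at the divmod entry points.  */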
2515
2516 /* For SImode division the ABI provides div-without-mod routines,
2517 which are faster. */
2518 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2519 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2520
2521 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2522 divmod libcalls instead. */
2523 set_optab_libfunc (smod_optab, DImode, NULL);
2524 set_optab_libfunc (umod_optab, DImode, NULL);
2525 set_optab_libfunc (smod_optab, SImode, NULL);
2526 set_optab_libfunc (umod_optab, SImode, NULL);
2527
2528 /* Half-precision float operations. The compiler handles all operations
2529 with NULL libfuncs by converting to SFmode.  */
2530 switch (arm_fp16_format)
2531 {
2532 case ARM_FP16_FORMAT_IEEE:
2533 case ARM_FP16_FORMAT_ALTERNATIVE:
2534
2535 /* Conversions. */
2536 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_f2h_ieee"
2539 : "__gnu_f2h_alternative"));
2540 set_conv_libfunc (sext_optab, SFmode, HFmode,
2541 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2542 ? "__gnu_h2f_ieee"
2543 : "__gnu_h2f_alternative"));
2544
2545 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2546 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2547 ? "__gnu_d2h_ieee"
2548 : "__gnu_d2h_alternative"));
2549
2550 /* Arithmetic. */
2551 set_optab_libfunc (add_optab, HFmode, NULL);
2552 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2553 set_optab_libfunc (smul_optab, HFmode, NULL);
2554 set_optab_libfunc (neg_optab, HFmode, NULL);
2555 set_optab_libfunc (sub_optab, HFmode, NULL);
2556
2557 /* Comparisons. */
2558 set_optab_libfunc (eq_optab, HFmode, NULL);
2559 set_optab_libfunc (ne_optab, HFmode, NULL);
2560 set_optab_libfunc (lt_optab, HFmode, NULL);
2561 set_optab_libfunc (le_optab, HFmode, NULL);
2562 set_optab_libfunc (ge_optab, HFmode, NULL);
2563 set_optab_libfunc (gt_optab, HFmode, NULL);
2564 set_optab_libfunc (unord_optab, HFmode, NULL);
2565 break;
2566
2567 default:
2568 break;
2569 }
2570
2571 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2572 {
2573 const arm_fixed_mode_set fixed_arith_modes[] =
2574 {
2575 { E_QQmode, "qq" },
2576 { E_UQQmode, "uqq" },
2577 { E_HQmode, "hq" },
2578 { E_UHQmode, "uhq" },
2579 { E_SQmode, "sq" },
2580 { E_USQmode, "usq" },
2581 { E_DQmode, "dq" },
2582 { E_UDQmode, "udq" },
2583 { E_TQmode, "tq" },
2584 { E_UTQmode, "utq" },
2585 { E_HAmode, "ha" },
2586 { E_UHAmode, "uha" },
2587 { E_SAmode, "sa" },
2588 { E_USAmode, "usa" },
2589 { E_DAmode, "da" },
2590 { E_UDAmode, "uda" },
2591 { E_TAmode, "ta" },
2592 { E_UTAmode, "uta" }
2593 };
2594 const arm_fixed_mode_set fixed_conv_modes[] =
2595 {
2596 { E_QQmode, "qq" },
2597 { E_UQQmode, "uqq" },
2598 { E_HQmode, "hq" },
2599 { E_UHQmode, "uhq" },
2600 { E_SQmode, "sq" },
2601 { E_USQmode, "usq" },
2602 { E_DQmode, "dq" },
2603 { E_UDQmode, "udq" },
2604 { E_TQmode, "tq" },
2605 { E_UTQmode, "utq" },
2606 { E_HAmode, "ha" },
2607 { E_UHAmode, "uha" },
2608 { E_SAmode, "sa" },
2609 { E_USAmode, "usa" },
2610 { E_DAmode, "da" },
2611 { E_UDAmode, "uda" },
2612 { E_TAmode, "ta" },
2613 { E_UTAmode, "uta" },
2614 { E_QImode, "qi" },
2615 { E_HImode, "hi" },
2616 { E_SImode, "si" },
2617 { E_DImode, "di" },
2618 { E_TImode, "ti" },
2619 { E_SFmode, "sf" },
2620 { E_DFmode, "df" }
2621 };
2622 unsigned int i, j;
2623
2624 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2625 {
2626 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2627 "add", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2629 "ssadd", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2631 "usadd", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2633 "sub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2635 "sssub", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2637 "ussub", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2639 "mul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2641 "ssmul", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2643 "usmul", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2645 "div", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2647 "udiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2649 "ssdiv", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2651 "usdiv", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2653 "neg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2655 "ssneg", fixed_arith_modes[i].name, 2);
2656 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2657 "usneg", fixed_arith_modes[i].name, 2);
2658 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2659 "ashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2661 "ashr", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2663 "lshr", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2665 "ssashl", fixed_arith_modes[i].name, 3);
2666 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2667 "usashl", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2669 "cmp", fixed_arith_modes[i].name, 2);
2670 }
2671
2672 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2673 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2674 {
2675 if (i == j
2676 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2677 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2678 continue;
2679
2680 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "fract",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2684 arm_set_fixed_conv_libfunc (satfract_optab,
2685 fixed_conv_modes[i].mode,
2686 fixed_conv_modes[j].mode, "satfract",
2687 fixed_conv_modes[i].name,
2688 fixed_conv_modes[j].name);
2689 arm_set_fixed_conv_libfunc (fractuns_optab,
2690 fixed_conv_modes[i].mode,
2691 fixed_conv_modes[j].mode, "fractuns",
2692 fixed_conv_modes[i].name,
2693 fixed_conv_modes[j].name);
2694 arm_set_fixed_conv_libfunc (satfractuns_optab,
2695 fixed_conv_modes[i].mode,
2696 fixed_conv_modes[j].mode, "satfractuns",
2697 fixed_conv_modes[i].name,
2698 fixed_conv_modes[j].name);
2699 }
2700 }
2701
2702 if (TARGET_AAPCS_BASED)
2703 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2704 }
2705
2706 /* On AAPCS systems, this is the "struct __va_list". */
2707 static GTY(()) tree va_list_type;
2708
2709 /* Return the type to use as __builtin_va_list. */
2710 static tree
2711 arm_build_builtin_va_list (void)
2712 {
2713 tree va_list_name;
2714 tree ap_field;
2715
2716 if (!TARGET_AAPCS_BASED)
2717 return std_build_builtin_va_list ();
2718
2719 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2720 defined as:
2721
2722 struct __va_list
2723 {
2724 void *__ap;
2725 };
2726
2727 The C Library ABI further reinforces this definition in \S
2728 4.1.
2729
2730 We must follow this definition exactly. The structure tag
2731 name is visible in C++ mangled names, and thus forms a part
2732 of the ABI. The field name may be used by people who
2733 #include <stdarg.h>. */
2734 /* Create the type. */
2735 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2736 /* Give it the required name. */
2737 va_list_name = build_decl (BUILTINS_LOCATION,
2738 TYPE_DECL,
2739 get_identifier ("__va_list"),
2740 va_list_type);
2741 DECL_ARTIFICIAL (va_list_name) = 1;
2742 TYPE_NAME (va_list_type) = va_list_name;
2743 TYPE_STUB_DECL (va_list_type) = va_list_name;
2744 /* Create the __ap field. */
2745 ap_field = build_decl (BUILTINS_LOCATION,
2746 FIELD_DECL,
2747 get_identifier ("__ap"),
2748 ptr_type_node);
2749 DECL_ARTIFICIAL (ap_field) = 1;
2750 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2751 TYPE_FIELDS (va_list_type) = ap_field;
2752 /* Compute its layout. */
2753 layout_type (va_list_type);
2754
2755 return va_list_type;
2756 }
2757
2758 /* Return an expression of type "void *" pointing to the next
2759 available argument in a variable-argument list. VALIST is the
2760 user-level va_list object, of type __builtin_va_list. */
2761 static tree
2762 arm_extract_valist_ptr (tree valist)
2763 {
2764 if (TREE_TYPE (valist) == error_mark_node)
2765 return error_mark_node;
2766
2767 /* On an AAPCS target, the pointer is stored within "struct
2768 va_list". */
2769 if (TARGET_AAPCS_BASED)
2770 {
2771 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2772 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2773 valist, ap_field, NULL_TREE);
2774 }
2775
2776 return valist;
2777 }
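/* Editorial note (not part of the original source): on AAPCS targets the tree
   returned above denotes valist.__ap, i.e. the single pointer field of the
   struct __va_list built by arm_build_builtin_va_list; on other targets
   valist is returned unchanged.  */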
2778
2779 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2780 static void
2781 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 std_expand_builtin_va_start (valist, nextarg);
2785 }
2786
2787 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2788 static tree
2789 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2790 gimple_seq *post_p)
2791 {
2792 valist = arm_extract_valist_ptr (valist);
2793 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2794 }
2795
2796 /* Check any incompatible options that the user has specified. */
2797 static void
2798 arm_option_check_internal (struct gcc_options *opts)
2799 {
2800 int flags = opts->x_target_flags;
2801
2802 /* iWMMXt and NEON are incompatible. */
2803 if (TARGET_IWMMXT
2804 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2805 error ("iWMMXt and NEON are incompatible");
2806
2807 /* Make sure that the processor choice does not conflict with any of the
2808 other command line choices. */
2809 if (TARGET_ARM_P (flags)
2810 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2811 error ("target CPU does not support ARM mode");
2812
2813 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2814 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2815 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2816
2817 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2818 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2819
2820 /* If this target is normally configured to use APCS frames, warn if they
2821 are turned off and debugging is turned on. */
2822 if (TARGET_ARM_P (flags)
2823 && write_symbols != NO_DEBUG
2824 && !TARGET_APCS_FRAME
2825 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2826 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2827
2828 /* iWMMXt unsupported under Thumb mode. */
2829 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2830 error ("iWMMXt unsupported under Thumb mode");
2831
2832 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2833 error ("can not use -mtp=cp15 with 16-bit Thumb");
2834
2835 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2836 {
2837 error ("RTP PIC is incompatible with Thumb");
2838 flag_pic = 0;
2839 }
2840
2841 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2842 with MOVT. */
2843 if ((target_pure_code || target_slow_flash_data)
2844 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2845 {
2846 const char *flag = (target_pure_code ? "-mpure-code" :
2847 "-mslow-flash-data");
2848 error ("%s only supports non-pic code on M-profile targets with the "
2849 "MOVT instruction", flag);
2850 }
2851
2852 }
2853
2854 /* Recompute the global settings depending on target attribute options. */
2855
2856 static void
2857 arm_option_params_internal (void)
2858 {
2859 /* If we are not using the default (ARM mode) section anchor offset
2860 ranges, then set the correct ranges now. */
2861 if (TARGET_THUMB1)
2862 {
2863 /* Thumb-1 LDR instructions cannot have negative offsets.
2864 Permissible positive offset ranges are 5-bit (for byte loads),
2865 6-bit (for halfword loads), or 7-bit (for word loads).
2866 Empirical results suggest a 7-bit anchor range gives the best
2867 overall code size. */
2868 targetm.min_anchor_offset = 0;
2869 targetm.max_anchor_offset = 127;
2870 }
2871 else if (TARGET_THUMB2)
2872 {
2873 /* The minimum is set such that the total size of the block
2874 for a particular anchor is 248 + 1 + 4095 bytes, which is
2875 divisible by eight, ensuring natural spacing of anchors. */
2876 targetm.min_anchor_offset = -248;
2877 targetm.max_anchor_offset = 4095;
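/* Editorial check (not part of the original source): 248 + 1 + 4095 = 4344
   = 8 * 543, so the anchor block size quoted above is indeed a multiple of
   eight.  */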
2878 }
2879 else
2880 {
2881 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2882 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2883 }
2884
2885 /* With -Os, allow up to four conditional instructions, regardless of the tuning's max_insns_skipped.  */
2886 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2887
2888 /* For THUMB2, we limit the conditional sequence to one IT block. */
2889 if (TARGET_THUMB2)
2890 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
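/* Editorial note (not part of the original source): a Thumb-2 IT instruction
   can make at most four following instructions conditional, so the clamp
   above assumes MAX_INSN_PER_IT_BLOCK reflects that architectural limit.  */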
2891 }
2892
2893 /* True if -mflip-thumb should next add an attribute for the default
2894 mode, false if it should next add an attribute for the opposite mode. */
2895 static GTY(()) bool thumb_flipper;
2896
2897 /* Options after initial target override. */
2898 static GTY(()) tree init_optimize;
2899
2900 static void
2901 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 {
2903 if (opts->x_align_functions <= 0)
2904 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2905 && opts->x_optimize_size ? 2 : 4;
2906 }
2907
2908 /* Implement targetm.override_options_after_change. */
2909
2910 static void
2911 arm_override_options_after_change (void)
2912 {
2913 arm_configure_build_target (&arm_active_target,
2914 TREE_TARGET_OPTION (target_option_default_node),
2915 &global_options_set, false);
2916
2917 arm_override_options_after_change_1 (&global_options);
2918 }
2919
2920 /* Implement TARGET_OPTION_SAVE. */
2921 static void
2922 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2923 {
2924 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2925 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2926 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2927 }
2928
2929 /* Implement TARGET_OPTION_RESTORE. */
2930 static void
2931 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2932 {
2933 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2934 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2935 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2936 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2937 false);
2938 }
2939
2940 /* Reset options between modes that the user has specified. */
2941 static void
2942 arm_option_override_internal (struct gcc_options *opts,
2943 struct gcc_options *opts_set)
2944 {
2945 arm_override_options_after_change_1 (opts);
2946
2947 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 /* The default is to enable interworking, so this warning message would
2950 be confusing to users who have just compiled with, e.g., -march=armv3.  */
2951 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2952 opts->x_target_flags &= ~MASK_INTERWORK;
2953 }
2954
2955 if (TARGET_THUMB_P (opts->x_target_flags)
2956 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2957 {
2958 warning (0, "target CPU does not support THUMB instructions");
2959 opts->x_target_flags &= ~MASK_THUMB;
2960 }
2961
2962 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2963 {
2964 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2965 opts->x_target_flags &= ~MASK_APCS_FRAME;
2966 }
2967
2968 /* Callee super interworking implies thumb interworking. Adding
2969 this to the flags here simplifies the logic elsewhere. */
2970 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2971 opts->x_target_flags |= MASK_INTERWORK;
2972
2973 /* Need to remember the initial values so that combinations of options like
2974    -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
2975 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2976
2977 if (! opts_set->x_arm_restrict_it)
2978 opts->x_arm_restrict_it = arm_arch8;
2979
2980 /* ARM execution state and M profile don't have [restrict] IT. */
2981 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2982 opts->x_arm_restrict_it = 0;
2983
2984 /* Enable -munaligned-access by default for
2985 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2986 i.e. Thumb2 and ARM state only.
2987 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2988 - ARMv8 architecture-based processors.
2989
2990 Disable -munaligned-access by default for
2991 - all pre-ARMv6 architecture-based processors
2992 - ARMv6-M architecture-based processors
2993 - ARMv8-M Baseline processors. */
2994
2995 if (! opts_set->x_unaligned_access)
2996 {
2997 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2998 && arm_arch6 && (arm_arch_notm || arm_arch7));
2999 }
3000 else if (opts->x_unaligned_access == 1
3001 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3002 {
3003 warning (0, "target CPU does not support unaligned accesses");
3004 opts->x_unaligned_access = 0;
3005 }
3006
3007 /* Don't warn since it's on by default in -O2. */
3008 if (TARGET_THUMB1_P (opts->x_target_flags))
3009 opts->x_flag_schedule_insns = 0;
3010 else
3011 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3012
3013 /* Disable shrink-wrap when optimizing function for size, since it tends to
3014 generate additional returns. */
3015 if (optimize_function_for_size_p (cfun)
3016 && TARGET_THUMB2_P (opts->x_target_flags))
3017 opts->x_flag_shrink_wrap = false;
3018 else
3019 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3020
3021 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3022 - epilogue_insns - does not accurately model the corresponding insns
3023 emitted in the asm file. In particular, see the comment in thumb_exit
3024 'Find out how many of the (return) argument registers we can corrupt'.
3025 As a consequence, the epilogue may clobber registers without fipa-ra
3026 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3027 TODO: Accurately model clobbers for epilogue_insns and reenable
3028 fipa-ra. */
3029 if (TARGET_THUMB1_P (opts->x_target_flags))
3030 opts->x_flag_ipa_ra = 0;
3031 else
3032 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3033
3034 /* Thumb2 inline assembly code should always use unified syntax.
3035 This will apply to ARM and Thumb1 eventually. */
3036 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3037
3038 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3039 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3040 #endif
3041 }
3042
3043 static sbitmap isa_all_fpubits;
3044 static sbitmap isa_quirkbits;
3045
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3049 void
3050 arm_configure_build_target (struct arm_build_target *target,
3051 struct cl_target_option *opts,
3052 struct gcc_options *opts_set,
3053 bool warn_compatible)
3054 {
3055 const cpu_option *arm_selected_tune = NULL;
3056 const arch_option *arm_selected_arch = NULL;
3057 const cpu_option *arm_selected_cpu = NULL;
3058 const arm_fpu_desc *arm_selected_fpu = NULL;
3059 const char *tune_opts = NULL;
3060 const char *arch_opts = NULL;
3061 const char *cpu_opts = NULL;
3062
3063 bitmap_clear (target->isa);
3064 target->core_name = NULL;
3065 target->arch_name = NULL;
3066
3067 if (opts_set->x_arm_arch_string)
3068 {
3069 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3070 "-march",
3071 opts->x_arm_arch_string);
3072 arch_opts = strchr (opts->x_arm_arch_string, '+');
3073 }
3074
3075 if (opts_set->x_arm_cpu_string)
3076 {
3077 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3078 opts->x_arm_cpu_string);
3079 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3080 arm_selected_tune = arm_selected_cpu;
3081 /* If taking the tuning from -mcpu, we don't need to rescan the
3082 options for tuning. */
3083 }
3084
3085 if (opts_set->x_arm_tune_string)
3086 {
3087 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3088 opts->x_arm_tune_string);
3089 tune_opts = strchr (opts->x_arm_tune_string, '+');
3090 }
3091
3092 if (arm_selected_arch)
3093 {
3094 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3095 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3096 arch_opts);
3097
3098 if (arm_selected_cpu)
3099 {
3100 auto_sbitmap cpu_isa (isa_num_bits);
3101 auto_sbitmap isa_delta (isa_num_bits);
3102
3103 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3104 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3105 cpu_opts);
3106 bitmap_xor (isa_delta, cpu_isa, target->isa);
3107 /* Ignore any bits that are quirk bits. */
3108 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3109 /* Ignore (for now) any bits that might be set by -mfpu. */
3110 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3111
3112 if (!bitmap_empty_p (isa_delta))
3113 {
3114 if (warn_compatible)
3115 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3116 arm_selected_cpu->common.name,
3117 arm_selected_arch->common.name);
3118 /* -march wins for code generation.
3119 -mcpu wins for default tuning. */
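/* Editorial example (illustrative, not part of the original source):
   "-mcpu=cortex-m4 -march=armv7-a" would trigger the warning above, generate
   code for armv7-a, and, absent an explicit -mtune, keep Cortex-M4 as the
   tuning target.  */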
3120 if (!arm_selected_tune)
3121 arm_selected_tune = arm_selected_cpu;
3122
3123 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3124 target->arch_name = arm_selected_arch->common.name;
3125 }
3126 else
3127 {
3128 /* Architecture and CPU are essentially the same.
3129 Prefer the CPU setting. */
3130 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3131 target->core_name = arm_selected_cpu->common.name;
3132 /* Copy the CPU's capabilities, so that we inherit the
3133 appropriate extensions and quirks. */
3134 bitmap_copy (target->isa, cpu_isa);
3135 }
3136 }
3137 else
3138 {
3139 /* Pick a CPU based on the architecture. */
3140 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3141 target->arch_name = arm_selected_arch->common.name;
3142 /* Note: target->core_name is left unset in this path. */
3143 }
3144 }
3145 else if (arm_selected_cpu)
3146 {
3147 target->core_name = arm_selected_cpu->common.name;
3148 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3150 cpu_opts);
3151 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3152 }
3153 /* If the user did not specify a processor or architecture, choose
3154 one for them. */
3155 else
3156 {
3157 const cpu_option *sel;
3158 auto_sbitmap sought_isa (isa_num_bits);
3159 bitmap_clear (sought_isa);
3160 auto_sbitmap default_isa (isa_num_bits);
3161
3162 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3163 TARGET_CPU_DEFAULT);
3164 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3165 gcc_assert (arm_selected_cpu->common.name);
3166
3167 /* RWE: All of the selection logic below (to the end of this
3168 'if' clause) looks somewhat suspect. It appears to be mostly
3169 there to support forcing thumb support when the default CPU
3170 does not have thumb (somewhat dubious in terms of what the
3171 user might be expecting). I think it should be removed once
3172 support for the pre-thumb era cores is removed. */
3173 sel = arm_selected_cpu;
3174 arm_initialize_isa (default_isa, sel->common.isa_bits);
3175 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3176 cpu_opts);
3177
3178 /* Now check to see if the user has specified any command line
3179 switches that require certain abilities from the cpu. */
3180
3181 if (TARGET_INTERWORK || TARGET_THUMB)
3182 {
3183 bitmap_set_bit (sought_isa, isa_bit_thumb);
3184 bitmap_set_bit (sought_isa, isa_bit_mode32);
3185
3186 /* There are no ARM processors that support both APCS-26 and
3187 interworking. Therefore we forcibly remove MODE26 from
3188 the isa features here (if it was set), so that the
3189 search below will always be able to find a compatible
3190 processor. */
3191 bitmap_clear_bit (default_isa, isa_bit_mode26);
3192 }
3193
3194 /* If there are such requirements and the default CPU does not
3195 satisfy them, we need to run over the complete list of
3196 cores looking for one that is satisfactory. */
3197 if (!bitmap_empty_p (sought_isa)
3198 && !bitmap_subset_p (sought_isa, default_isa))
3199 {
3200 auto_sbitmap candidate_isa (isa_num_bits);
3201 /* We're only interested in a CPU with at least the
3202 capabilities of the default CPU and the required
3203 additional features. */
3204 bitmap_ior (default_isa, default_isa, sought_isa);
3205
3206 /* Try to locate a CPU type that supports all of the abilities
3207 of the default CPU, plus the extra abilities requested by
3208 the user. */
3209 for (sel = all_cores; sel->common.name != NULL; sel++)
3210 {
3211 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3212 /* An exact match? */
3213 if (bitmap_equal_p (default_isa, candidate_isa))
3214 break;
3215 }
3216
3217 if (sel->common.name == NULL)
3218 {
3219 unsigned current_bit_count = isa_num_bits;
3220 const cpu_option *best_fit = NULL;
3221
3222 /* Ideally we would like to issue an error message here
3223 saying that it was not possible to find a CPU compatible
3224 with the default CPU, but which also supports the command
3225 line options specified by the programmer, and so they
3226 ought to use the -mcpu=<name> command line option to
3227 override the default CPU type.
3228
3229 If we cannot find a CPU that has exactly the
3230 characteristics of the default CPU and the given
3231 command line options we scan the array again looking
3232 for a best match. The best match must have at least
3233 the capabilities of the perfect match. */
3234 for (sel = all_cores; sel->common.name != NULL; sel++)
3235 {
3236 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3237
3238 if (bitmap_subset_p (default_isa, candidate_isa))
3239 {
3240 unsigned count;
3241
3242 bitmap_and_compl (candidate_isa, candidate_isa,
3243 default_isa);
3244 count = bitmap_popcount (candidate_isa);
3245
3246 if (count < current_bit_count)
3247 {
3248 best_fit = sel;
3249 current_bit_count = count;
3250 }
3251 }
3252 }
3253 
3254 gcc_assert (best_fit);
3255 sel = best_fit;
3256 }
3257 arm_selected_cpu = sel;
3258 }
3259
3260 /* Now we know the CPU, we can finally initialize the target
3261 structure. */
3262 target->core_name = arm_selected_cpu->common.name;
3263 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3264 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3265 cpu_opts);
3266 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3267 }
3268
3269 gcc_assert (arm_selected_cpu);
3270 gcc_assert (arm_selected_arch);
3271
3272 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3273 {
3274 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3275 auto_sbitmap fpu_bits (isa_num_bits);
3276
3277 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3278 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3279 bitmap_ior (target->isa, target->isa, fpu_bits);
3280 }
3281
3282 if (!arm_selected_tune)
3283 arm_selected_tune = arm_selected_cpu;
3284 else /* Validate the features passed to -mtune. */
3285 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3286
3287 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3288
3289 /* Finish initializing the target structure. */
3290 target->arch_pp_name = arm_selected_arch->arch;
3291 target->base_arch = arm_selected_arch->base_arch;
3292 target->profile = arm_selected_arch->profile;
3293
3294 target->tune_flags = tune_data->tune_flags;
3295 target->tune = tune_data->tune;
3296 target->tune_core = tune_data->scheduler;
3297 }
3298
3299 /* Fix up any incompatible options that the user has specified. */
3300 static void
3301 arm_option_override (void)
3302 {
3303 static const enum isa_feature fpu_bitlist[]
3304 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3305 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3306 cl_target_option opts;
3307
3308 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3309 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3310
3311 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3313
3314 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3315
3316 if (!global_options_set.x_arm_fpu_index)
3317 {
3318 bool ok;
3319 int fpu_index;
3320
3321 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3322 CL_TARGET);
3323 gcc_assert (ok);
3324 arm_fpu_index = (enum fpu_type) fpu_index;
3325 }
3326
3327 cl_target_option_save (&opts, &global_options);
3328 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3329 true);
3330
3331 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3332 SUBTARGET_OVERRIDE_OPTIONS;
3333 #endif
3334
3335 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3336 arm_base_arch = arm_active_target.base_arch;
3337
3338 arm_tune = arm_active_target.tune_core;
3339 tune_flags = arm_active_target.tune_flags;
3340 current_tune = arm_active_target.tune;
3341
3342 /* TBD: Dwarf info for apcs frame is not handled yet. */
3343 if (TARGET_APCS_FRAME)
3344 flag_shrink_wrap = false;
3345
3346 /* BPABI targets use linker tricks to allow interworking on cores
3347 without thumb support. */
3348 if (TARGET_INTERWORK
3349 && !TARGET_BPABI
3350 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3351 {
3352 warning (0, "target CPU does not support interworking" );
3353 target_flags &= ~MASK_INTERWORK;
3354 }
3355
3356 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3357 {
3358 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3359 target_flags |= MASK_APCS_FRAME;
3360 }
3361
3362 if (TARGET_POKE_FUNCTION_NAME)
3363 target_flags |= MASK_APCS_FRAME;
3364
3365 if (TARGET_APCS_REENT && flag_pic)
3366 error ("-fpic and -mapcs-reent are incompatible");
3367
3368 if (TARGET_APCS_REENT)
3369 warning (0, "APCS reentrant code not supported. Ignored");
3370
3371 /* Initialize boolean versions of the architectural flags, for use
3372 in the arm.md file. */
3373 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3374 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3375 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3376 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3377 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3378 arm_arch5te = arm_arch5e
3379 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3380 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3381 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3382 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3383 arm_arch6m = arm_arch6 && !arm_arch_notm;
3384 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3385 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3386 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3387 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3388 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3389 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3390 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3391 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3392 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3393 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3394 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3395 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3396 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3397 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3398 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3399 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3400 if (arm_fp16_inst)
3401 {
3402 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3403 error ("selected fp16 options are incompatible");
3404 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3405 }
3406
3407
3408 /* Set up some tuning parameters. */
3409 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3410 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3411 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3412 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3413 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3414 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3415
3416 /* And finally, set up some quirks. */
3417 arm_arch_no_volatile_ce
3418 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3419 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3420 isa_bit_quirk_armv6kz);
3421
3422 /* V5 code we generate is completely interworking capable, so we turn off
3423 TARGET_INTERWORK here to avoid many tests later on. */
3424
3425 /* XXX However, we must pass the right pre-processor defines to CPP
3426 or GLD can get confused. This is a hack. */
3427 if (TARGET_INTERWORK)
3428 arm_cpp_interwork = 1;
3429
3430 if (arm_arch5)
3431 target_flags &= ~MASK_INTERWORK;
3432
3433 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3434 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3435
3436 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3437 error ("iwmmxt abi requires an iwmmxt capable cpu");
3438
3439 /* If soft-float is specified then don't use FPU. */
3440 if (TARGET_SOFT_FLOAT)
3441 arm_fpu_attr = FPU_NONE;
3442 else
3443 arm_fpu_attr = FPU_VFP;
3444
3445 if (TARGET_AAPCS_BASED)
3446 {
3447 if (TARGET_CALLER_INTERWORKING)
3448 error ("AAPCS does not support -mcaller-super-interworking");
3449 else
3450 if (TARGET_CALLEE_INTERWORKING)
3451 error ("AAPCS does not support -mcallee-super-interworking");
3452 }
3453
3454 /* __fp16 support currently assumes the core has ldrh. */
3455 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3456 sorry ("__fp16 and no ldrh");
3457
3458 if (TARGET_AAPCS_BASED)
3459 {
3460 if (arm_abi == ARM_ABI_IWMMXT)
3461 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3462 else if (TARGET_HARD_FLOAT_ABI)
3463 {
3464 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3465 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3466 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3467 }
3468 else
3469 arm_pcs_default = ARM_PCS_AAPCS;
3470 }
3471 else
3472 {
3473 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3474 sorry ("-mfloat-abi=hard and VFP");
3475
3476 if (arm_abi == ARM_ABI_APCS)
3477 arm_pcs_default = ARM_PCS_APCS;
3478 else
3479 arm_pcs_default = ARM_PCS_ATPCS;
3480 }
3481
3482 /* For arm2/3 there is no need to do any scheduling if we are doing
3483 software floating-point. */
3484 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3485 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3486
3487 /* Use the cp15 method if it is available. */
3488 if (target_thread_pointer == TP_AUTO)
3489 {
3490 if (arm_arch6k && !TARGET_THUMB1)
3491 target_thread_pointer = TP_CP15;
3492 else
3493 target_thread_pointer = TP_SOFT;
3494 }
3495
3496 /* Override the default structure alignment for AAPCS ABI. */
3497 if (!global_options_set.x_arm_structure_size_boundary)
3498 {
3499 if (TARGET_AAPCS_BASED)
3500 arm_structure_size_boundary = 8;
3501 }
3502 else
3503 {
3504 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3505
3506 if (arm_structure_size_boundary != 8
3507 && arm_structure_size_boundary != 32
3508 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3509 {
3510 if (ARM_DOUBLEWORD_ALIGN)
3511 warning (0,
3512 "structure size boundary can only be set to 8, 32 or 64");
3513 else
3514 warning (0, "structure size boundary can only be set to 8 or 32");
3515 arm_structure_size_boundary
3516 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3517 }
3518 }
3519
3520 if (TARGET_VXWORKS_RTP)
3521 {
3522 if (!global_options_set.x_arm_pic_data_is_text_relative)
3523 arm_pic_data_is_text_relative = 0;
3524 }
3525 else if (flag_pic
3526 && !arm_pic_data_is_text_relative
3527 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3528 /* When text & data segments don't have a fixed displacement, the
3529 intended use is with a single, read only, pic base register.
3530 Unless the user explicitly requested not to do that, set
3531 it. */
3532 target_flags |= MASK_SINGLE_PIC_BASE;
3533
3534 /* If stack checking is disabled, we can use r10 as the PIC register,
3535 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3536 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3537 {
3538 if (TARGET_VXWORKS_RTP)
3539 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3540 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3541 }
3542
3543 if (flag_pic && TARGET_VXWORKS_RTP)
3544 arm_pic_register = 9;
3545
3546 if (arm_pic_register_string != NULL)
3547 {
3548 int pic_register = decode_reg_name (arm_pic_register_string);
3549
3550 if (!flag_pic)
3551 warning (0, "-mpic-register= is useless without -fpic");
3552
3553 /* Prevent the user from choosing an obviously stupid PIC register. */
3554 else if (pic_register < 0 || call_used_regs[pic_register]
3555 || pic_register == HARD_FRAME_POINTER_REGNUM
3556 || pic_register == STACK_POINTER_REGNUM
3557 || pic_register >= PC_REGNUM
3558 || (TARGET_VXWORKS_RTP
3559 && (unsigned int) pic_register != arm_pic_register))
3560 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3561 else
3562 arm_pic_register = pic_register;
3563 }
3564
3565 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3566 if (fix_cm3_ldrd == 2)
3567 {
3568 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3569 fix_cm3_ldrd = 1;
3570 else
3571 fix_cm3_ldrd = 0;
3572 }
3573
3574 /* Hot/Cold partitioning is not currently supported, since we can't
3575 handle literal pool placement in that case. */
3576 if (flag_reorder_blocks_and_partition)
3577 {
3578 inform (input_location,
3579 "-freorder-blocks-and-partition not supported on this architecture");
3580 flag_reorder_blocks_and_partition = 0;
3581 flag_reorder_blocks = 1;
3582 }
3583
3584 if (flag_pic)
3585 /* Hoisting PIC address calculations more aggressively provides a small,
3586 but measurable, size reduction for PIC code. Therefore, we decrease
3587 the bar for unrestricted expression hoisting to the cost of PIC address
3588 calculation, which is 2 instructions. */
3589 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3590 global_options.x_param_values,
3591 global_options_set.x_param_values);
3592
3593 /* ARM EABI defaults to strict volatile bitfields. */
3594 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3595 && abi_version_at_least(2))
3596 flag_strict_volatile_bitfields = 1;
3597
3598 /* Enable sw prefetching at -O3 for CPUs that have prefetch and where
3599 we have deemed it beneficial (signified by setting
3600 prefetch.num_slots to 1 or more). */
3601 if (flag_prefetch_loop_arrays < 0
3602 && HAVE_prefetch
3603 && optimize >= 3
3604 && current_tune->prefetch.num_slots > 0)
3605 flag_prefetch_loop_arrays = 1;
3606
3607 /* Set up parameters to be used in prefetching algorithm. Do not
3608 override the defaults unless we are tuning for a core we have
3609 researched values for. */
3610 if (current_tune->prefetch.num_slots > 0)
3611 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3612 current_tune->prefetch.num_slots,
3613 global_options.x_param_values,
3614 global_options_set.x_param_values);
3615 if (current_tune->prefetch.l1_cache_line_size >= 0)
3616 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3617 current_tune->prefetch.l1_cache_line_size,
3618 global_options.x_param_values,
3619 global_options_set.x_param_values);
3620 if (current_tune->prefetch.l1_cache_size >= 0)
3621 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3622 current_tune->prefetch.l1_cache_size,
3623 global_options.x_param_values,
3624 global_options_set.x_param_values);
3625
3626 /* Use Neon to perform 64-bit operations rather than core
3627 registers. */
3628 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3629 if (use_neon_for_64bits == 1)
3630 prefer_neon_for_64bits = true;
3631
3632 /* Use the alternative scheduling-pressure algorithm by default. */
3633 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3634 global_options.x_param_values,
3635 global_options_set.x_param_values);
3636
3637 /* Look through ready list and all of queue for instructions
3638 relevant for L2 auto-prefetcher. */
3639 int param_sched_autopref_queue_depth;
3640
3641 switch (current_tune->sched_autopref)
3642 {
3643 case tune_params::SCHED_AUTOPREF_OFF:
3644 param_sched_autopref_queue_depth = -1;
3645 break;
3646
3647 case tune_params::SCHED_AUTOPREF_RANK:
3648 param_sched_autopref_queue_depth = 0;
3649 break;
3650
3651 case tune_params::SCHED_AUTOPREF_FULL:
3652 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3653 break;
3654
3655 default:
3656 gcc_unreachable ();
3657 }
3658
3659 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3660 param_sched_autopref_queue_depth,
3661 global_options.x_param_values,
3662 global_options_set.x_param_values);
3663
3664 /* Currently, for slow flash data, we just disable literal pools. We also
3665 disable it for pure-code. */
3666 if (target_slow_flash_data || target_pure_code)
3667 arm_disable_literal_pool = true;
3668
3669 if (use_cmse && !arm_arch_cmse)
3670 error ("target CPU does not support ARMv8-M Security Extensions");
3671
3672 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3673 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3674 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3675 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3676
3677 /* Disable scheduling fusion by default if it's not armv7 processor
3678 or doesn't prefer ldrd/strd. */
3679 if (flag_schedule_fusion == 2
3680 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3681 flag_schedule_fusion = 0;
3682
3683 /* Need to remember initial options before they are overridden. */
3684 init_optimize = build_optimization_node (&global_options);
3685
3686 arm_option_override_internal (&global_options, &global_options_set);
3687 arm_option_check_internal (&global_options);
3688 arm_option_params_internal ();
3689
3690 /* Create the default target_options structure. */
3691 target_option_default_node = target_option_current_node
3692 = build_target_option_node (&global_options);
3693
3694 /* Register global variables with the garbage collector. */
3695 arm_add_gc_roots ();
3696
3697 /* Init initial mode for testing. */
3698 thumb_flipper = TARGET_THUMB;
3699 }
3700
3701 static void
3702 arm_add_gc_roots (void)
3703 {
3704 gcc_obstack_init(&minipool_obstack);
3705 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3706 }
3707 \f
3708 /* A table of known ARM exception types.
3709 For use with the interrupt function attribute. */
3710
3711 typedef struct
3712 {
3713 const char *const arg;
3714 const unsigned long return_value;
3715 }
3716 isr_attribute_arg;
3717
3718 static const isr_attribute_arg isr_attribute_args [] =
3719 {
3720 { "IRQ", ARM_FT_ISR },
3721 { "irq", ARM_FT_ISR },
3722 { "FIQ", ARM_FT_FIQ },
3723 { "fiq", ARM_FT_FIQ },
3724 { "ABORT", ARM_FT_ISR },
3725 { "abort", ARM_FT_ISR },
3726 { "ABORT", ARM_FT_ISR },
3727 { "abort", ARM_FT_ISR },
3728 { "UNDEF", ARM_FT_EXCEPTION },
3729 { "undef", ARM_FT_EXCEPTION },
3730 { "SWI", ARM_FT_EXCEPTION },
3731 { "swi", ARM_FT_EXCEPTION },
3732 { NULL, ARM_FT_NORMAL }
3733 };
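/* Example usage (illustrative; the handler name is hypothetical):
     void uart_rx_handler (void) __attribute__ ((interrupt ("IRQ")));
   The string argument is looked up in the table above, so this
   declaration is classified as ARM_FT_ISR.  */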
3734
3735 /* Returns the (interrupt) function type of the current
3736 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3737
3738 static unsigned long
3739 arm_isr_value (tree argument)
3740 {
3741 const isr_attribute_arg * ptr;
3742 const char * arg;
3743
3744 if (!arm_arch_notm)
3745 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3746
3747 /* No argument - default to IRQ. */
3748 if (argument == NULL_TREE)
3749 return ARM_FT_ISR;
3750
3751 /* Get the value of the argument. */
3752 if (TREE_VALUE (argument) == NULL_TREE
3753 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3754 return ARM_FT_UNKNOWN;
3755
3756 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3757
3758 /* Check it against the list of known arguments. */
3759 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3760 if (streq (arg, ptr->arg))
3761 return ptr->return_value;
3762
3763 /* An unrecognized interrupt type. */
3764 return ARM_FT_UNKNOWN;
3765 }
3766
3767 /* Computes the type of the current function. */
3768
3769 static unsigned long
3770 arm_compute_func_type (void)
3771 {
3772 unsigned long type = ARM_FT_UNKNOWN;
3773 tree a;
3774 tree attr;
3775
3776 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3777
3778 /* Decide if the current function is volatile. Such functions
3779 never return, and many memory cycles can be saved by not storing
3780 register values that will never be needed again. This optimization
3781 was added to speed up context switching in a kernel application. */
3782 if (optimize > 0
3783 && (TREE_NOTHROW (current_function_decl)
3784 || !(flag_unwind_tables
3785 || (flag_exceptions
3786 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3787 && TREE_THIS_VOLATILE (current_function_decl))
3788 type |= ARM_FT_VOLATILE;
3789
3790 if (cfun->static_chain_decl != NULL)
3791 type |= ARM_FT_NESTED;
3792
3793 attr = DECL_ATTRIBUTES (current_function_decl);
3794
3795 a = lookup_attribute ("naked", attr);
3796 if (a != NULL_TREE)
3797 type |= ARM_FT_NAKED;
3798
3799 a = lookup_attribute ("isr", attr);
3800 if (a == NULL_TREE)
3801 a = lookup_attribute ("interrupt", attr);
3802
3803 if (a == NULL_TREE)
3804 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3805 else
3806 type |= arm_isr_value (TREE_VALUE (a));
3807
3808 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3809 type |= ARM_FT_CMSE_ENTRY;
3810
3811 return type;
3812 }
3813
3814 /* Returns the type of the current function. */
3815
3816 unsigned long
3817 arm_current_func_type (void)
3818 {
3819 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3820 cfun->machine->func_type = arm_compute_func_type ();
3821
3822 return cfun->machine->func_type;
3823 }
3824
3825 bool
3826 arm_allocate_stack_slots_for_args (void)
3827 {
3828 /* Naked functions should not allocate stack slots for arguments. */
3829 return !IS_NAKED (arm_current_func_type ());
3830 }
3831
3832 static bool
3833 arm_warn_func_return (tree decl)
3834 {
3835 /* Naked functions are implemented entirely in assembly, including the
3836 return sequence, so suppress warnings about this. */
3837 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3838 }
3839
3840 \f
3841 /* Output assembler code for a block containing the constant parts
3842 of a trampoline, leaving space for the variable parts.
3843
3844 On the ARM, (if r8 is the static chain regnum, and remembering that
3845 referencing pc adds an offset of 8) the trampoline looks like:
3846 ldr r8, [pc, #0]
3847 ldr pc, [pc]
3848 .word static chain value
3849 .word function's address
3850 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
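/* Note: the two .word slots are filled in at run time by
   arm_trampoline_init below; the static chain value is stored at
   offset 8 and the target function's address at offset 12
   (offsets 12 and 16 for the 16-bit Thumb variant).  */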
3851
3852 static void
3853 arm_asm_trampoline_template (FILE *f)
3854 {
3855 fprintf (f, "\t.syntax unified\n");
3856
3857 if (TARGET_ARM)
3858 {
3859 fprintf (f, "\t.arm\n");
3860 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3861 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3862 }
3863 else if (TARGET_THUMB2)
3864 {
3865 fprintf (f, "\t.thumb\n");
3866 /* The Thumb-2 trampoline is similar to the arm implementation.
3867 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3868 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3869 STATIC_CHAIN_REGNUM, PC_REGNUM);
3870 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3871 }
3872 else
3873 {
3874 ASM_OUTPUT_ALIGN (f, 2);
3875 fprintf (f, "\t.code\t16\n");
3876 fprintf (f, ".Ltrampoline_start:\n");
3877 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3878 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3879 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3880 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3881 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3882 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3883 }
3884 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3885 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3886 }
3887
3888 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3889
3890 static void
3891 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3892 {
3893 rtx fnaddr, mem, a_tramp;
3894
3895 emit_block_move (m_tramp, assemble_trampoline_template (),
3896 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3897
3898 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3899 emit_move_insn (mem, chain_value);
3900
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3902 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3903 emit_move_insn (mem, fnaddr);
3904
3905 a_tramp = XEXP (m_tramp, 0);
3906 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3907 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3908 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3909 }
3910
3911 /* Thumb trampolines should be entered in thumb mode, so set
3912 the bottom bit of the address. */
3913
3914 static rtx
3915 arm_trampoline_adjust_address (rtx addr)
3916 {
3917 if (TARGET_THUMB)
3918 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3919 NULL, 0, OPTAB_LIB_WIDEN);
3920 return addr;
3921 }
3922 \f
3923 /* Return 1 if it is possible to return using a single instruction.
3924 If SIBLING is non-null, this is a test for a return before a sibling
3925 call. SIBLING is the call insn, so we can examine its register usage. */
3926
3927 int
3928 use_return_insn (int iscond, rtx sibling)
3929 {
3930 int regno;
3931 unsigned int func_type;
3932 unsigned long saved_int_regs;
3933 unsigned HOST_WIDE_INT stack_adjust;
3934 arm_stack_offsets *offsets;
3935
3936 /* Never use a return instruction before reload has run. */
3937 if (!reload_completed)
3938 return 0;
3939
3940 func_type = arm_current_func_type ();
3941
3942 /* Naked, volatile and stack alignment functions need special
3943 consideration. */
3944 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3945 return 0;
3946
3947 /* So do interrupt functions that use the frame pointer and Thumb
3948 interrupt functions. */
3949 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3950 return 0;
3951
3952 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3953 && !optimize_function_for_size_p (cfun))
3954 return 0;
3955
3956 offsets = arm_get_frame_offsets ();
3957 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3958
3959 /* As do variadic functions. */
3960 if (crtl->args.pretend_args_size
3961 || cfun->machine->uses_anonymous_args
3962 /* Or if the function calls __builtin_eh_return () */
3963 || crtl->calls_eh_return
3964 /* Or if the function calls alloca */
3965 || cfun->calls_alloca
3966 /* Or if there is a stack adjustment. However, if the stack pointer
3967 is saved on the stack, we can use a pre-incrementing stack load. */
3968 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3969 && stack_adjust == 4))
3970 /* Or if the static chain register was saved above the frame, under the
3971 assumption that the stack pointer isn't saved on the stack. */
3972 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3973 && arm_compute_static_chain_stack_bytes() != 0))
3974 return 0;
3975
3976 saved_int_regs = offsets->saved_regs_mask;
3977
3978 /* Unfortunately, the insn
3979
3980 ldmib sp, {..., sp, ...}
3981
3982 triggers a bug on most SA-110 based devices, such that the stack
3983 pointer won't be correctly restored if the instruction takes a
3984 page fault. We work around this problem by popping r3 along with
3985 the other registers, since that is never slower than executing
3986 another instruction.
3987
3988 We test for !arm_arch5 here, because code for any architecture
3989 less than this could potentially be run on one of the buggy
3990 chips. */
3991 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3992 {
3993 /* Validate that r3 is a call-clobbered register (always true in
3994 the default abi) ... */
3995 if (!call_used_regs[3])
3996 return 0;
3997
3998 /* ... that it isn't being used for a return value ... */
3999 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4000 return 0;
4001
4002 /* ... or for a tail-call argument ... */
4003 if (sibling)
4004 {
4005 gcc_assert (CALL_P (sibling));
4006
4007 if (find_regno_fusage (sibling, USE, 3))
4008 return 0;
4009 }
4010
4011 /* ... and that there are no call-saved registers in r0-r2
4012 (always true in the default ABI). */
4013 if (saved_int_regs & 0x7)
4014 return 0;
4015 }
4016
4017 /* Can't be done if interworking with Thumb, and any registers have been
4018 stacked. */
4019 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4020 return 0;
4021
4022 /* On StrongARM, conditional returns are expensive if they aren't
4023 taken and multiple registers have been stacked. */
4024 if (iscond && arm_tune_strongarm)
4025 {
4026 /* Conditional return when just the LR is stored is a simple
4027 conditional-load instruction, that's not expensive. */
4028 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4029 return 0;
4030
4031 if (flag_pic
4032 && arm_pic_register != INVALID_REGNUM
4033 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4034 return 0;
4035 }
4036
4037 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4038 several instructions if anything needs to be popped. */
4039 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4040 return 0;
4041
4042 /* If there are saved registers but the LR isn't saved, then we need
4043 two instructions for the return. */
4044 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4045 return 0;
4046
4047 /* Can't be done if any of the VFP regs are pushed,
4048 since this also requires an insn. */
4049 if (TARGET_HARD_FLOAT)
4050 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4051 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4052 return 0;
4053
4054 if (TARGET_REALLY_IWMMXT)
4055 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4056 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4057 return 0;
4058
4059 return 1;
4060 }
4061
4062 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4063 shrink-wrapping if possible. This is the case if we need to emit a
4064 prologue, which we can test by looking at the offsets. */
4065 bool
4066 use_simple_return_p (void)
4067 {
4068 arm_stack_offsets *offsets;
4069
4070 /* Note this function can be called before or after reload. */
4071 if (!reload_completed)
4072 arm_compute_frame_layout ();
4073
4074 offsets = arm_get_frame_offsets ();
4075 return offsets->outgoing_args != 0;
4076 }
4077
4078 /* Return TRUE if int I is a valid immediate ARM constant. */
4079
4080 int
4081 const_ok_for_arm (HOST_WIDE_INT i)
4082 {
4083 int lowbit;
4084
4085 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4086 be all zero, or all one. */
4087 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4088 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4089 != ((~(unsigned HOST_WIDE_INT) 0)
4090 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4091 return FALSE;
4092
4093 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4094
4095 /* Fast return for 0 and small values. We must do this for zero, since
4096 the code below can't handle that one case. */
4097 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4098 return TRUE;
4099
4100 /* Get the number of trailing zeros. */
4101 lowbit = ffs((int) i) - 1;
4102
4103 /* Only even shifts are allowed in ARM mode so round down to the
4104 nearest even number. */
4105 if (TARGET_ARM)
4106 lowbit &= ~1;
4107
4108 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4109 return TRUE;
4110
4111 if (TARGET_ARM)
4112 {
4113 /* Allow rotated constants in ARM mode. */
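/* For instance (illustrative), 0xf000000f is 0xff rotated right by
   four and is accepted here, whereas 0xf000001f needs nine
   significant bits under any even rotation and is rejected.  */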
4114 if (lowbit <= 4
4115 && ((i & ~0xc000003f) == 0
4116 || (i & ~0xf000000f) == 0
4117 || (i & ~0xfc000003) == 0))
4118 return TRUE;
4119 }
4120 else if (TARGET_THUMB2)
4121 {
4122 HOST_WIDE_INT v;
4123
4124 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
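/* For example (illustrative), 0x00120012 and 0x34343434 match these
   patterns, while 0x00120034 does not.  */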
4125 v = i & 0xff;
4126 v |= v << 16;
4127 if (i == v || i == (v | (v << 8)))
4128 return TRUE;
4129
4130 /* Allow repeated pattern 0xXY00XY00. */
4131 v = i & 0xff00;
4132 v |= v << 16;
4133 if (i == v)
4134 return TRUE;
4135 }
4136 else if (TARGET_HAVE_MOVT)
4137 {
4138 /* Thumb-1 Targets with MOVT. */
4139 if (i > 0xffff)
4140 return FALSE;
4141 else
4142 return TRUE;
4143 }
4144
4145 return FALSE;
4146 }
4147
4148 /* Return true if I is a valid constant for the operation CODE. */
4149 int
4150 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4151 {
4152 if (const_ok_for_arm (i))
4153 return 1;
4154
4155 switch (code)
4156 {
4157 case SET:
4158 /* See if we can use movw. */
4159 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4160 return 1;
4161 else
4162 /* Otherwise, try mvn. */
4163 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4164
4165 case PLUS:
4166 /* See if we can use addw or subw. */
4167 if (TARGET_THUMB2
4168 && ((i & 0xfffff000) == 0
4169 || ((-i) & 0xfffff000) == 0))
4170 return 1;
4171 /* Fall through. */
4172 case COMPARE:
4173 case EQ:
4174 case NE:
4175 case GT:
4176 case LE:
4177 case LT:
4178 case GE:
4179 case GEU:
4180 case LTU:
4181 case GTU:
4182 case LEU:
4183 case UNORDERED:
4184 case ORDERED:
4185 case UNEQ:
4186 case UNGE:
4187 case UNLT:
4188 case UNGT:
4189 case UNLE:
4190 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4191
4192 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4193 case XOR:
4194 return 0;
4195
4196 case IOR:
4197 if (TARGET_THUMB2)
4198 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4199 return 0;
4200
4201 case AND:
4202 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4203
4204 default:
4205 gcc_unreachable ();
4206 }
4207 }
4208
4209 /* Return true if I is a valid di mode constant for the operation CODE. */
4210 int
4211 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4212 {
4213 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4214 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4215 rtx hi = GEN_INT (hi_val);
4216 rtx lo = GEN_INT (lo_val);
4217
4218 if (TARGET_THUMB1)
4219 return 0;
4220
4221 switch (code)
4222 {
4223 case AND:
4224 case IOR:
4225 case XOR:
4226 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4227 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4228 case PLUS:
4229 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4230
4231 default:
4232 return 0;
4233 }
4234 }
4235
4236 /* Emit a sequence of insns to handle a large constant.
4237 CODE is the code of the operation required, it can be any of SET, PLUS,
4238 IOR, AND, XOR, MINUS;
4239 MODE is the mode in which the operation is being performed;
4240 VAL is the integer to operate on;
4241 SOURCE is the other operand (a register, or a null-pointer for SET);
4242 SUBTARGETS means it is safe to create scratch registers if that will
4243 either produce a simpler sequence, or we will want to cse the values.
4244 Return value is the number of insns emitted. */
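/* For example (illustrative), a SET of 0x12345678 on a target without
   MOVW/MOVT support is synthesized as a MOV followed by three ORRs of
   8-bit rotated chunks, i.e. four insns in total.  */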
4245
4246 /* ??? Tweak this for thumb2. */
4247 int
4248 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4249 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4250 {
4251 rtx cond;
4252
4253 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4254 cond = COND_EXEC_TEST (PATTERN (insn));
4255 else
4256 cond = NULL_RTX;
4257
4258 if (subtargets || code == SET
4259 || (REG_P (target) && REG_P (source)
4260 && REGNO (target) != REGNO (source)))
4261 {
4262 /* After arm_reorg has been called, we can't fix up expensive
4263 constants by pushing them into memory so we must synthesize
4264 them in-line, regardless of the cost. This is only likely to
4265 be more costly on chips that have load delay slots and we are
4266 compiling without running the scheduler (so no splitting
4267 occurred before the final instruction emission).
4268
4269 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4270 */
4271 if (!cfun->machine->after_arm_reorg
4272 && !cond
4273 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4274 1, 0)
4275 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4276 + (code != SET))))
4277 {
4278 if (code == SET)
4279 {
4280 /* Currently SET is the only monadic value for CODE; all
4281 the rest are dyadic. */
4282 if (TARGET_USE_MOVT)
4283 arm_emit_movpair (target, GEN_INT (val));
4284 else
4285 emit_set_insn (target, GEN_INT (val));
4286
4287 return 1;
4288 }
4289 else
4290 {
4291 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4292
4293 if (TARGET_USE_MOVT)
4294 arm_emit_movpair (temp, GEN_INT (val));
4295 else
4296 emit_set_insn (temp, GEN_INT (val));
4297
4298 /* For MINUS, the value is subtracted from, since we never
4299 have subtraction of a constant. */
4300 if (code == MINUS)
4301 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4302 else
4303 emit_set_insn (target,
4304 gen_rtx_fmt_ee (code, mode, source, temp));
4305 return 2;
4306 }
4307 }
4308 }
4309
4310 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4311 1);
4312 }
4313
4314 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4315 ARM/THUMB2 immediates and add up to VAL.
4316 The function return value gives the number of insns required. */
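/* A worked example (illustrative): VAL = 0x00ffff00 splits into the two
   immediates 0x00ff0000 and 0x0000ff00, so two insns are required.  */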
4317 static int
4318 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4319 struct four_ints *return_sequence)
4320 {
4321 int best_consecutive_zeros = 0;
4322 int i;
4323 int best_start = 0;
4324 int insns1, insns2;
4325 struct four_ints tmp_sequence;
4326
4327 /* If we aren't targeting ARM, the best place to start is always at
4328 the bottom, otherwise look more closely. */
4329 if (TARGET_ARM)
4330 {
4331 for (i = 0; i < 32; i += 2)
4332 {
4333 int consecutive_zeros = 0;
4334
4335 if (!(val & (3 << i)))
4336 {
4337 while ((i < 32) && !(val & (3 << i)))
4338 {
4339 consecutive_zeros += 2;
4340 i += 2;
4341 }
4342 if (consecutive_zeros > best_consecutive_zeros)
4343 {
4344 best_consecutive_zeros = consecutive_zeros;
4345 best_start = i - consecutive_zeros;
4346 }
4347 i -= 2;
4348 }
4349 }
4350 }
4351
4352 /* So long as it won't require any more insns to do so, it's
4353 desirable to emit a small constant (in bits 0...9) in the last
4354 insn. This way there is more chance that it can be combined with
4355 a later addressing insn to form a pre-indexed load or store
4356 operation. Consider:
4357
4358 *((volatile int *)0xe0000100) = 1;
4359 *((volatile int *)0xe0000110) = 2;
4360
4361 We want this to wind up as:
4362
4363 mov rA, #0xe0000000
4364 mov rB, #1
4365 str rB, [rA, #0x100]
4366 mov rB, #2
4367 str rB, [rA, #0x110]
4368
4369 rather than having to synthesize both large constants from scratch.
4370
4371 Therefore, we calculate how many insns would be required to emit
4372 the constant starting from `best_start', and also starting from
4373 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4374 yield a shorter sequence, we may as well use zero. */
4375 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4376 if (best_start != 0
4377 && ((HOST_WIDE_INT_1U << best_start) < val))
4378 {
4379 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4380 if (insns2 <= insns1)
4381 {
4382 *return_sequence = tmp_sequence;
4383 insns1 = insns2;
4384 }
4385 }
4386
4387 return insns1;
4388 }
4389
4390 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4391 static int
4392 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4393 struct four_ints *return_sequence, int i)
4394 {
4395 int remainder = val & 0xffffffff;
4396 int insns = 0;
4397
4398 /* Try and find a way of doing the job in either two or three
4399 instructions.
4400
4401 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4402 location. We start at position I. This may be the MSB, or
4403 optimal_immediate_sequence may have positioned it at the largest block
4404 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4405 wrapping around to the top of the word when we drop off the bottom.
4406 In the worst case this code should produce no more than four insns.
4407
4408 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4409 constants, shifted to any arbitrary location. We should always start
4410 at the MSB. */
4411 do
4412 {
4413 int end;
4414 unsigned int b1, b2, b3, b4;
4415 unsigned HOST_WIDE_INT result;
4416 int loc;
4417
4418 gcc_assert (insns < 4);
4419
4420 if (i <= 0)
4421 i += 32;
4422
4423 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4424 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4425 {
4426 loc = i;
4427 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4428 /* We can use addw/subw for the last 12 bits. */
4429 result = remainder;
4430 else
4431 {
4432 /* Use an 8-bit shifted/rotated immediate. */
4433 end = i - 8;
4434 if (end < 0)
4435 end += 32;
4436 result = remainder & ((0x0ff << end)
4437 | ((i < end) ? (0xff >> (32 - end))
4438 : 0));
4439 i -= 8;
4440 }
4441 }
4442 else
4443 {
4444 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4445 arbitrary shifts. */
4446 i -= TARGET_ARM ? 2 : 1;
4447 continue;
4448 }
4449
4450 /* Next, see if we can do a better job with a thumb2 replicated
4451 constant.
4452
4453 We do it this way around to catch the cases like 0x01F001E0 where
4454 two 8-bit immediates would work, but a replicated constant would
4455 make it worse.
4456
4457 TODO: 16-bit constants that don't clear all the bits, but still win.
4458 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4459 if (TARGET_THUMB2)
4460 {
4461 b1 = (remainder & 0xff000000) >> 24;
4462 b2 = (remainder & 0x00ff0000) >> 16;
4463 b3 = (remainder & 0x0000ff00) >> 8;
4464 b4 = remainder & 0xff;
4465
4466 if (loc > 24)
4467 {
4468 /* The 8-bit immediate already found clears b1 (and maybe b2),
4469 but must leave b3 and b4 alone. */
4470
4471 /* First try to find a 32-bit replicated constant that clears
4472 almost everything. We can assume that we can't do it in one,
4473 or else we wouldn't be here. */
4474 unsigned int tmp = b1 & b2 & b3 & b4;
4475 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4476 + (tmp << 24);
4477 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4478 + (tmp == b3) + (tmp == b4);
4479 if (tmp
4480 && (matching_bytes >= 3
4481 || (matching_bytes == 2
4482 && const_ok_for_op (remainder & ~tmp2, code))))
4483 {
4484 /* At least 3 of the bytes match, and the fourth has at
4485 least as many bits set, or two of the bytes match
4486 and it will only require one more insn to finish. */
4487 result = tmp2;
4488 i = tmp != b1 ? 32
4489 : tmp != b2 ? 24
4490 : tmp != b3 ? 16
4491 : 8;
4492 }
4493
4494 /* Second, try to find a 16-bit replicated constant that can
4495 leave three of the bytes clear. If b2 or b4 is already
4496 zero, then we can. If the 8-bit from above would not
4497 clear b2 anyway, then we still win. */
4498 else if (b1 == b3 && (!b2 || !b4
4499 || (remainder & 0x00ff0000 & ~result)))
4500 {
4501 result = remainder & 0xff00ff00;
4502 i = 24;
4503 }
4504 }
4505 else if (loc > 16)
4506 {
4507 /* The 8-bit immediate already found clears b2 (and maybe b3)
4508 and we don't get here unless b1 is already clear, but it will
4509 leave b4 unchanged. */
4510
4511 /* If we can clear b2 and b4 at once, then we win, since the
4512 8-bits couldn't possibly reach that far. */
4513 if (b2 == b4)
4514 {
4515 result = remainder & 0x00ff00ff;
4516 i = 16;
4517 }
4518 }
4519 }
4520
4521 return_sequence->i[insns++] = result;
4522 remainder &= ~result;
4523
4524 if (code == SET || code == MINUS)
4525 code = PLUS;
4526 }
4527 while (remainder);
4528
4529 return insns;
4530 }
4531
4532 /* Emit an instruction with the indicated PATTERN. If COND is
4533 non-NULL, conditionalize the execution of the instruction on COND
4534 being true. */
4535
4536 static void
4537 emit_constant_insn (rtx cond, rtx pattern)
4538 {
4539 if (cond)
4540 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4541 emit_insn (pattern);
4542 }
4543
4544 /* As above, but extra parameter GENERATE which, if clear, suppresses
4545 RTL generation. */
4546
4547 static int
4548 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4549 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4550 int subtargets, int generate)
4551 {
4552 int can_invert = 0;
4553 int can_negate = 0;
4554 int final_invert = 0;
4555 int i;
4556 int set_sign_bit_copies = 0;
4557 int clear_sign_bit_copies = 0;
4558 int clear_zero_bit_copies = 0;
4559 int set_zero_bit_copies = 0;
4560 int insns = 0, neg_insns, inv_insns;
4561 unsigned HOST_WIDE_INT temp1, temp2;
4562 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4563 struct four_ints *immediates;
4564 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4565
4566 /* Find out which operations are safe for a given CODE. Also do a quick
4567 check for degenerate cases; these can occur when DImode operations
4568 are split. */
4569 switch (code)
4570 {
4571 case SET:
4572 can_invert = 1;
4573 break;
4574
4575 case PLUS:
4576 can_negate = 1;
4577 break;
4578
4579 case IOR:
4580 if (remainder == 0xffffffff)
4581 {
4582 if (generate)
4583 emit_constant_insn (cond,
4584 gen_rtx_SET (target,
4585 GEN_INT (ARM_SIGN_EXTEND (val))));
4586 return 1;
4587 }
4588
4589 if (remainder == 0)
4590 {
4591 if (reload_completed && rtx_equal_p (target, source))
4592 return 0;
4593
4594 if (generate)
4595 emit_constant_insn (cond, gen_rtx_SET (target, source));
4596 return 1;
4597 }
4598 break;
4599
4600 case AND:
4601 if (remainder == 0)
4602 {
4603 if (generate)
4604 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4605 return 1;
4606 }
4607 if (remainder == 0xffffffff)
4608 {
4609 if (reload_completed && rtx_equal_p (target, source))
4610 return 0;
4611 if (generate)
4612 emit_constant_insn (cond, gen_rtx_SET (target, source));
4613 return 1;
4614 }
4615 can_invert = 1;
4616 break;
4617
4618 case XOR:
4619 if (remainder == 0)
4620 {
4621 if (reload_completed && rtx_equal_p (target, source))
4622 return 0;
4623 if (generate)
4624 emit_constant_insn (cond, gen_rtx_SET (target, source));
4625 return 1;
4626 }
4627
4628 if (remainder == 0xffffffff)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NOT (mode, source)));
4634 return 1;
4635 }
4636 final_invert = 1;
4637 break;
4638
4639 case MINUS:
4640 /* We treat MINUS as (val - source), since (source - val) is always
4641 passed as (source + (-val)). */
4642 if (remainder == 0)
4643 {
4644 if (generate)
4645 emit_constant_insn (cond,
4646 gen_rtx_SET (target,
4647 gen_rtx_NEG (mode, source)));
4648 return 1;
4649 }
4650 if (const_ok_for_arm (val))
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 gen_rtx_MINUS (mode, GEN_INT (val),
4656 source)));
4657 return 1;
4658 }
4659
4660 break;
4661
4662 default:
4663 gcc_unreachable ();
4664 }
4665
4666 /* If we can do it in one insn get out quickly. */
4667 if (const_ok_for_op (val, code))
4668 {
4669 if (generate)
4670 emit_constant_insn (cond,
4671 gen_rtx_SET (target,
4672 (source
4673 ? gen_rtx_fmt_ee (code, mode, source,
4674 GEN_INT (val))
4675 : GEN_INT (val))));
4676 return 1;
4677 }
4678
4679 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4680 insn. */
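/* For example (illustrative), on such targets x &= 0xffff becomes a
   single UXTH, and x &= 0x1fffff becomes a single UBFX of width 21.  */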
4681 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4682 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4683 {
4684 if (generate)
4685 {
4686 if (mode == SImode && i == 16)
4687 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4688 smaller insn. */
4689 emit_constant_insn (cond,
4690 gen_zero_extendhisi2
4691 (target, gen_lowpart (HImode, source)));
4692 else
4693 /* Extz only supports SImode, but we can coerce the operands
4694 into that mode. */
4695 emit_constant_insn (cond,
4696 gen_extzv_t2 (gen_lowpart (SImode, target),
4697 gen_lowpart (SImode, source),
4698 GEN_INT (i), const0_rtx));
4699 }
4700
4701 return 1;
4702 }
4703
4704 /* Calculate a few attributes that may be useful for specific
4705 optimizations. */
4706 /* Count number of leading zeros. */
4707 for (i = 31; i >= 0; i--)
4708 {
4709 if ((remainder & (1 << i)) == 0)
4710 clear_sign_bit_copies++;
4711 else
4712 break;
4713 }
4714
4715 /* Count number of leading 1's. */
4716 for (i = 31; i >= 0; i--)
4717 {
4718 if ((remainder & (1 << i)) != 0)
4719 set_sign_bit_copies++;
4720 else
4721 break;
4722 }
4723
4724 /* Count number of trailing zeros. */
4725 for (i = 0; i <= 31; i++)
4726 {
4727 if ((remainder & (1 << i)) == 0)
4728 clear_zero_bit_copies++;
4729 else
4730 break;
4731 }
4732
4733 /* Count number of trailing 1's. */
4734 for (i = 0; i <= 31; i++)
4735 {
4736 if ((remainder & (1 << i)) != 0)
4737 set_zero_bit_copies++;
4738 else
4739 break;
4740 }
4741
4742 switch (code)
4743 {
4744 case SET:
4745 /* See if we can do this by sign_extending a constant that is known
4746 to be negative. This is a good way of doing it, since the shift
4747 may well merge into a subsequent insn. */
4748 if (set_sign_bit_copies > 1)
4749 {
4750 if (const_ok_for_arm
4751 (temp1 = ARM_SIGN_EXTEND (remainder
4752 << (set_sign_bit_copies - 1))))
4753 {
4754 if (generate)
4755 {
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_ashrsi3 (target, new_src,
4761 GEN_INT (set_sign_bit_copies - 1)));
4762 }
4763 return 2;
4764 }
4765 /* For an inverted constant, we will need to set the low bits,
4766 these will be shifted out of harm's way. */
4767 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4768 if (const_ok_for_arm (~temp1))
4769 {
4770 if (generate)
4771 {
4772 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4773 emit_constant_insn (cond,
4774 gen_rtx_SET (new_src, GEN_INT (temp1)));
4775 emit_constant_insn (cond,
4776 gen_ashrsi3 (target, new_src,
4777 GEN_INT (set_sign_bit_copies - 1)));
4778 }
4779 return 2;
4780 }
4781 }
4782
4783 /* See if we can calculate the value as the difference between two
4784 valid immediates. */
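/* A worked example (illustrative): 0x00fffff0 is not a valid immediate,
   but it can be built as 0x01000000 minus 0x10, i.e. a MOV of one valid
   immediate followed by a SUB of another; temp1 and temp2 below work
   out to that pair in this case.  */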
4785 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4786 {
4787 int topshift = clear_sign_bit_copies & ~1;
4788
4789 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4790 & (0xff000000 >> topshift));
4791
4792 /* If temp1 is zero, then that means the 9 most significant
4793 bits of remainder were 1 and we've caused it to overflow.
4794 When topshift is 0 we don't need to do anything since we
4795 can borrow from 'bit 32'. */
4796 if (temp1 == 0 && topshift != 0)
4797 temp1 = 0x80000000 >> (topshift - 1);
4798
4799 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4800
4801 if (const_ok_for_arm (temp2))
4802 {
4803 if (generate)
4804 {
4805 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4806 emit_constant_insn (cond,
4807 gen_rtx_SET (new_src, GEN_INT (temp1)));
4808 emit_constant_insn (cond,
4809 gen_addsi3 (target, new_src,
4810 GEN_INT (-temp2)));
4811 }
4812
4813 return 2;
4814 }
4815 }
4816
4817 /* See if we can generate this by setting the bottom (or the top)
4818 16 bits, and then shifting these into the other half of the
4819 word. We only look for the simplest cases, to do more would cost
4820 too much. Be careful, however, not to generate this when the
4821 alternative would take fewer insns. */
4822 if (val & 0xffff0000)
4823 {
4824 temp1 = remainder & 0xffff0000;
4825 temp2 = remainder & 0x0000ffff;
4826
4827 /* Overlaps outside this range are best done using other methods. */
4828 for (i = 9; i < 24; i++)
4829 {
4830 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4831 && !const_ok_for_arm (temp2))
4832 {
4833 rtx new_src = (subtargets
4834 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4835 : target);
4836 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4837 source, subtargets, generate);
4838 source = new_src;
4839 if (generate)
4840 emit_constant_insn
4841 (cond,
4842 gen_rtx_SET
4843 (target,
4844 gen_rtx_IOR (mode,
4845 gen_rtx_ASHIFT (mode, source,
4846 GEN_INT (i)),
4847 source)));
4848 return insns + 1;
4849 }
4850 }
4851
4852 /* Don't duplicate cases already considered. */
4853 for (i = 17; i < 24; i++)
4854 {
4855 if (((temp1 | (temp1 >> i)) == remainder)
4856 && !const_ok_for_arm (temp1))
4857 {
4858 rtx new_src = (subtargets
4859 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4860 : target);
4861 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4862 source, subtargets, generate);
4863 source = new_src;
4864 if (generate)
4865 emit_constant_insn
4866 (cond,
4867 gen_rtx_SET (target,
4868 gen_rtx_IOR
4869 (mode,
4870 gen_rtx_LSHIFTRT (mode, source,
4871 GEN_INT (i)),
4872 source)));
4873 return insns + 1;
4874 }
4875 }
4876 }
4877 break;
4878
4879 case IOR:
4880 case XOR:
4881 /* If we have IOR or XOR, and the constant can be loaded in a
4882 single instruction, and we can find a temporary to put it in,
4883 then this can be done in two instructions instead of 3-4. */
4884 if (subtargets
4885 /* TARGET can't be NULL if SUBTARGETS is 0 */
4886 || (reload_completed && !reg_mentioned_p (target, source)))
4887 {
4888 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4889 {
4890 if (generate)
4891 {
4892 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4893
4894 emit_constant_insn (cond,
4895 gen_rtx_SET (sub, GEN_INT (val)));
4896 emit_constant_insn (cond,
4897 gen_rtx_SET (target,
4898 gen_rtx_fmt_ee (code, mode,
4899 source, sub)));
4900 }
4901 return 2;
4902 }
4903 }
4904
4905 if (code == XOR)
4906 break;
4907
4908 /* Convert.
4909 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4910 and the remainder 0s for e.g. 0xfff00000)
4911 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4912
4913 This can be done in 2 instructions by using shifts with mov or mvn.
4914 e.g. for
4915 x = x | 0xfff00000;
4916 we generate.
4917 mvn r0, r0, asl #12
4918 mvn r0, r0, lsr #12 */
4919 if (set_sign_bit_copies > 8
4920 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4921 {
4922 if (generate)
4923 {
4924 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4925 rtx shift = GEN_INT (set_sign_bit_copies);
4926
4927 emit_constant_insn
4928 (cond,
4929 gen_rtx_SET (sub,
4930 gen_rtx_NOT (mode,
4931 gen_rtx_ASHIFT (mode,
4932 source,
4933 shift))));
4934 emit_constant_insn
4935 (cond,
4936 gen_rtx_SET (target,
4937 gen_rtx_NOT (mode,
4938 gen_rtx_LSHIFTRT (mode, sub,
4939 shift))));
4940 }
4941 return 2;
4942 }
4943
4944 /* Convert
4945 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4946 to
4947 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4948
4949 e.g. for r0 = r0 | 0xfff
4950 mvn r0, r0, lsr #12
4951 mvn r0, r0, asl #12
4952
4953 */
4954 if (set_zero_bit_copies > 8
4955 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4956 {
4957 if (generate)
4958 {
4959 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4960 rtx shift = GEN_INT (set_zero_bit_copies);
4961
4962 emit_constant_insn
4963 (cond,
4964 gen_rtx_SET (sub,
4965 gen_rtx_NOT (mode,
4966 gen_rtx_LSHIFTRT (mode,
4967 source,
4968 shift))));
4969 emit_constant_insn
4970 (cond,
4971 gen_rtx_SET (target,
4972 gen_rtx_NOT (mode,
4973 gen_rtx_ASHIFT (mode, sub,
4974 shift))));
4975 }
4976 return 2;
4977 }
4978
4979 /* This will never be reached for Thumb2 because orn is a valid
4980 instruction. This is for Thumb1 and the ARM 32 bit cases.
4981
4982 x = y | constant (such that ~constant is a valid constant)
4983 Transform this to
4984 x = ~(~y & ~constant).
4985 */
4986 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4987 {
4988 if (generate)
4989 {
4990 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4991 emit_constant_insn (cond,
4992 gen_rtx_SET (sub,
4993 gen_rtx_NOT (mode, source)));
4994 source = sub;
4995 if (subtargets)
4996 sub = gen_reg_rtx (mode);
4997 emit_constant_insn (cond,
4998 gen_rtx_SET (sub,
4999 gen_rtx_AND (mode, source,
5000 GEN_INT (temp1))));
5001 emit_constant_insn (cond,
5002 gen_rtx_SET (target,
5003 gen_rtx_NOT (mode, sub)));
5004 }
5005 return 3;
5006 }
5007 break;
5008
5009 case AND:
5010 /* See if two shifts will do 2 or more insn's worth of work. */
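/* A small illustration (constant assumed): for x &= 0x0000ffff the mask
   has 16 clear bits at the top and neither it nor its complement is a
   valid immediate, so rather than loading it into a register we can
   clear the high half with two shifts:
   mov rd, rs, asl #16
   mov rd, rd, lsr #16 */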
5011 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5012 {
5013 HOST_WIDE_INT shift_mask = ((0xffffffff
5014 << (32 - clear_sign_bit_copies))
5015 & 0xffffffff);
5016
5017 if ((remainder | shift_mask) != 0xffffffff)
5018 {
5019 HOST_WIDE_INT new_val
5020 = ARM_SIGN_EXTEND (remainder | shift_mask);
5021
5022 if (generate)
5023 {
5024 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5025 insns = arm_gen_constant (AND, SImode, cond, new_val,
5026 new_src, source, subtargets, 1);
5027 source = new_src;
5028 }
5029 else
5030 {
5031 rtx targ = subtargets ? NULL_RTX : target;
5032 insns = arm_gen_constant (AND, mode, cond, new_val,
5033 targ, source, subtargets, 0);
5034 }
5035 }
5036
5037 if (generate)
5038 {
5039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5040 rtx shift = GEN_INT (clear_sign_bit_copies);
5041
5042 emit_insn (gen_ashlsi3 (new_src, source, shift));
5043 emit_insn (gen_lshrsi3 (target, new_src, shift));
5044 }
5045
5046 return insns + 2;
5047 }
5048
5049 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5050 {
5051 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5052
5053 if ((remainder | shift_mask) != 0xffffffff)
5054 {
5055 HOST_WIDE_INT new_val
5056 = ARM_SIGN_EXTEND (remainder | shift_mask);
5057 if (generate)
5058 {
5059 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5060
5061 insns = arm_gen_constant (AND, mode, cond, new_val,
5062 new_src, source, subtargets, 1);
5063 source = new_src;
5064 }
5065 else
5066 {
5067 rtx targ = subtargets ? NULL_RTX : target;
5068
5069 insns = arm_gen_constant (AND, mode, cond, new_val,
5070 targ, source, subtargets, 0);
5071 }
5072 }
5073
5074 if (generate)
5075 {
5076 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5077 rtx shift = GEN_INT (clear_zero_bit_copies);
5078
5079 emit_insn (gen_lshrsi3 (new_src, source, shift));
5080 emit_insn (gen_ashlsi3 (target, new_src, shift));
5081 }
5082
5083 return insns + 2;
5084 }
5085
5086 break;
5087
5088 default:
5089 break;
5090 }
5091
5092 /* Calculate what the instruction sequences would be if we generated it
5093 normally, negated, or inverted. */
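/* For illustration only (constant chosen arbitrarily): to SET a register
   to 0xfffffff0 the positive sequence must build the value piecewise in
   several instructions, whereas the inverted value 0x0000000f is a valid
   immediate, so a single mvn rd, #15 is preferred and the inverted
   sequence wins the comparison below. */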
5094 if (code == AND)
5095 /* AND cannot be split into multiple insns, so invert and use BIC. */
5096 insns = 99;
5097 else
5098 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5099
5100 if (can_negate)
5101 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5102 &neg_immediates);
5103 else
5104 neg_insns = 99;
5105
5106 if (can_invert || final_invert)
5107 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5108 &inv_immediates);
5109 else
5110 inv_insns = 99;
5111
5112 immediates = &pos_immediates;
5113
5114 /* Is the negated immediate sequence more efficient? */
5115 if (neg_insns < insns && neg_insns <= inv_insns)
5116 {
5117 insns = neg_insns;
5118 immediates = &neg_immediates;
5119 }
5120 else
5121 can_negate = 0;
5122
5123 /* Is the inverted immediate sequence more efficient?
5124 We must allow for an extra NOT instruction for XOR operations, although
5125 there is some chance that the final 'mvn' will get optimized later. */
5126 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5127 {
5128 insns = inv_insns;
5129 immediates = &inv_immediates;
5130 }
5131 else
5132 {
5133 can_invert = 0;
5134 final_invert = 0;
5135 }
5136
5137 /* Now output the chosen sequence as instructions. */
5138 if (generate)
5139 {
5140 for (i = 0; i < insns; i++)
5141 {
5142 rtx new_src, temp1_rtx;
5143
5144 temp1 = immediates->i[i];
5145
5146 if (code == SET || code == MINUS)
5147 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5148 else if ((final_invert || i < (insns - 1)) && subtargets)
5149 new_src = gen_reg_rtx (mode);
5150 else
5151 new_src = target;
5152
5153 if (can_invert)
5154 temp1 = ~temp1;
5155 else if (can_negate)
5156 temp1 = -temp1;
5157
5158 temp1 = trunc_int_for_mode (temp1, mode);
5159 temp1_rtx = GEN_INT (temp1);
5160
5161 if (code == SET)
5162 ;
5163 else if (code == MINUS)
5164 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5165 else
5166 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5167
5168 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5169 source = new_src;
5170
5171 if (code == SET)
5172 {
5173 can_negate = can_invert;
5174 can_invert = 0;
5175 code = PLUS;
5176 }
5177 else if (code == MINUS)
5178 code = PLUS;
5179 }
5180 }
5181
5182 if (final_invert)
5183 {
5184 if (generate)
5185 emit_constant_insn (cond, gen_rtx_SET (target,
5186 gen_rtx_NOT (mode, source)));
5187 insns++;
5188 }
5189
5190 return insns;
5191 }
5192
5193 /* Canonicalize a comparison so that we are more likely to recognize it.
5194 This can be done for a few constant compares, where we can make the
5195 immediate value easier to load. */
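/* A minimal example (32-bit comparison assumed): for x > 511 the
   constant 511 cannot be used directly as a cmp or cmn immediate, but
   512 can, so the test is rewritten below as x >= 512 and no extra
   constant load is needed. */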
5196
5197 static void
5198 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5199 bool op0_preserve_value)
5200 {
5201 machine_mode mode;
5202 unsigned HOST_WIDE_INT i, maxval;
5203
5204 mode = GET_MODE (*op0);
5205 if (mode == VOIDmode)
5206 mode = GET_MODE (*op1);
5207
5208 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5209
5210 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5211 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5212 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5213 for GTU/LEU in Thumb mode. */
5214 if (mode == DImode)
5215 {
5216
5217 if (*code == GT || *code == LE
5218 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5219 {
5220 /* Missing comparison. First try to use an available
5221 comparison. */
5222 if (CONST_INT_P (*op1))
5223 {
5224 i = INTVAL (*op1);
5225 switch (*code)
5226 {
5227 case GT:
5228 case LE:
5229 if (i != maxval
5230 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5231 {
5232 *op1 = GEN_INT (i + 1);
5233 *code = *code == GT ? GE : LT;
5234 return;
5235 }
5236 break;
5237 case GTU:
5238 case LEU:
5239 if (i != ~((unsigned HOST_WIDE_INT) 0)
5240 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5241 {
5242 *op1 = GEN_INT (i + 1);
5243 *code = *code == GTU ? GEU : LTU;
5244 return;
5245 }
5246 break;
5247 default:
5248 gcc_unreachable ();
5249 }
5250 }
5251
5252 /* If that did not work, reverse the condition. */
5253 if (!op0_preserve_value)
5254 {
5255 std::swap (*op0, *op1);
5256 *code = (int)swap_condition ((enum rtx_code)*code);
5257 }
5258 }
5259 return;
5260 }
5261
5262 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5263 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5264 to facilitate possible combining with a cmp into 'ands'. */
5265 if (mode == SImode
5266 && GET_CODE (*op0) == ZERO_EXTEND
5267 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5268 && GET_MODE (XEXP (*op0, 0)) == QImode
5269 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5270 && subreg_lowpart_p (XEXP (*op0, 0))
5271 && *op1 == const0_rtx)
5272 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5273 GEN_INT (255));
5274
5275 /* Comparisons smaller than DImode. Only adjust comparisons against
5276 an out-of-range constant. */
5277 if (!CONST_INT_P (*op1)
5278 || const_ok_for_arm (INTVAL (*op1))
5279 || const_ok_for_arm (- INTVAL (*op1)))
5280 return;
5281
5282 i = INTVAL (*op1);
5283
5284 switch (*code)
5285 {
5286 case EQ:
5287 case NE:
5288 return;
5289
5290 case GT:
5291 case LE:
5292 if (i != maxval
5293 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5294 {
5295 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5296 *code = *code == GT ? GE : LT;
5297 return;
5298 }
5299 break;
5300
5301 case GE:
5302 case LT:
5303 if (i != ~maxval
5304 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5305 {
5306 *op1 = GEN_INT (i - 1);
5307 *code = *code == GE ? GT : LE;
5308 return;
5309 }
5310 break;
5311
5312 case GTU:
5313 case LEU:
5314 if (i != ~((unsigned HOST_WIDE_INT) 0)
5315 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5316 {
5317 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5318 *code = *code == GTU ? GEU : LTU;
5319 return;
5320 }
5321 break;
5322
5323 case GEU:
5324 case LTU:
5325 if (i != 0
5326 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5327 {
5328 *op1 = GEN_INT (i - 1);
5329 *code = *code == GEU ? GTU : LEU;
5330 return;
5331 }
5332 break;
5333
5334 default:
5335 gcc_unreachable ();
5336 }
5337 }
5338
5339
5340 /* Define how to find the value returned by a function. */
5341
5342 static rtx
5343 arm_function_value(const_tree type, const_tree func,
5344 bool outgoing ATTRIBUTE_UNUSED)
5345 {
5346 machine_mode mode;
5347 int unsignedp ATTRIBUTE_UNUSED;
5348 rtx r ATTRIBUTE_UNUSED;
5349
5350 mode = TYPE_MODE (type);
5351
5352 if (TARGET_AAPCS_BASED)
5353 return aapcs_allocate_return_reg (mode, type, func);
5354
5355 /* Promote integer types. */
5356 if (INTEGRAL_TYPE_P (type))
5357 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5358
5359 /* Promotes small structs returned in a register to full-word size
5360 for big-endian AAPCS. */
5361 if (arm_return_in_msb (type))
5362 {
5363 HOST_WIDE_INT size = int_size_in_bytes (type);
5364 if (size % UNITS_PER_WORD != 0)
5365 {
5366 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5367 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5368 }
5369 }
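/* Worked example (illustrative): a 3-byte structure returned on a
   big-endian AAPCS target is padded up to UNITS_PER_WORD (3 -> 4 bytes),
   so MODE becomes SImode and the value ends up in the most significant
   bits of r0. */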
5370
5371 return arm_libcall_value_1 (mode);
5372 }
5373
5374 /* libcall hashtable helpers. */
5375
5376 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5377 {
5378 static inline hashval_t hash (const rtx_def *);
5379 static inline bool equal (const rtx_def *, const rtx_def *);
5380 static inline void remove (rtx_def *);
5381 };
5382
5383 inline bool
5384 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5385 {
5386 return rtx_equal_p (p1, p2);
5387 }
5388
5389 inline hashval_t
5390 libcall_hasher::hash (const rtx_def *p1)
5391 {
5392 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5393 }
5394
5395 typedef hash_table<libcall_hasher> libcall_table_type;
5396
5397 static void
5398 add_libcall (libcall_table_type *htab, rtx libcall)
5399 {
5400 *htab->find_slot (libcall, INSERT) = libcall;
5401 }
5402
5403 static bool
5404 arm_libcall_uses_aapcs_base (const_rtx libcall)
5405 {
5406 static bool init_done = false;
5407 static libcall_table_type *libcall_htab = NULL;
5408
5409 if (!init_done)
5410 {
5411 init_done = true;
5412
5413 libcall_htab = new libcall_table_type (31);
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5422
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5431
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5442 add_libcall (libcall_htab,
5443 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5444 add_libcall (libcall_htab,
5445 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5446 add_libcall (libcall_htab,
5447 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5448
5449 /* Values from double-precision helper functions are returned in core
5450 registers if the selected core only supports single-precision
5451 arithmetic, even if we are using the hard-float ABI. The same is
5452 true for single-precision helpers, but we will never be using the
5453 hard-float ABI on a CPU which doesn't support single-precision
5454 operations in hardware. */
5455 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5456 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5457 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5458 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5466 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5467 SFmode));
5468 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5469 DFmode));
5470 add_libcall (libcall_htab,
5471 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5472 }
5473
5474 return libcall && libcall_htab->find (libcall) != NULL;
5475 }
5476
5477 static rtx
5478 arm_libcall_value_1 (machine_mode mode)
5479 {
5480 if (TARGET_AAPCS_BASED)
5481 return aapcs_libcall_value (mode);
5482 else if (TARGET_IWMMXT_ABI
5483 && arm_vector_mode_supported_p (mode))
5484 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5485 else
5486 return gen_rtx_REG (mode, ARG_REGISTER (1));
5487 }
5488
5489 /* Define how to find the value returned by a library function
5490 assuming the value has mode MODE. */
5491
5492 static rtx
5493 arm_libcall_value (machine_mode mode, const_rtx libcall)
5494 {
5495 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5496 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5497 {
5498 /* The following libcalls return their result in integer registers,
5499 even though they return a floating point value. */
5500 if (arm_libcall_uses_aapcs_base (libcall))
5501 return gen_rtx_REG (mode, ARG_REGISTER (1));
5502
5503 }
5504
5505 return arm_libcall_value_1 (mode);
5506 }
5507
5508 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5509
5510 static bool
5511 arm_function_value_regno_p (const unsigned int regno)
5512 {
5513 if (regno == ARG_REGISTER (1)
5514 || (TARGET_32BIT
5515 && TARGET_AAPCS_BASED
5516 && TARGET_HARD_FLOAT
5517 && regno == FIRST_VFP_REGNUM)
5518 || (TARGET_IWMMXT_ABI
5519 && regno == FIRST_IWMMXT_REGNUM))
5520 return true;
5521
5522 return false;
5523 }
5524
5525 /* Determine the amount of memory needed to store the possible return
5526 registers of an untyped call. */
5527 int
5528 arm_apply_result_size (void)
5529 {
5530 int size = 16;
5531
5532 if (TARGET_32BIT)
5533 {
5534 if (TARGET_HARD_FLOAT_ABI)
5535 size += 32;
5536 if (TARGET_IWMMXT_ABI)
5537 size += 8;
5538 }
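/* For example (derived from the checks above): a 32-bit hard-float-ABI
   target without iWMMXt needs 16 + 32 = 48 bytes, while a Thumb-1
   target needs just the 16 bytes covering r0-r3. */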
5539
5540 return size;
5541 }
5542
5543 /* Decide whether TYPE should be returned in memory (true)
5544 or in a register (false). FNTYPE is the type of the function making
5545 the call. */
5546 static bool
5547 arm_return_in_memory (const_tree type, const_tree fntype)
5548 {
5549 HOST_WIDE_INT size;
5550
5551 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5552
5553 if (TARGET_AAPCS_BASED)
5554 {
5555 /* Simple, non-aggregate types (i.e. not including vectors and
5556 complex) are always returned in a register (or registers).
5557 We don't care about which register here, so we can short-cut
5558 some of the detail. */
5559 if (!AGGREGATE_TYPE_P (type)
5560 && TREE_CODE (type) != VECTOR_TYPE
5561 && TREE_CODE (type) != COMPLEX_TYPE)
5562 return false;
5563
5564 /* Any return value that is no larger than one word can be
5565 returned in r0. */
5566 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5567 return false;
5568
5569 /* Check any available co-processors to see if they accept the
5570 type as a register candidate (VFP, for example, can return
5571 some aggregates in consecutive registers). These aren't
5572 available if the call is variadic. */
5573 if (aapcs_select_return_coproc (type, fntype) >= 0)
5574 return false;
5575
5576 /* Vector values should be returned using ARM registers, not
5577 memory (unless they're over 16 bytes, which will break since
5578 we only have four call-clobbered registers to play with). */
5579 if (TREE_CODE (type) == VECTOR_TYPE)
5580 return (size < 0 || size > (4 * UNITS_PER_WORD));
5581
5582 /* The rest go in memory. */
5583 return true;
5584 }
5585
5586 if (TREE_CODE (type) == VECTOR_TYPE)
5587 return (size < 0 || size > (4 * UNITS_PER_WORD));
5588
5589 if (!AGGREGATE_TYPE_P (type)
5590 && TREE_CODE (type) != VECTOR_TYPE)
5591 /* All simple types are returned in registers. */
5592 return false;
5593
5594 if (arm_abi != ARM_ABI_APCS)
5595 {
5596 /* ATPCS and later return aggregate types in memory only if they are
5597 larger than a word (or are variable size). */
5598 return (size < 0 || size > UNITS_PER_WORD);
5599 }
5600
5601 /* For the arm-wince targets we choose to be compatible with Microsoft's
5602 ARM and Thumb compilers, which always return aggregates in memory. */
5603 #ifndef ARM_WINCE
5604 /* All structures/unions bigger than one word are returned in memory.
5605 Also catch the case where int_size_in_bytes returns -1. In this case
5606 the aggregate is either huge or of variable size, and in either case
5607 we will want to return it via memory and not in a register. */
5608 if (size < 0 || size > UNITS_PER_WORD)
5609 return true;
5610
5611 if (TREE_CODE (type) == RECORD_TYPE)
5612 {
5613 tree field;
5614
5615 /* For a struct the APCS says that we only return in a register
5616 if the type is 'integer like' and every addressable element
5617 has an offset of zero. For practical purposes this means
5618 that the structure can have at most one non bit-field element
5619 and that this element must be the first one in the structure. */
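/* Illustrative cases (APCS targets only, layouts assumed):
   struct { int i; }       -- returned in r0;
   struct { short a, b; }  -- returned in memory (B is addressable);
   struct { float f; }     -- returned in memory (floating-point member). */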
5620
5621 /* Find the first field, ignoring non FIELD_DECL things which will
5622 have been created by C++. */
5623 for (field = TYPE_FIELDS (type);
5624 field && TREE_CODE (field) != FIELD_DECL;
5625 field = DECL_CHAIN (field))
5626 continue;
5627
5628 if (field == NULL)
5629 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5630
5631 /* Check that the first field is valid for returning in a register. */
5632
5633 /* ... Floats are not allowed */
5634 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5635 return true;
5636
5637 /* ... Aggregates that are not themselves valid for returning in
5638 a register are not allowed. */
5639 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5640 return true;
5641
5642 /* Now check the remaining fields, if any. Only bitfields are allowed,
5643 since they are not addressable. */
5644 for (field = DECL_CHAIN (field);
5645 field;
5646 field = DECL_CHAIN (field))
5647 {
5648 if (TREE_CODE (field) != FIELD_DECL)
5649 continue;
5650
5651 if (!DECL_BIT_FIELD_TYPE (field))
5652 return true;
5653 }
5654
5655 return false;
5656 }
5657
5658 if (TREE_CODE (type) == UNION_TYPE)
5659 {
5660 tree field;
5661
5662 /* Unions can be returned in registers if every element is
5663 integral, or can be returned in an integer register. */
5664 for (field = TYPE_FIELDS (type);
5665 field;
5666 field = DECL_CHAIN (field))
5667 {
5668 if (TREE_CODE (field) != FIELD_DECL)
5669 continue;
5670
5671 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5672 return true;
5673
5674 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5675 return true;
5676 }
5677
5678 return false;
5679 }
5680 #endif /* not ARM_WINCE */
5681
5682 /* Return all other types in memory. */
5683 return true;
5684 }
5685
5686 const struct pcs_attribute_arg
5687 {
5688 const char *arg;
5689 enum arm_pcs value;
5690 } pcs_attribute_args[] =
5691 {
5692 {"aapcs", ARM_PCS_AAPCS},
5693 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5694 #if 0
5695 /* We could recognize these, but changes would be needed elsewhere
5696 to implement them. */
5697 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5698 {"atpcs", ARM_PCS_ATPCS},
5699 {"apcs", ARM_PCS_APCS},
5700 #endif
5701 {NULL, ARM_PCS_UNKNOWN}
5702 };
5703
5704 static enum arm_pcs
5705 arm_pcs_from_attribute (tree attr)
5706 {
5707 const struct pcs_attribute_arg *ptr;
5708 const char *arg;
5709
5710 /* Get the value of the argument. */
5711 if (TREE_VALUE (attr) == NULL_TREE
5712 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5713 return ARM_PCS_UNKNOWN;
5714
5715 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5716
5717 /* Check it against the list of known arguments. */
5718 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5719 if (streq (arg, ptr->arg))
5720 return ptr->value;
5721
5722 /* An unrecognized PCS variant. */
5723 return ARM_PCS_UNKNOWN;
5724 }
5725
5726 /* Get the PCS variant to use for this call. TYPE is the function's type
5727 specification, DECL is the specific declaration. DECL may be null if
5728 the call could be indirect or if this is a library call. */
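/* For illustration (attribute usage assumed):
   double f (double) __attribute__ ((pcs ("aapcs-vfp")));
   selects ARM_PCS_AAPCS_VFP on an AAPCS-based target; if F were also
   variadic, the combination would be diagnosed below and the base AAPCS
   rules used instead. */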
5729 static enum arm_pcs
5730 arm_get_pcs_model (const_tree type, const_tree decl)
5731 {
5732 bool user_convention = false;
5733 enum arm_pcs user_pcs = arm_pcs_default;
5734 tree attr;
5735
5736 gcc_assert (type);
5737
5738 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5739 if (attr)
5740 {
5741 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5742 user_convention = true;
5743 }
5744
5745 if (TARGET_AAPCS_BASED)
5746 {
5747 /* Detect varargs functions. These always use the base rules
5748 (no argument is ever a candidate for a co-processor
5749 register). */
5750 bool base_rules = stdarg_p (type);
5751
5752 if (user_convention)
5753 {
5754 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5755 sorry ("non-AAPCS derived PCS variant");
5756 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5757 error ("variadic functions must use the base AAPCS variant");
5758 }
5759
5760 if (base_rules)
5761 return ARM_PCS_AAPCS;
5762 else if (user_convention)
5763 return user_pcs;
5764 else if (decl && flag_unit_at_a_time)
5765 {
5766 /* Local functions never leak outside this compilation unit,
5767 so we are free to use whatever conventions are
5768 appropriate. */
5769 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5770 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5771 if (i && i->local)
5772 return ARM_PCS_AAPCS_LOCAL;
5773 }
5774 }
5775 else if (user_convention && user_pcs != arm_pcs_default)
5776 sorry ("PCS variant");
5777
5778 /* For everything else we use the target's default. */
5779 return arm_pcs_default;
5780 }
5781
5782
5783 static void
5784 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5785 const_tree fntype ATTRIBUTE_UNUSED,
5786 rtx libcall ATTRIBUTE_UNUSED,
5787 const_tree fndecl ATTRIBUTE_UNUSED)
5788 {
5789 /* Record the unallocated VFP registers. */
5790 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5791 pcum->aapcs_vfp_reg_alloc = 0;
5792 }
5793
5794 /* Walk down the type tree of TYPE counting consecutive base elements.
5795 If *MODEP is VOIDmode, then set it to the first valid floating point
5796 type. If a non-floating point type is found, or if a floating point
5797 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5798 otherwise return the count in the sub-tree. */
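/* Illustrative results (field layouts assumed):
   struct { float x, y, z; }      -- *MODEP = SFmode, count 3;
   struct { double re, im; }      -- *MODEP = DFmode, count 2;
   struct { float f; double d; }  -- mixed base types, returns -1. */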
5799 static int
5800 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5801 {
5802 machine_mode mode;
5803 HOST_WIDE_INT size;
5804
5805 switch (TREE_CODE (type))
5806 {
5807 case REAL_TYPE:
5808 mode = TYPE_MODE (type);
5809 if (mode != DFmode && mode != SFmode && mode != HFmode)
5810 return -1;
5811
5812 if (*modep == VOIDmode)
5813 *modep = mode;
5814
5815 if (*modep == mode)
5816 return 1;
5817
5818 break;
5819
5820 case COMPLEX_TYPE:
5821 mode = TYPE_MODE (TREE_TYPE (type));
5822 if (mode != DFmode && mode != SFmode)
5823 return -1;
5824
5825 if (*modep == VOIDmode)
5826 *modep = mode;
5827
5828 if (*modep == mode)
5829 return 2;
5830
5831 break;
5832
5833 case VECTOR_TYPE:
5834 /* Use V2SImode and V4SImode as representatives of all 64-bit
5835 and 128-bit vector types, whether or not those modes are
5836 supported with the present options. */
5837 size = int_size_in_bytes (type);
5838 switch (size)
5839 {
5840 case 8:
5841 mode = V2SImode;
5842 break;
5843 case 16:
5844 mode = V4SImode;
5845 break;
5846 default:
5847 return -1;
5848 }
5849
5850 if (*modep == VOIDmode)
5851 *modep = mode;
5852
5853 /* Vector modes are considered to be opaque: two vectors are
5854 equivalent for the purposes of being homogeneous aggregates
5855 if they are the same size. */
5856 if (*modep == mode)
5857 return 1;
5858
5859 break;
5860
5861 case ARRAY_TYPE:
5862 {
5863 int count;
5864 tree index = TYPE_DOMAIN (type);
5865
5866 /* Can't handle incomplete types nor sizes that are not
5867 fixed. */
5868 if (!COMPLETE_TYPE_P (type)
5869 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5870 return -1;
5871
5872 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5873 if (count == -1
5874 || !index
5875 || !TYPE_MAX_VALUE (index)
5876 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5877 || !TYPE_MIN_VALUE (index)
5878 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5879 || count < 0)
5880 return -1;
5881
5882 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5883 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5884
5885 /* There must be no padding. */
5886 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5887 return -1;
5888
5889 return count;
5890 }
5891
5892 case RECORD_TYPE:
5893 {
5894 int count = 0;
5895 int sub_count;
5896 tree field;
5897
5898 /* Can't handle incomplete types nor sizes that are not
5899 fixed. */
5900 if (!COMPLETE_TYPE_P (type)
5901 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5902 return -1;
5903
5904 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5905 {
5906 if (TREE_CODE (field) != FIELD_DECL)
5907 continue;
5908
5909 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5910 if (sub_count < 0)
5911 return -1;
5912 count += sub_count;
5913 }
5914
5915 /* There must be no padding. */
5916 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5917 return -1;
5918
5919 return count;
5920 }
5921
5922 case UNION_TYPE:
5923 case QUAL_UNION_TYPE:
5924 {
5925 /* These aren't very interesting except in a degenerate case. */
5926 int count = 0;
5927 int sub_count;
5928 tree field;
5929
5930 /* Can't handle incomplete types nor sizes that are not
5931 fixed. */
5932 if (!COMPLETE_TYPE_P (type)
5933 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5934 return -1;
5935
5936 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5937 {
5938 if (TREE_CODE (field) != FIELD_DECL)
5939 continue;
5940
5941 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5942 if (sub_count < 0)
5943 return -1;
5944 count = count > sub_count ? count : sub_count;
5945 }
5946
5947 /* There must be no padding. */
5948 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5949 return -1;
5950
5951 return count;
5952 }
5953
5954 default:
5955 break;
5956 }
5957
5958 return -1;
5959 }
5960
5961 /* Return true if PCS_VARIANT should use VFP registers. */
5962 static bool
5963 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5964 {
5965 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5966 {
5967 static bool seen_thumb1_vfp = false;
5968
5969 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5970 {
5971 sorry ("Thumb-1 hard-float VFP ABI");
5972 /* sorry() is not immediately fatal, so only display this once. */
5973 seen_thumb1_vfp = true;
5974 }
5975
5976 return true;
5977 }
5978
5979 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5980 return false;
5981
5982 return (TARGET_32BIT && TARGET_HARD_FLOAT
5983 && (TARGET_VFP_DOUBLE || !is_double));
5984 }
5985
5986 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5987 suitable for passing or returning in VFP registers for the PCS
5988 variant selected. If it is, then *BASE_MODE is updated to contain
5989 a machine mode describing each element of the argument's type and
5990 *COUNT to hold the number of such elements. */
5991 static bool
5992 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5993 machine_mode mode, const_tree type,
5994 machine_mode *base_mode, int *count)
5995 {
5996 machine_mode new_mode = VOIDmode;
5997
5998 /* If we have the type information, prefer that to working things
5999 out from the mode. */
6000 if (type)
6001 {
6002 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6003
6004 if (ag_count > 0 && ag_count <= 4)
6005 *count = ag_count;
6006 else
6007 return false;
6008 }
6009 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6010 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6011 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6012 {
6013 *count = 1;
6014 new_mode = mode;
6015 }
6016 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6017 {
6018 *count = 2;
6019 new_mode = (mode == DCmode ? DFmode : SFmode);
6020 }
6021 else
6022 return false;
6023
6024
6025 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6026 return false;
6027
6028 *base_mode = new_mode;
6029 return true;
6030 }
6031
6032 static bool
6033 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6034 machine_mode mode, const_tree type)
6035 {
6036 int count ATTRIBUTE_UNUSED;
6037 machine_mode ag_mode ATTRIBUTE_UNUSED;
6038
6039 if (!use_vfp_abi (pcs_variant, false))
6040 return false;
6041 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6042 &ag_mode, &count);
6043 }
6044
6045 static bool
6046 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6047 const_tree type)
6048 {
6049 if (!use_vfp_abi (pcum->pcs_variant, false))
6050 return false;
6051
6052 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6053 &pcum->aapcs_vfp_rmode,
6054 &pcum->aapcs_vfp_rcount);
6055 }
6056
6057 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6058 for the behaviour of this function. */
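/* A sketch of the allocation (free registers assumed): for a homogeneous
   aggregate of two doubles, aapcs_vfp_rmode is DFmode and
   aapcs_vfp_rcount is 2, giving SHIFT == 2 and MASK == 0xf, so the loop
   searches for four consecutive free single-precision registers starting
   at an even boundary, e.g. s0-s3 (d0/d1). */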
6059
6060 static bool
6061 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6062 const_tree type ATTRIBUTE_UNUSED)
6063 {
6064 int rmode_size
6065 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6066 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6067 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6068 int regno;
6069
6070 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6071 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6072 {
6073 pcum->aapcs_vfp_reg_alloc = mask << regno;
6074 if (mode == BLKmode
6075 || (mode == TImode && ! TARGET_NEON)
6076 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6077 {
6078 int i;
6079 int rcount = pcum->aapcs_vfp_rcount;
6080 int rshift = shift;
6081 machine_mode rmode = pcum->aapcs_vfp_rmode;
6082 rtx par;
6083 if (!TARGET_NEON)
6084 {
6085 /* Avoid using unsupported vector modes. */
6086 if (rmode == V2SImode)
6087 rmode = DImode;
6088 else if (rmode == V4SImode)
6089 {
6090 rmode = DImode;
6091 rcount *= 2;
6092 rshift /= 2;
6093 }
6094 }
6095 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6096 for (i = 0; i < rcount; i++)
6097 {
6098 rtx tmp = gen_rtx_REG (rmode,
6099 FIRST_VFP_REGNUM + regno + i * rshift);
6100 tmp = gen_rtx_EXPR_LIST
6101 (VOIDmode, tmp,
6102 GEN_INT (i * GET_MODE_SIZE (rmode)));
6103 XVECEXP (par, 0, i) = tmp;
6104 }
6105
6106 pcum->aapcs_reg = par;
6107 }
6108 else
6109 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6110 return true;
6111 }
6112 return false;
6113 }
6114
6115 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6116 comment there for the behaviour of this function. */
6117
6118 static rtx
6119 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6120 machine_mode mode,
6121 const_tree type ATTRIBUTE_UNUSED)
6122 {
6123 if (!use_vfp_abi (pcs_variant, false))
6124 return NULL;
6125
6126 if (mode == BLKmode
6127 || (GET_MODE_CLASS (mode) == MODE_INT
6128 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6129 && !TARGET_NEON))
6130 {
6131 int count;
6132 machine_mode ag_mode;
6133 int i;
6134 rtx par;
6135 int shift;
6136
6137 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6138 &ag_mode, &count);
6139
6140 if (!TARGET_NEON)
6141 {
6142 if (ag_mode == V2SImode)
6143 ag_mode = DImode;
6144 else if (ag_mode == V4SImode)
6145 {
6146 ag_mode = DImode;
6147 count *= 2;
6148 }
6149 }
6150 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6151 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6152 for (i = 0; i < count; i++)
6153 {
6154 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6155 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6156 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6157 XVECEXP (par, 0, i) = tmp;
6158 }
6159
6160 return par;
6161 }
6162
6163 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6164 }
6165
6166 static void
6167 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6168 machine_mode mode ATTRIBUTE_UNUSED,
6169 const_tree type ATTRIBUTE_UNUSED)
6170 {
6171 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6172 pcum->aapcs_vfp_reg_alloc = 0;
6173 return;
6174 }
6175
6176 #define AAPCS_CP(X) \
6177 { \
6178 aapcs_ ## X ## _cum_init, \
6179 aapcs_ ## X ## _is_call_candidate, \
6180 aapcs_ ## X ## _allocate, \
6181 aapcs_ ## X ## _is_return_candidate, \
6182 aapcs_ ## X ## _allocate_return_reg, \
6183 aapcs_ ## X ## _advance \
6184 }
6185
6186 /* Table of co-processors that can be used to pass arguments in
6187 registers. Ideally no argument should be a candidate for more than
6188 one co-processor table entry, but the table is processed in order
6189 and stops after the first match. If that entry then fails to put
6190 the argument into a co-processor register, the argument will go on
6191 the stack. */
6192 static struct
6193 {
6194 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6195 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6196
6197 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6198 BLKmode) is a candidate for this co-processor's registers; this
6199 function should ignore any position-dependent state in
6200 CUMULATIVE_ARGS and only use call-type dependent information. */
6201 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6202
6203 /* Return true if the argument does get a co-processor register; it
6204 should set aapcs_reg to an RTX of the register allocated as is
6205 required for a return from FUNCTION_ARG. */
6206 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6207
6208 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6209 be returned in this co-processor's registers. */
6210 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6211
6212 /* Allocate and return an RTX element to hold the return type of a call. This
6213 routine must not fail and will only be called if is_return_candidate
6214 returned true with the same parameters. */
6215 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6216
6217 /* Finish processing this argument and prepare to start processing
6218 the next one. */
6219 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6220 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6221 {
6222 AAPCS_CP(vfp)
6223 };
6224
6225 #undef AAPCS_CP
6226
6227 static int
6228 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6229 const_tree type)
6230 {
6231 int i;
6232
6233 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6234 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6235 return i;
6236
6237 return -1;
6238 }
6239
6240 static int
6241 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6242 {
6243 /* We aren't passed a decl, so we can't check that a call is local.
6244 However, it isn't clear that that would be a win anyway, since it
6245 might limit some tail-calling opportunities. */
6246 enum arm_pcs pcs_variant;
6247
6248 if (fntype)
6249 {
6250 const_tree fndecl = NULL_TREE;
6251
6252 if (TREE_CODE (fntype) == FUNCTION_DECL)
6253 {
6254 fndecl = fntype;
6255 fntype = TREE_TYPE (fntype);
6256 }
6257
6258 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6259 }
6260 else
6261 pcs_variant = arm_pcs_default;
6262
6263 if (pcs_variant != ARM_PCS_AAPCS)
6264 {
6265 int i;
6266
6267 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6268 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6269 TYPE_MODE (type),
6270 type))
6271 return i;
6272 }
6273 return -1;
6274 }
6275
6276 static rtx
6277 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6278 const_tree fntype)
6279 {
6280 /* We aren't passed a decl, so we can't check that a call is local.
6281 However, it isn't clear that that would be a win anyway, since it
6282 might limit some tail-calling opportunities. */
6283 enum arm_pcs pcs_variant;
6284 int unsignedp ATTRIBUTE_UNUSED;
6285
6286 if (fntype)
6287 {
6288 const_tree fndecl = NULL_TREE;
6289
6290 if (TREE_CODE (fntype) == FUNCTION_DECL)
6291 {
6292 fndecl = fntype;
6293 fntype = TREE_TYPE (fntype);
6294 }
6295
6296 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6297 }
6298 else
6299 pcs_variant = arm_pcs_default;
6300
6301 /* Promote integer types. */
6302 if (type && INTEGRAL_TYPE_P (type))
6303 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6304
6305 if (pcs_variant != ARM_PCS_AAPCS)
6306 {
6307 int i;
6308
6309 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6310 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6311 type))
6312 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6313 mode, type);
6314 }
6315
6316 /* Promotes small structs returned in a register to full-word size
6317 for big-endian AAPCS. */
6318 if (type && arm_return_in_msb (type))
6319 {
6320 HOST_WIDE_INT size = int_size_in_bytes (type);
6321 if (size % UNITS_PER_WORD != 0)
6322 {
6323 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6324 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6325 }
6326 }
6327
6328 return gen_rtx_REG (mode, R0_REGNUM);
6329 }
6330
6331 static rtx
6332 aapcs_libcall_value (machine_mode mode)
6333 {
6334 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6335 && GET_MODE_SIZE (mode) <= 4)
6336 mode = SImode;
6337
6338 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6339 }
6340
6341 /* Lay out a function argument using the AAPCS rules. The rule
6342 numbers referred to here are those in the AAPCS. */
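/* Worked example under the base AAPCS (core-register passing assumed):
   for void f (int a, double b) the int takes r0; rule C3 then rounds
   the NCRN up from 1 to 2 for the doubleword-aligned double, which is
   passed in r2/r3, leaving r1 unused. */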
6343 static void
6344 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6345 const_tree type, bool named)
6346 {
6347 int nregs, nregs2;
6348 int ncrn;
6349
6350 /* We only need to do this once per argument. */
6351 if (pcum->aapcs_arg_processed)
6352 return;
6353
6354 pcum->aapcs_arg_processed = true;
6355
6356 /* Special case: if named is false then we are handling an incoming
6357 anonymous argument which is on the stack. */
6358 if (!named)
6359 return;
6360
6361 /* Is this a potential co-processor register candidate? */
6362 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6363 {
6364 int slot = aapcs_select_call_coproc (pcum, mode, type);
6365 pcum->aapcs_cprc_slot = slot;
6366
6367 /* We don't have to apply any of the rules from part B of the
6368 preparation phase, these are handled elsewhere in the
6369 compiler. */
6370
6371 if (slot >= 0)
6372 {
6373 /* A Co-processor register candidate goes either in its own
6374 class of registers or on the stack. */
6375 if (!pcum->aapcs_cprc_failed[slot])
6376 {
6377 /* C1.cp - Try to allocate the argument to co-processor
6378 registers. */
6379 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6380 return;
6381
6382 /* C2.cp - Put the argument on the stack and note that we
6383 can't assign any more candidates in this slot. We also
6384 need to note that we have allocated stack space, so that
6385 we won't later try to split a non-cprc candidate between
6386 core registers and the stack. */
6387 pcum->aapcs_cprc_failed[slot] = true;
6388 pcum->can_split = false;
6389 }
6390
6391 /* We didn't get a register, so this argument goes on the
6392 stack. */
6393 gcc_assert (pcum->can_split == false);
6394 return;
6395 }
6396 }
6397
6398 /* C3 - For double-word aligned arguments, round the NCRN up to the
6399 next even number. */
6400 ncrn = pcum->aapcs_ncrn;
6401 if (ncrn & 1)
6402 {
6403 int res = arm_needs_doubleword_align (mode, type);
6404 /* Only warn during RTL expansion of call stmts, otherwise we would
6405 warn e.g. during gimplification even on functions that will be
6406 always inlined, and we'd warn multiple times. Don't warn when
6407 called in expand_function_start either, as we warn instead in
6408 arm_function_arg_boundary in that case. */
6409 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6410 inform (input_location, "parameter passing for argument of type "
6411 "%qT changed in GCC 7.1", type);
6412 else if (res > 0)
6413 ncrn++;
6414 }
6415
6416 nregs = ARM_NUM_REGS2 (mode, type);
6417
6418 /* Sigh, this test should really assert that nregs > 0, but a GCC
6419 extension allows empty structs and then gives them zero size; it
6420 then allows such a structure to be passed by value. For some of
6421 the code below we have to pretend that such an argument has
6422 non-zero size so that we 'locate' it correctly either in
6423 registers or on the stack. */
6424 gcc_assert (nregs >= 0);
6425
6426 nregs2 = nregs ? nregs : 1;
6427
6428 /* C4 - Argument fits entirely in core registers. */
6429 if (ncrn + nregs2 <= NUM_ARG_REGS)
6430 {
6431 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6432 pcum->aapcs_next_ncrn = ncrn + nregs;
6433 return;
6434 }
6435
6436 /* C5 - Some core registers left and there are no arguments already
6437 on the stack: split this argument between the remaining core
6438 registers and the stack. */
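/* For instance (sizes assumed): a 16-byte structure arriving with
   NCRN == 2 gets its first 8 bytes in r2/r3 and the remaining 8 bytes
   on the stack, recorded in aapcs_partial below. */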
6439 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6440 {
6441 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6442 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6443 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6444 return;
6445 }
6446
6447 /* C6 - NCRN is set to 4. */
6448 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6449
6450 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6451 return;
6452 }
6453
6454 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6455 for a call to a function whose data type is FNTYPE.
6456 For a library call, FNTYPE is NULL. */
6457 void
6458 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6459 rtx libname,
6460 tree fndecl ATTRIBUTE_UNUSED)
6461 {
6462 /* Long call handling. */
6463 if (fntype)
6464 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6465 else
6466 pcum->pcs_variant = arm_pcs_default;
6467
6468 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6469 {
6470 if (arm_libcall_uses_aapcs_base (libname))
6471 pcum->pcs_variant = ARM_PCS_AAPCS;
6472
6473 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6474 pcum->aapcs_reg = NULL_RTX;
6475 pcum->aapcs_partial = 0;
6476 pcum->aapcs_arg_processed = false;
6477 pcum->aapcs_cprc_slot = -1;
6478 pcum->can_split = true;
6479
6480 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6481 {
6482 int i;
6483
6484 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6485 {
6486 pcum->aapcs_cprc_failed[i] = false;
6487 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6488 }
6489 }
6490 return;
6491 }
6492
6493 /* Legacy ABIs */
6494
6495 /* On the ARM, the offset starts at 0. */
6496 pcum->nregs = 0;
6497 pcum->iwmmxt_nregs = 0;
6498 pcum->can_split = true;
6499
6500 /* Varargs vectors are treated the same as long long.
6501 named_count avoids having to change the way arm handles 'named' */
6502 pcum->named_count = 0;
6503 pcum->nargs = 0;
6504
6505 if (TARGET_REALLY_IWMMXT && fntype)
6506 {
6507 tree fn_arg;
6508
6509 for (fn_arg = TYPE_ARG_TYPES (fntype);
6510 fn_arg;
6511 fn_arg = TREE_CHAIN (fn_arg))
6512 pcum->named_count += 1;
6513
6514 if (! pcum->named_count)
6515 pcum->named_count = INT_MAX;
6516 }
6517 }
6518
6519 /* Return 1 if double word alignment is required for argument passing.
6520 Return -1 if double word alignment used to be required for argument
6521 passing before PR77728 ABI fix, but is not required anymore.
6522 Return 0 if double word alignment is not required and wasn't required
6523 before either. */
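/* Examples (types assumed): a double or long long parameter returns 1;
   a plain int returns 0; an aggregate that is doubleword-aligned only
   because of a non-FIELD_DECL entry (e.g. a static data member) returns
   -1, matching the pre-PR77728 behaviour so that -Wpsabi can warn. */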
6524 static int
6525 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6526 {
6527 if (!type)
6528 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6529
6530 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6531 if (!AGGREGATE_TYPE_P (type))
6532 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6533
6534 /* Array types: Use member alignment of element type. */
6535 if (TREE_CODE (type) == ARRAY_TYPE)
6536 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6537
6538 int ret = 0;
6539 /* Record/aggregate types: Use greatest member alignment of any member. */
6540 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6541 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6542 {
6543 if (TREE_CODE (field) == FIELD_DECL)
6544 return 1;
6545 else
6546 /* Before PR77728 fix, we were incorrectly considering also
6547 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6548 Make sure we can warn about that with -Wpsabi. */
6549 ret = -1;
6550 }
6551
6552 return ret;
6553 }
6554
6555
6556 /* Determine where to put an argument to a function.
6557 Value is zero to push the argument on the stack,
6558 or a hard register in which to store the argument.
6559
6560 MODE is the argument's machine mode.
6561 TYPE is the data type of the argument (as a tree).
6562 This is null for libcalls where that information may
6563 not be available.
6564 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6565 the preceding args and about the function being called.
6566 NAMED is nonzero if this argument is a named parameter
6567 (otherwise it is an extra parameter matching an ellipsis).
6568
6569 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6570 other arguments are passed on the stack. If (NAMED == 0) (which happens
6571 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6572 defined), say it is passed on the stack (function_prologue will
6573 indeed make it pass on the stack if necessary). */
6574
6575 static rtx
6576 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6577 const_tree type, bool named)
6578 {
6579 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6580 int nregs;
6581
6582 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6583 a call insn (op3 of a call_value insn). */
6584 if (mode == VOIDmode)
6585 return const0_rtx;
6586
6587 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6588 {
6589 aapcs_layout_arg (pcum, mode, type, named);
6590 return pcum->aapcs_reg;
6591 }
6592
6593 /* Varargs vectors are treated the same as long long.
6594 named_count avoids having to change the way arm handles 'named' */
6595 if (TARGET_IWMMXT_ABI
6596 && arm_vector_mode_supported_p (mode)
6597 && pcum->named_count > pcum->nargs + 1)
6598 {
6599 if (pcum->iwmmxt_nregs <= 9)
6600 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6601 else
6602 {
6603 pcum->can_split = false;
6604 return NULL_RTX;
6605 }
6606 }
6607
6608 /* Put doubleword aligned quantities in even register pairs. */
6609 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6610 {
6611 int res = arm_needs_doubleword_align (mode, type);
6612 if (res < 0 && warn_psabi)
6613 inform (input_location, "parameter passing for argument of type "
6614 "%qT changed in GCC 7.1", type);
6615 else if (res > 0)
6616 pcum->nregs++;
6617 }
6618
6619 /* Only allow splitting an arg between regs and memory if all preceding
6620 args were allocated to regs. For args passed by reference we only count
6621 the reference pointer. */
6622 if (pcum->can_split)
6623 nregs = 1;
6624 else
6625 nregs = ARM_NUM_REGS2 (mode, type);
6626
6627 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6628 return NULL_RTX;
6629
6630 return gen_rtx_REG (mode, pcum->nregs);
6631 }
6632
6633 static unsigned int
6634 arm_function_arg_boundary (machine_mode mode, const_tree type)
6635 {
6636 if (!ARM_DOUBLEWORD_ALIGN)
6637 return PARM_BOUNDARY;
6638
6639 int res = arm_needs_doubleword_align (mode, type);
6640 if (res < 0 && warn_psabi)
6641 inform (input_location, "parameter passing for argument of type %qT "
6642 "changed in GCC 7.1", type);
6643
6644 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6645 }
6646
6647 static int
6648 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6649 tree type, bool named)
6650 {
6651 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6652 int nregs = pcum->nregs;
6653
6654 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6655 {
6656 aapcs_layout_arg (pcum, mode, type, named);
6657 return pcum->aapcs_partial;
6658 }
6659
6660 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6661 return 0;
6662
6663 if (NUM_ARG_REGS > nregs
6664 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6665 && pcum->can_split)
6666 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6667
6668 return 0;
6669 }
6670
6671 /* Update the data in PCUM to advance over an argument
6672 of mode MODE and data type TYPE.
6673 (TYPE is null for libcalls where that information may not be available.) */
6674
6675 static void
6676 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6677 const_tree type, bool named)
6678 {
6679 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6680
6681 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6682 {
6683 aapcs_layout_arg (pcum, mode, type, named);
6684
6685 if (pcum->aapcs_cprc_slot >= 0)
6686 {
6687 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6688 type);
6689 pcum->aapcs_cprc_slot = -1;
6690 }
6691
6692 /* Generic stuff. */
6693 pcum->aapcs_arg_processed = false;
6694 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6695 pcum->aapcs_reg = NULL_RTX;
6696 pcum->aapcs_partial = 0;
6697 }
6698 else
6699 {
6700 pcum->nargs += 1;
6701 if (arm_vector_mode_supported_p (mode)
6702 && pcum->named_count > pcum->nargs
6703 && TARGET_IWMMXT_ABI)
6704 pcum->iwmmxt_nregs += 1;
6705 else
6706 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6707 }
6708 }
6709
6710 /* Variable sized types are passed by reference. This is a GCC
6711 extension to the ARM ABI. */
6712
6713 static bool
6714 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6715 machine_mode mode ATTRIBUTE_UNUSED,
6716 const_tree type, bool named ATTRIBUTE_UNUSED)
6717 {
6718 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6719 }
6720 \f
6721 /* Encode the current state of the #pragma [no_]long_calls. */
6722 typedef enum
6723 {
6724 OFF, /* No #pragma [no_]long_calls is in effect. */
6725 LONG, /* #pragma long_calls is in effect. */
6726 SHORT /* #pragma no_long_calls is in effect. */
6727 } arm_pragma_enum;
6728
6729 static arm_pragma_enum arm_pragma_long_calls = OFF;
6730
6731 void
6732 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6733 {
6734 arm_pragma_long_calls = LONG;
6735 }
6736
6737 void
6738 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6739 {
6740 arm_pragma_long_calls = SHORT;
6741 }
6742
6743 void
6744 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6745 {
6746 arm_pragma_long_calls = OFF;
6747 }
6748 \f
6749 /* Handle an attribute requiring a FUNCTION_DECL;
6750 arguments as in struct attribute_spec.handler. */
6751 static tree
6752 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6753 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6754 {
6755 if (TREE_CODE (*node) != FUNCTION_DECL)
6756 {
6757 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6758 name);
6759 *no_add_attrs = true;
6760 }
6761
6762 return NULL_TREE;
6763 }
6764
6765 /* Handle an "interrupt" or "isr" attribute;
6766 arguments as in struct attribute_spec.handler. */
6767 static tree
6768 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6769 bool *no_add_attrs)
6770 {
6771 if (DECL_P (*node))
6772 {
6773 if (TREE_CODE (*node) != FUNCTION_DECL)
6774 {
6775 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6776 name);
6777 *no_add_attrs = true;
6778 }
6779 /* FIXME: the argument if any is checked for type attributes;
6780 should it be checked for decl ones? */
6781 }
6782 else
6783 {
6784 if (TREE_CODE (*node) == FUNCTION_TYPE
6785 || TREE_CODE (*node) == METHOD_TYPE)
6786 {
6787 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6788 {
6789 warning (OPT_Wattributes, "%qE attribute ignored",
6790 name);
6791 *no_add_attrs = true;
6792 }
6793 }
6794 else if (TREE_CODE (*node) == POINTER_TYPE
6795 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6796 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6797 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6798 {
6799 *node = build_variant_type_copy (*node);
6800 TREE_TYPE (*node) = build_type_attribute_variant
6801 (TREE_TYPE (*node),
6802 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6803 *no_add_attrs = true;
6804 }
6805 else
6806 {
6807 /* Possibly pass this attribute on from the type to a decl. */
6808 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6809 | (int) ATTR_FLAG_FUNCTION_NEXT
6810 | (int) ATTR_FLAG_ARRAY_NEXT))
6811 {
6812 *no_add_attrs = true;
6813 return tree_cons (name, args, NULL_TREE);
6814 }
6815 else
6816 {
6817 warning (OPT_Wattributes, "%qE attribute ignored",
6818 name);
6819 }
6820 }
6821 }
6822
6823 return NULL_TREE;
6824 }
6825
6826 /* Handle a "pcs" attribute; arguments as in struct
6827 attribute_spec.handler. */
6828 static tree
6829 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6830 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6831 {
6832 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6833 {
6834 warning (OPT_Wattributes, "%qE attribute ignored", name);
6835 *no_add_attrs = true;
6836 }
6837 return NULL_TREE;
6838 }
6839
6840 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6841 /* Handle the "notshared" attribute. This attribute is another way of
6842 requesting hidden visibility. ARM's compiler supports
6843 "__declspec(notshared)"; we support the same thing via an
6844 attribute. */
6845
6846 static tree
6847 arm_handle_notshared_attribute (tree *node,
6848 tree name ATTRIBUTE_UNUSED,
6849 tree args ATTRIBUTE_UNUSED,
6850 int flags ATTRIBUTE_UNUSED,
6851 bool *no_add_attrs)
6852 {
6853 tree decl = TYPE_NAME (*node);
6854
6855 if (decl)
6856 {
6857 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6858 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6859 *no_add_attrs = false;
6860 }
6861 return NULL_TREE;
6862 }
6863 #endif
6864
6865 /* Return true if a function with declaration FNDECL and type FNTYPE
6866 uses the stack to pass arguments or to return its value, and false
6867 otherwise. This is used for functions with the attributes
6868 'cmse_nonsecure_call' or 'cmse_nonsecure_entry', and it issues
6869 diagnostic messages if the stack is used. NAME is the name of the
6870 attribute used. */
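/* For illustration (prototype assumed):
   __attribute__ ((cmse_nonsecure_entry))
   int f (long long a, long long b, int c);
   A and B consume r0-r3, so C would have to go on the stack and the
   attribute is rejected with an error below. */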
6871
6872 static bool
6873 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6874 {
6875 function_args_iterator args_iter;
6876 CUMULATIVE_ARGS args_so_far_v;
6877 cumulative_args_t args_so_far;
6878 bool first_param = true;
6879 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6880
6881 /* Error out if any argument is passed on the stack. */
6882 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6883 args_so_far = pack_cumulative_args (&args_so_far_v);
6884 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6885 {
6886 rtx arg_rtx;
6887 machine_mode arg_mode = TYPE_MODE (arg_type);
6888
6889 prev_arg_type = arg_type;
6890 if (VOID_TYPE_P (arg_type))
6891 continue;
6892
6893 if (!first_param)
6894 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6895 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6896 if (!arg_rtx
6897 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6898 {
6899 error ("%qE attribute not available to functions with arguments "
6900 "passed on the stack", name);
6901 return true;
6902 }
6903 first_param = false;
6904 }
6905
6906 /* Error out for variadic functions since we cannot control how many
6907 arguments will be passed and thus the stack could be used. stdarg_p () is
6908 not used for this check to avoid walking the arguments twice. */
6909 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6910 {
6911 error ("%qE attribute not available to functions with variable number "
6912 "of arguments", name);
6913 return true;
6914 }
6915
6916 /* Error out if return value is passed on the stack. */
6917 ret_type = TREE_TYPE (fntype);
6918 if (arm_return_in_memory (ret_type, fntype))
6919 {
6920 error ("%qE attribute not available to functions that return value on "
6921 "the stack", name);
6922 return true;
6923 }
6924 return false;
6925 }
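
/* As a rough illustration of what the check above rejects: under the AAPCS
   only the first four integer-sized arguments are passed in r0-r3, so a
   (hypothetical) declaration along the lines of

     int __attribute__ ((cmse_nonsecure_entry))
     sum5 (int a, int b, int c, int d, int e);

   would pass E on the stack and trigger the "passed on the stack" error,
   as would a variadic prototype or a return type that arm_return_in_memory
   says is returned in memory.  The exact set of accepted signatures is
   whatever the argument-layout code above computes.  */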
6926
6927 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6928 function will check whether the attribute is allowed here and will add the
6929 attribute to the function declaration tree or otherwise issue a warning. */
6930
6931 static tree
6932 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6933 tree /* args */,
6934 int /* flags */,
6935 bool *no_add_attrs)
6936 {
6937 tree fndecl;
6938
6939 if (!use_cmse)
6940 {
6941 *no_add_attrs = true;
6942 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6943 name);
6944 return NULL_TREE;
6945 }
6946
6947 /* Ignore attribute for function types. */
6948 if (TREE_CODE (*node) != FUNCTION_DECL)
6949 {
6950 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6951 name);
6952 *no_add_attrs = true;
6953 return NULL_TREE;
6954 }
6955
6956 fndecl = *node;
6957
6958 /* Warn for static linkage functions. */
6959 if (!TREE_PUBLIC (fndecl))
6960 {
6961 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6962 "with static linkage", name);
6963 *no_add_attrs = true;
6964 return NULL_TREE;
6965 }
6966
6967 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6968 TREE_TYPE (fndecl));
6969 return NULL_TREE;
6970 }
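
/* A minimal sketch of the intended use, assuming the code is compiled
   with -mcmse:

     int __attribute__ ((cmse_nonsecure_entry)) get_secure_counter (void);

   The attribute is only accepted on externally visible function
   declarations; using it on a static function, on something that is not
   a function, or without -mcmse runs into one of the warnings above.
   (The function name here is purely illustrative.)  */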
6971
6972
6973 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6974 function will check whether the attribute is allowed here and will add the
6975 attribute to the function type tree or otherwise issue a diagnostic. The
6976 reason we check this at declaration time is to only allow the use of the
6977 attribute with declarations of function pointers and not function
6978 declarations. This function checks NODE is of the expected type and issues
6979 diagnostics otherwise using NAME. If it is not of the expected type
6980 *NO_ADD_ATTRS will be set to true. */
6981
6982 static tree
6983 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6984 tree /* args */,
6985 int /* flags */,
6986 bool *no_add_attrs)
6987 {
6988 tree decl = NULL_TREE, fntype = NULL_TREE;
6989 tree type;
6990
6991 if (!use_cmse)
6992 {
6993 *no_add_attrs = true;
6994 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6995 name);
6996 return NULL_TREE;
6997 }
6998
6999 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7000 {
7001 decl = *node;
7002 fntype = TREE_TYPE (decl);
7003 }
7004
7005 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7006 fntype = TREE_TYPE (fntype);
7007
7008 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7009 {
7010 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7011 "function pointer", name);
7012 *no_add_attrs = true;
7013 return NULL_TREE;
7014 }
7015
7016 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7017
7018 if (*no_add_attrs)
7019 return NULL_TREE;
7020
7021 /* Prevent trees from being shared among function types with and without
7022 the cmse_nonsecure_call attribute. */
7023 type = TREE_TYPE (decl);
7024
7025 type = build_distinct_type_copy (type);
7026 TREE_TYPE (decl) = type;
7027 fntype = type;
7028
7029 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7030 {
7031 type = fntype;
7032 fntype = TREE_TYPE (fntype);
7033 fntype = build_distinct_type_copy (fntype);
7034 TREE_TYPE (type) = fntype;
7035 }
7036
7037 /* Construct a type attribute and add it to the function type. */
7038 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7039 TYPE_ATTRIBUTES (fntype));
7040 TYPE_ATTRIBUTES (fntype) = attrs;
7041 return NULL_TREE;
7042 }
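
/* A minimal sketch of the intended use, again assuming -mcmse.  The
   attribute is attached to the function type reached through a pointer
   declaration, for instance:

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

   Calls made through NS_CALLBACK are then expanded later with the
   non-secure call sequence.  Putting the attribute directly on a function
   declaration is rejected by the check above instead.  */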
7043
7044 /* Return 0 if the attributes for two types are incompatible, 1 if they
7045 are compatible, and 2 if they are nearly compatible (which causes a
7046 warning to be generated). */
7047 static int
7048 arm_comp_type_attributes (const_tree type1, const_tree type2)
7049 {
7050 int l1, l2, s1, s2;
7051
7052 /* Check for mismatch of non-default calling convention. */
7053 if (TREE_CODE (type1) != FUNCTION_TYPE)
7054 return 1;
7055
7056 /* Check for mismatched call attributes. */
7057 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7058 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7059 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7060 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7061
7062 /* Only bother to check if an attribute is defined. */
7063 if (l1 | l2 | s1 | s2)
7064 {
7065 /* If one type has an attribute, the other must have the same attribute. */
7066 if ((l1 != l2) || (s1 != s2))
7067 return 0;
7068
7069 /* Disallow mixed attributes. */
7070 if ((l1 & s2) || (l2 & s1))
7071 return 0;
7072 }
7073
7074 /* Check for mismatched ISR attribute. */
7075 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7076 if (! l1)
7077 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7078 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7079 if (! l2)
7080 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7081 if (l1 != l2)
7082 return 0;
7083
7084 l1 = lookup_attribute ("cmse_nonsecure_call",
7085 TYPE_ATTRIBUTES (type1)) != NULL;
7086 l2 = lookup_attribute ("cmse_nonsecure_call",
7087 TYPE_ATTRIBUTES (type2)) != NULL;
7088
7089 if (l1 != l2)
7090 return 0;
7091
7092 return 1;
7093 }
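
/* A small illustration of the checks above: given

     extern void far_fn (void) __attribute__ ((long_call));
     void (*plain_fp) (void) = far_fn;

   the two function types differ in their long_call attribute, so this
   hook returns 0 and the initialization is diagnosed as mixing
   incompatible pointer types.  Mismatches of short_call, of the
   isr/interrupt attributes, or of cmse_nonsecure_call behave the same
   way.  */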
7094
7095 /* Assigns default attributes to newly defined type. This is used to
7096 set short_call/long_call attributes for function types of
7097 functions defined inside corresponding #pragma scopes. */
7098 static void
7099 arm_set_default_type_attributes (tree type)
7100 {
7101 /* Add __attribute__ ((long_call)) to all functions, when
7102 inside #pragma long_calls or __attribute__ ((short_call)),
7103 when inside #pragma no_long_calls. */
7104 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7105 {
7106 tree type_attr_list, attr_name;
7107 type_attr_list = TYPE_ATTRIBUTES (type);
7108
7109 if (arm_pragma_long_calls == LONG)
7110 attr_name = get_identifier ("long_call");
7111 else if (arm_pragma_long_calls == SHORT)
7112 attr_name = get_identifier ("short_call");
7113 else
7114 return;
7115
7116 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7117 TYPE_ATTRIBUTES (type) = type_attr_list;
7118 }
7119 }
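
/* A brief illustration of the pragma interaction handled above:

     #pragma long_calls
     void far_away (void);      (type gets an implicit long_call)
     #pragma no_long_calls
     void nearby (void);        (type gets an implicit short_call)
     #pragma long_calls_off
     void ordinary (void);      (no attribute is added)

   The current pragma state is recorded in arm_pragma_long_calls by the
   long_calls pragma handlers.  */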
7120 \f
7121 /* Return true if DECL is known to be linked into section SECTION. */
7122
7123 static bool
7124 arm_function_in_section_p (tree decl, section *section)
7125 {
7126 /* We can only be certain about the prevailing symbol definition. */
7127 if (!decl_binds_to_current_def_p (decl))
7128 return false;
7129
7130 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7131 if (!DECL_SECTION_NAME (decl))
7132 {
7133 /* Make sure that we will not create a unique section for DECL. */
7134 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7135 return false;
7136 }
7137
7138 return function_section (decl) == section;
7139 }
7140
7141 /* Return nonzero if a 32-bit "long_call" should be generated for
7142 a call from the current function to DECL. We generate a long_call
7143 if the function:
7144
7145 a. has an __attribute__ ((long_call))
7146 or b. is within the scope of a #pragma long_calls
7147 or c. the -mlong-calls command line switch has been specified
7148
7149 However we do not generate a long call if the function:
7150
7151 d. has an __attribute__ ((short_call))
7152 or e. is inside the scope of a #pragma no_long_calls
7153 or f. is defined in the same section as the current function. */
7154
7155 bool
7156 arm_is_long_call_p (tree decl)
7157 {
7158 tree attrs;
7159
7160 if (!decl)
7161 return TARGET_LONG_CALLS;
7162
7163 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7164 if (lookup_attribute ("short_call", attrs))
7165 return false;
7166
7167 /* For "f", be conservative, and only cater for cases in which the
7168 whole of the current function is placed in the same section. */
7169 if (!flag_reorder_blocks_and_partition
7170 && TREE_CODE (decl) == FUNCTION_DECL
7171 && arm_function_in_section_p (decl, current_function_section ()))
7172 return false;
7173
7174 if (lookup_attribute ("long_call", attrs))
7175 return true;
7176
7177 return TARGET_LONG_CALLS;
7178 }
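
/* A small worked example of the rules above, assuming -mlong-calls is in
   effect (declaration names are illustrative):

     extern void ext_fn (void);
     extern void near_fn (void) __attribute__ ((short_call));
     static void local_fn (void) { }
     void caller (void) { ext_fn (); near_fn (); local_fn (); }

   EXT_FN gets the long-call sequence (rule c), NEAR_FN is forced back to
   a plain BL by its short_call attribute (rule d), and LOCAL_FN is also
   called directly when it is known to end up in the same section as
   CALLER (rule f).  */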
7179
7180 /* Return nonzero if it is ok to make a tail-call to DECL. */
7181 static bool
7182 arm_function_ok_for_sibcall (tree decl, tree exp)
7183 {
7184 unsigned long func_type;
7185
7186 if (cfun->machine->sibcall_blocked)
7187 return false;
7188
7189 /* Never tailcall something if we are generating code for Thumb-1. */
7190 if (TARGET_THUMB1)
7191 return false;
7192
7193 /* The PIC register is live on entry to VxWorks PLT entries, so we
7194 must make the call before restoring the PIC register. */
7195 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7196 return false;
7197
7198 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7199 may be used both as target of the call and base register for restoring
7200 the VFP registers. */
7201 if (TARGET_APCS_FRAME && TARGET_ARM
7202 && TARGET_HARD_FLOAT
7203 && decl && arm_is_long_call_p (decl))
7204 return false;
7205
7206 /* If we are interworking and the function is not declared static
7207 then we can't tail-call it unless we know that it exists in this
7208 compilation unit (since it might be a Thumb routine). */
7209 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7210 && !TREE_ASM_WRITTEN (decl))
7211 return false;
7212
7213 func_type = arm_current_func_type ();
7214 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7215 if (IS_INTERRUPT (func_type))
7216 return false;
7217
7218 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7219 generated for entry functions themselves. */
7220 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7221 return false;
7222
7223 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7224 this would complicate matters for later code generation. */
7225 if (TREE_CODE (exp) == CALL_EXPR)
7226 {
7227 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7228 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7229 return false;
7230 }
7231
7232 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7233 {
7234 /* Check that the return value locations are the same. For
7235 example that we aren't returning a value from the sibling in
7236 a VFP register but then need to transfer it to a core
7237 register. */
7238 rtx a, b;
7239 tree decl_or_type = decl;
7240
7241 /* If it is an indirect function pointer, get the function type. */
7242 if (!decl)
7243 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7244
7245 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7246 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7247 cfun->decl, false);
7248 if (!rtx_equal_p (a, b))
7249 return false;
7250 }
7251
7252 /* Never tailcall if function may be called with a misaligned SP. */
7253 if (IS_STACKALIGN (func_type))
7254 return false;
7255
7256 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7257 references should become a NOP. Don't convert such calls into
7258 sibling calls. */
7259 if (TARGET_AAPCS_BASED
7260 && arm_abi == ARM_ABI_AAPCS
7261 && decl
7262 && DECL_WEAK (decl))
7263 return false;
7264
7265 /* We cannot do a tailcall for an indirect call by descriptor if all the
7266 argument registers are used because the only register left to load the
7267 address is IP and it will already contain the static chain. */
7268 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7269 {
7270 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7271 CUMULATIVE_ARGS cum;
7272 cumulative_args_t cum_v;
7273
7274 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7275 cum_v = pack_cumulative_args (&cum);
7276
7277 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7278 {
7279 tree type = TREE_VALUE (t);
7280 if (!VOID_TYPE_P (type))
7281 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7282 }
7283
7284 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7285 return false;
7286 }
7287
7288 /* Everything else is ok. */
7289 return true;
7290 }
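
/* As a rough illustration of one of the rejections above: with -mcmse, a
   call in tail position through a non-secure function pointer such as

     int (*ns_get) (void) __attribute__ ((cmse_nonsecure_call));
     int wrapper (void) { return ns_get (); }

   is deliberately not turned into a sibling call, since the non-secure
   call sequence is handled separately at expansion time.  */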
7291
7292 \f
7293 /* Addressing mode support functions. */
7294
7295 /* Return nonzero if X is a legitimate immediate operand when compiling
7296 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7297 int
7298 legitimate_pic_operand_p (rtx x)
7299 {
7300 if (GET_CODE (x) == SYMBOL_REF
7301 || (GET_CODE (x) == CONST
7302 && GET_CODE (XEXP (x, 0)) == PLUS
7303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7304 return 0;
7305
7306 return 1;
7307 }
7308
7309 /* Record that the current function needs a PIC register. Initialize
7310 cfun->machine->pic_reg if we have not already done so. */
7311
7312 static void
7313 require_pic_register (void)
7314 {
7315 /* A lot of the logic here is made obscure by the fact that this
7316 routine gets called as part of the rtx cost estimation process.
7317 We don't want those calls to affect any assumptions about the real
7318 function; and further, we can't call entry_of_function() until we
7319 start the real expansion process. */
7320 if (!crtl->uses_pic_offset_table)
7321 {
7322 gcc_assert (can_create_pseudo_p ());
7323 if (arm_pic_register != INVALID_REGNUM
7324 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7325 {
7326 if (!cfun->machine->pic_reg)
7327 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7328
7329 /* Play games to avoid marking the function as needing pic
7330 if we are being called as part of the cost-estimation
7331 process. */
7332 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7333 crtl->uses_pic_offset_table = 1;
7334 }
7335 else
7336 {
7337 rtx_insn *seq, *insn;
7338
7339 if (!cfun->machine->pic_reg)
7340 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7341
7342 /* Play games to avoid marking the function as needing pic
7343 if we are being called as part of the cost-estimation
7344 process. */
7345 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7346 {
7347 crtl->uses_pic_offset_table = 1;
7348 start_sequence ();
7349
7350 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7351 && arm_pic_register > LAST_LO_REGNUM)
7352 emit_move_insn (cfun->machine->pic_reg,
7353 gen_rtx_REG (Pmode, arm_pic_register));
7354 else
7355 arm_load_pic_register (0UL);
7356
7357 seq = get_insns ();
7358 end_sequence ();
7359
7360 for (insn = seq; insn; insn = NEXT_INSN (insn))
7361 if (INSN_P (insn))
7362 INSN_LOCATION (insn) = prologue_location;
7363
7364 /* We can be called during expansion of PHI nodes, where
7365 we can't yet emit instructions directly in the final
7366 insn stream. Queue the insns on the entry edge, they will
7367 be committed after everything else is expanded. */
7368 insert_insn_on_edge (seq,
7369 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7370 }
7371 }
7372 }
7373 }
7374
7375 rtx
7376 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7377 {
7378 if (GET_CODE (orig) == SYMBOL_REF
7379 || GET_CODE (orig) == LABEL_REF)
7380 {
7381 if (reg == 0)
7382 {
7383 gcc_assert (can_create_pseudo_p ());
7384 reg = gen_reg_rtx (Pmode);
7385 }
7386
7387 /* VxWorks does not impose a fixed gap between segments; the run-time
7388 gap can be different from the object-file gap. We therefore can't
7389 use GOTOFF unless we are absolutely sure that the symbol is in the
7390 same segment as the GOT. Unfortunately, the flexibility of linker
7391 scripts means that we can't be sure of that in general, so assume
7392 that GOTOFF is never valid on VxWorks. */
7393 /* References to weak symbols cannot be resolved locally: they
7394 may be overridden by a non-weak definition at link time. */
7395 rtx_insn *insn;
7396 if ((GET_CODE (orig) == LABEL_REF
7397 || (GET_CODE (orig) == SYMBOL_REF
7398 && SYMBOL_REF_LOCAL_P (orig)
7399 && (SYMBOL_REF_DECL (orig)
7400 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7401 && NEED_GOT_RELOC
7402 && arm_pic_data_is_text_relative)
7403 insn = arm_pic_static_addr (orig, reg);
7404 else
7405 {
7406 rtx pat;
7407 rtx mem;
7408
7409 /* If this function doesn't have a pic register, create one now. */
7410 require_pic_register ();
7411
7412 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7413
7414 /* Make the MEM as close to a constant as possible. */
7415 mem = SET_SRC (pat);
7416 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7417 MEM_READONLY_P (mem) = 1;
7418 MEM_NOTRAP_P (mem) = 1;
7419
7420 insn = emit_insn (pat);
7421 }
7422
7423 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7424 by loop. */
7425 set_unique_reg_note (insn, REG_EQUAL, orig);
7426
7427 return reg;
7428 }
7429 else if (GET_CODE (orig) == CONST)
7430 {
7431 rtx base, offset;
7432
7433 if (GET_CODE (XEXP (orig, 0)) == PLUS
7434 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7435 return orig;
7436
7437 /* Handle the case where we have: const (UNSPEC_TLS). */
7438 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7439 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7440 return orig;
7441
7442 /* Handle the case where we have:
7443 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7444 CONST_INT. */
7445 if (GET_CODE (XEXP (orig, 0)) == PLUS
7446 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7447 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7448 {
7449 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7450 return orig;
7451 }
7452
7453 if (reg == 0)
7454 {
7455 gcc_assert (can_create_pseudo_p ());
7456 reg = gen_reg_rtx (Pmode);
7457 }
7458
7459 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7460
7461 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7462 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7463 base == reg ? 0 : reg);
7464
7465 if (CONST_INT_P (offset))
7466 {
7467 /* The base register doesn't really matter, we only want to
7468 test the index for the appropriate mode. */
7469 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7470 {
7471 gcc_assert (can_create_pseudo_p ());
7472 offset = force_reg (Pmode, offset);
7473 }
7474
7475 if (CONST_INT_P (offset))
7476 return plus_constant (Pmode, base, INTVAL (offset));
7477 }
7478
7479 if (GET_MODE_SIZE (mode) > 4
7480 && (GET_MODE_CLASS (mode) == MODE_INT
7481 || TARGET_SOFT_FLOAT))
7482 {
7483 emit_insn (gen_addsi3 (reg, base, offset));
7484 return reg;
7485 }
7486
7487 return gen_rtx_PLUS (Pmode, base, offset);
7488 }
7489
7490 return orig;
7491 }
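
/* For a global symbol accessed under PIC, the net effect of the code
   above is (roughly) an address of the form

     (set (reg Rn) (mem (plus (reg pic_reg)
                              (unspec [(symbol_ref "foo")] UNSPEC_PIC_SYM))))

   i.e. a load of FOO's address from its GOT slot, addressed relative to
   the PIC register; calculate_pic_address in arm.md supplies the pattern
   that is later split.  The symbol name is of course illustrative.  */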
7492
7493
7494 /* Find a spare register to use during the prolog of a function. */
7495
7496 static int
7497 thumb_find_work_register (unsigned long pushed_regs_mask)
7498 {
7499 int reg;
7500
7501 /* Check the argument registers first as these are call-used. The
7502 register allocation order means that sometimes r3 might be used
7503 but earlier argument registers might not, so check them all. */
7504 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7505 if (!df_regs_ever_live_p (reg))
7506 return reg;
7507
7508 /* Before going on to check the call-saved registers we can try a couple
7509 more ways of deducing that r3 is available. The first is when we are
7510 pushing anonymous arguments onto the stack and we have fewer than 4
7511 registers' worth of fixed arguments(*). In this case r3 will be part of
7512 the variable argument list and so we can be sure that it will be
7513 pushed right at the start of the function. Hence it will be available
7514 for the rest of the prologue.
7515 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7516 if (cfun->machine->uses_anonymous_args
7517 && crtl->args.pretend_args_size > 0)
7518 return LAST_ARG_REGNUM;
7519
7520 /* The other case is when we have fixed arguments but fewer than 4 registers'
7521 worth. In this case r3 might be used in the body of the function, but
7522 it is not being used to convey an argument into the function. In theory
7523 we could just check crtl->args.size to see how many bytes are
7524 being passed in argument registers, but it seems that it is unreliable.
7525 Sometimes it will have the value 0 when in fact arguments are being
7526 passed. (See testcase execute/20021111-1.c for an example). So we also
7527 check the args_info.nregs field as well. The problem with this field is
7528 that it makes no allowances for arguments that are passed to the
7529 function but which are not used. Hence we could miss an opportunity
7530 when a function has an unused argument in r3. But it is better to be
7531 safe than to be sorry. */
7532 if (! cfun->machine->uses_anonymous_args
7533 && crtl->args.size >= 0
7534 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7535 && (TARGET_AAPCS_BASED
7536 ? crtl->args.info.aapcs_ncrn < 4
7537 : crtl->args.info.nregs < 4))
7538 return LAST_ARG_REGNUM;
7539
7540 /* Otherwise look for a call-saved register that is going to be pushed. */
7541 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7542 if (pushed_regs_mask & (1 << reg))
7543 return reg;
7544
7545 if (TARGET_THUMB2)
7546 {
7547 /* Thumb-2 can use high regs. */
7548 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7549 if (pushed_regs_mask & (1 << reg))
7550 return reg;
7551 }
7552 /* Something went wrong - thumb_compute_save_reg_mask()
7553 should have arranged for a suitable register to be pushed. */
7554 gcc_unreachable ();
7555 }
7556
7557 static GTY(()) int pic_labelno;
7558
7559 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7560 low register. */
7561
7562 void
7563 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7564 {
7565 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7566
7567 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7568 return;
7569
7570 gcc_assert (flag_pic);
7571
7572 pic_reg = cfun->machine->pic_reg;
7573 if (TARGET_VXWORKS_RTP)
7574 {
7575 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7576 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7577 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7578
7579 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7580
7581 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7582 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7583 }
7584 else
7585 {
7586 /* We use an UNSPEC rather than a LABEL_REF because this label
7587 never appears in the code stream. */
7588
7589 labelno = GEN_INT (pic_labelno++);
7590 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7591 l1 = gen_rtx_CONST (VOIDmode, l1);
7592
7593 /* On the ARM the PC register contains 'dot + 8' at the time of the
7594 addition, on the Thumb it is 'dot + 4'. */
7595 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7596 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7597 UNSPEC_GOTSYM_OFF);
7598 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7599
7600 if (TARGET_32BIT)
7601 {
7602 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7603 }
7604 else /* TARGET_THUMB1 */
7605 {
7606 if (arm_pic_register != INVALID_REGNUM
7607 && REGNO (pic_reg) > LAST_LO_REGNUM)
7608 {
7609 /* We will have pushed the pic register, so we should always be
7610 able to find a work register. */
7611 pic_tmp = gen_rtx_REG (SImode,
7612 thumb_find_work_register (saved_regs));
7613 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7614 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7615 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7616 }
7617 else if (arm_pic_register != INVALID_REGNUM
7618 && arm_pic_register > LAST_LO_REGNUM
7619 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7620 {
7621 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7622 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7623 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7624 }
7625 else
7626 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7627 }
7628 }
7629
7630 /* Need to emit this whether or not we obey regdecls,
7631 since setjmp/longjmp can cause life info to screw up. */
7632 emit_use (pic_reg);
7633 }
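
/* For reference, in the common ELF case the sequence emitted above ends
   up looking roughly like this in ARM state (register and label names
   are illustrative):

	ldr     r4, .Lgot_offset
     .LPIC0:
	add     r4, pc, r4
	...
     .Lgot_offset:
	.word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   with the constant 8 (or 4 for Thumb) matching the pc bias described in
   the comment above.  */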
7634
7635 /* Generate code to load the address of a static var when flag_pic is set. */
7636 static rtx_insn *
7637 arm_pic_static_addr (rtx orig, rtx reg)
7638 {
7639 rtx l1, labelno, offset_rtx;
7640
7641 gcc_assert (flag_pic);
7642
7643 /* We use an UNSPEC rather than a LABEL_REF because this label
7644 never appears in the code stream. */
7645 labelno = GEN_INT (pic_labelno++);
7646 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7647 l1 = gen_rtx_CONST (VOIDmode, l1);
7648
7649 /* On the ARM the PC register contains 'dot + 8' at the time of the
7650 addition, on the Thumb it is 'dot + 4'. */
7651 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7652 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7653 UNSPEC_SYMBOL_OFFSET);
7654 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7655
7656 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7657 }
7658
7659 /* Return nonzero if X is valid as an ARM state addressing register. */
7660 static int
7661 arm_address_register_rtx_p (rtx x, int strict_p)
7662 {
7663 int regno;
7664
7665 if (!REG_P (x))
7666 return 0;
7667
7668 regno = REGNO (x);
7669
7670 if (strict_p)
7671 return ARM_REGNO_OK_FOR_BASE_P (regno);
7672
7673 return (regno <= LAST_ARM_REGNUM
7674 || regno >= FIRST_PSEUDO_REGISTER
7675 || regno == FRAME_POINTER_REGNUM
7676 || regno == ARG_POINTER_REGNUM);
7677 }
7678
7679 /* Return TRUE if this rtx is the difference of a symbol and a label,
7680 and will reduce to a PC-relative relocation in the object file.
7681 Expressions like this can be left alone when generating PIC, rather
7682 than forced through the GOT. */
7683 static int
7684 pcrel_constant_p (rtx x)
7685 {
7686 if (GET_CODE (x) == MINUS)
7687 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7688
7689 return FALSE;
7690 }
7691
7692 /* Return true if X will surely end up in an index register after next
7693 splitting pass. */
7694 static bool
7695 will_be_in_index_register (const_rtx x)
7696 {
7697 /* arm.md: calculate_pic_address will split this into a register. */
7698 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7699 }
7700
7701 /* Return nonzero if X is a valid ARM state address operand. */
7702 int
7703 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7704 int strict_p)
7705 {
7706 bool use_ldrd;
7707 enum rtx_code code = GET_CODE (x);
7708
7709 if (arm_address_register_rtx_p (x, strict_p))
7710 return 1;
7711
7712 use_ldrd = (TARGET_LDRD
7713 && (mode == DImode || mode == DFmode));
7714
7715 if (code == POST_INC || code == PRE_DEC
7716 || ((code == PRE_INC || code == POST_DEC)
7717 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7718 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7719
7720 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7721 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7722 && GET_CODE (XEXP (x, 1)) == PLUS
7723 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7724 {
7725 rtx addend = XEXP (XEXP (x, 1), 1);
7726
7727 /* Don't allow ldrd post increment by register because it's hard
7728 to fixup invalid register choices. */
7729 if (use_ldrd
7730 && GET_CODE (x) == POST_MODIFY
7731 && REG_P (addend))
7732 return 0;
7733
7734 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7735 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7736 }
7737
7738 /* After reload constants split into minipools will have addresses
7739 from a LABEL_REF. */
7740 else if (reload_completed
7741 && (code == LABEL_REF
7742 || (code == CONST
7743 && GET_CODE (XEXP (x, 0)) == PLUS
7744 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7745 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7746 return 1;
7747
7748 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7749 return 0;
7750
7751 else if (code == PLUS)
7752 {
7753 rtx xop0 = XEXP (x, 0);
7754 rtx xop1 = XEXP (x, 1);
7755
7756 return ((arm_address_register_rtx_p (xop0, strict_p)
7757 && ((CONST_INT_P (xop1)
7758 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7759 || (!strict_p && will_be_in_index_register (xop1))))
7760 || (arm_address_register_rtx_p (xop1, strict_p)
7761 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7762 }
7763
7764 #if 0
7765 /* Reload currently can't handle MINUS, so disable this for now */
7766 else if (GET_CODE (x) == MINUS)
7767 {
7768 rtx xop0 = XEXP (x, 0);
7769 rtx xop1 = XEXP (x, 1);
7770
7771 return (arm_address_register_rtx_p (xop0, strict_p)
7772 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7773 }
7774 #endif
7775
7776 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7777 && code == SYMBOL_REF
7778 && CONSTANT_POOL_ADDRESS_P (x)
7779 && ! (flag_pic
7780 && symbol_mentioned_p (get_pool_constant (x))
7781 && ! pcrel_constant_p (get_pool_constant (x))))
7782 return 1;
7783
7784 return 0;
7785 }
7786
7787 /* Return true if we can avoid creating a constant pool entry for x. */
7788 static bool
7789 can_avoid_literal_pool_for_label_p (rtx x)
7790 {
7791 /* Normally we can assign constant values to target registers without
7792 the help of the constant pool. But there are cases where we have to use
7793 the constant pool, for example:
7794 1) assigning a label to a register.
7795 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7796
7797 A constant pool access of the form:
7798 (set (reg r0) (mem (symbol_ref (".LC0"))))
7799 will cause the use of the literal pool (later, in arm_reorg).
7800 So here we reject such a form as an invalid address, and the compiler
7801 will then adjust it into:
7802 (set (reg r0) (symbol_ref (".LC0")))
7803 (set (reg r0) (mem (reg r0))).
7804 No extra register is required, and (mem (reg r0)) won't cause the use
7805 of literal pools. */
7806 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7807 && CONSTANT_POOL_ADDRESS_P (x))
7808 return 1;
7809 return 0;
7810 }
7811
7812
7813 /* Return nonzero if X is a valid Thumb-2 address operand. */
7814 static int
7815 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7816 {
7817 bool use_ldrd;
7818 enum rtx_code code = GET_CODE (x);
7819
7820 if (arm_address_register_rtx_p (x, strict_p))
7821 return 1;
7822
7823 use_ldrd = (TARGET_LDRD
7824 && (mode == DImode || mode == DFmode));
7825
7826 if (code == POST_INC || code == PRE_DEC
7827 || ((code == PRE_INC || code == POST_DEC)
7828 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7829 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7830
7831 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7832 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7833 && GET_CODE (XEXP (x, 1)) == PLUS
7834 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7835 {
7836 /* Thumb-2 only has autoincrement by constant. */
7837 rtx addend = XEXP (XEXP (x, 1), 1);
7838 HOST_WIDE_INT offset;
7839
7840 if (!CONST_INT_P (addend))
7841 return 0;
7842
7843 offset = INTVAL(addend);
7844 if (GET_MODE_SIZE (mode) <= 4)
7845 return (offset > -256 && offset < 256);
7846
7847 return (use_ldrd && offset > -1024 && offset < 1024
7848 && (offset & 3) == 0);
7849 }
7850
7851 /* After reload constants split into minipools will have addresses
7852 from a LABEL_REF. */
7853 else if (reload_completed
7854 && (code == LABEL_REF
7855 || (code == CONST
7856 && GET_CODE (XEXP (x, 0)) == PLUS
7857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7858 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7859 return 1;
7860
7861 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7862 return 0;
7863
7864 else if (code == PLUS)
7865 {
7866 rtx xop0 = XEXP (x, 0);
7867 rtx xop1 = XEXP (x, 1);
7868
7869 return ((arm_address_register_rtx_p (xop0, strict_p)
7870 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7871 || (!strict_p && will_be_in_index_register (xop1))))
7872 || (arm_address_register_rtx_p (xop1, strict_p)
7873 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7874 }
7875
7876 else if (can_avoid_literal_pool_for_label_p (x))
7877 return 0;
7878
7879 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7880 && code == SYMBOL_REF
7881 && CONSTANT_POOL_ADDRESS_P (x)
7882 && ! (flag_pic
7883 && symbol_mentioned_p (get_pool_constant (x))
7884 && ! pcrel_constant_p (get_pool_constant (x))))
7885 return 1;
7886
7887 return 0;
7888 }
7889
7890 /* Return nonzero if INDEX is valid for an address index operand in
7891 ARM state. */
7892 static int
7893 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7894 int strict_p)
7895 {
7896 HOST_WIDE_INT range;
7897 enum rtx_code code = GET_CODE (index);
7898
7899 /* Standard coprocessor addressing modes. */
7900 if (TARGET_HARD_FLOAT
7901 && (mode == SFmode || mode == DFmode))
7902 return (code == CONST_INT && INTVAL (index) < 1024
7903 && INTVAL (index) > -1024
7904 && (INTVAL (index) & 3) == 0);
7905
7906 /* For quad modes, we restrict the constant offset to be slightly less
7907 than what the instruction format permits. We do this because for
7908 quad mode moves, we will actually decompose them into two separate
7909 double-mode reads or writes. INDEX must therefore be a valid
7910 (double-mode) offset and so should INDEX+8. */
7911 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7912 return (code == CONST_INT
7913 && INTVAL (index) < 1016
7914 && INTVAL (index) > -1024
7915 && (INTVAL (index) & 3) == 0);
7916
7917 /* We have no such constraint on double mode offsets, so we permit the
7918 full range of the instruction format. */
7919 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7920 return (code == CONST_INT
7921 && INTVAL (index) < 1024
7922 && INTVAL (index) > -1024
7923 && (INTVAL (index) & 3) == 0);
7924
7925 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7926 return (code == CONST_INT
7927 && INTVAL (index) < 1024
7928 && INTVAL (index) > -1024
7929 && (INTVAL (index) & 3) == 0);
7930
7931 if (arm_address_register_rtx_p (index, strict_p)
7932 && (GET_MODE_SIZE (mode) <= 4))
7933 return 1;
7934
7935 if (mode == DImode || mode == DFmode)
7936 {
7937 if (code == CONST_INT)
7938 {
7939 HOST_WIDE_INT val = INTVAL (index);
7940
7941 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7942 If vldr is selected it uses arm_coproc_mem_operand. */
7943 if (TARGET_LDRD)
7944 return val > -256 && val < 256;
7945 else
7946 return val > -4096 && val < 4092;
7947 }
7948
7949 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7950 }
7951
7952 if (GET_MODE_SIZE (mode) <= 4
7953 && ! (arm_arch4
7954 && (mode == HImode
7955 || mode == HFmode
7956 || (mode == QImode && outer == SIGN_EXTEND))))
7957 {
7958 if (code == MULT)
7959 {
7960 rtx xiop0 = XEXP (index, 0);
7961 rtx xiop1 = XEXP (index, 1);
7962
7963 return ((arm_address_register_rtx_p (xiop0, strict_p)
7964 && power_of_two_operand (xiop1, SImode))
7965 || (arm_address_register_rtx_p (xiop1, strict_p)
7966 && power_of_two_operand (xiop0, SImode)));
7967 }
7968 else if (code == LSHIFTRT || code == ASHIFTRT
7969 || code == ASHIFT || code == ROTATERT)
7970 {
7971 rtx op = XEXP (index, 1);
7972
7973 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7974 && CONST_INT_P (op)
7975 && INTVAL (op) > 0
7976 && INTVAL (op) <= 31);
7977 }
7978 }
7979
7980 /* For ARM v4 we may be doing a sign-extend operation during the
7981 load. */
7982 if (arm_arch4)
7983 {
7984 if (mode == HImode
7985 || mode == HFmode
7986 || (outer == SIGN_EXTEND && mode == QImode))
7987 range = 256;
7988 else
7989 range = 4096;
7990 }
7991 else
7992 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7993
7994 return (code == CONST_INT
7995 && INTVAL (index) < range
7996 && INTVAL (index) > -range);
7997 }
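
/* Informally, and leaving the coprocessor/NEON cases aside, the checks
   above accept ARM-state index forms such as

     ldr   r0, [r1, r2]            plain register index
     ldr   r0, [r1, r2, lsl #2]    scaled register index
     ldr   r0, [r1, #4095]         word access, 12-bit immediate
     ldrh  r0, [r1, #255]          halfword access, 8-bit immediate (ARMv4+)

   while an offset such as #4096 falls outside the computed range and has
   to be legitimized first.  These are representative examples only; the
   precise limits are the ones computed above.  */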
7998
7999 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8000 index operand, i.e. 1, 2, 4 or 8. */
8001 static bool
8002 thumb2_index_mul_operand (rtx op)
8003 {
8004 HOST_WIDE_INT val;
8005
8006 if (!CONST_INT_P (op))
8007 return false;
8008
8009 val = INTVAL(op);
8010 return (val == 1 || val == 2 || val == 4 || val == 8);
8011 }
8012
8013 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8014 static int
8015 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8016 {
8017 enum rtx_code code = GET_CODE (index);
8018
8019 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8020 /* Standard coprocessor addressing modes. */
8021 if (TARGET_HARD_FLOAT
8022 && (mode == SFmode || mode == DFmode))
8023 return (code == CONST_INT && INTVAL (index) < 1024
8024 /* Thumb-2 allows only a > -256 index range for its core register
8025 load/stores. Since we allow SF/DF in core registers, we have
8026 to use the intersection between -256~4096 (core) and -1024~1024
8027 (coprocessor). */
8028 && INTVAL (index) > -256
8029 && (INTVAL (index) & 3) == 0);
8030
8031 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8032 {
8033 /* For DImode assume values will usually live in core regs
8034 and only allow LDRD addressing modes. */
8035 if (!TARGET_LDRD || mode != DImode)
8036 return (code == CONST_INT
8037 && INTVAL (index) < 1024
8038 && INTVAL (index) > -1024
8039 && (INTVAL (index) & 3) == 0);
8040 }
8041
8042 /* For quad modes, we restrict the constant offset to be slightly less
8043 than what the instruction format permits. We do this because for
8044 quad mode moves, we will actually decompose them into two separate
8045 double-mode reads or writes. INDEX must therefore be a valid
8046 (double-mode) offset and so should INDEX+8. */
8047 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8048 return (code == CONST_INT
8049 && INTVAL (index) < 1016
8050 && INTVAL (index) > -1024
8051 && (INTVAL (index) & 3) == 0);
8052
8053 /* We have no such constraint on double mode offsets, so we permit the
8054 full range of the instruction format. */
8055 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8056 return (code == CONST_INT
8057 && INTVAL (index) < 1024
8058 && INTVAL (index) > -1024
8059 && (INTVAL (index) & 3) == 0);
8060
8061 if (arm_address_register_rtx_p (index, strict_p)
8062 && (GET_MODE_SIZE (mode) <= 4))
8063 return 1;
8064
8065 if (mode == DImode || mode == DFmode)
8066 {
8067 if (code == CONST_INT)
8068 {
8069 HOST_WIDE_INT val = INTVAL (index);
8070 /* Thumb-2 ldrd only has reg+const addressing modes.
8071 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8072 If vldr is selected it uses arm_coproc_mem_operand. */
8073 if (TARGET_LDRD)
8074 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8075 else
8076 return IN_RANGE (val, -255, 4095 - 4);
8077 }
8078 else
8079 return 0;
8080 }
8081
8082 if (code == MULT)
8083 {
8084 rtx xiop0 = XEXP (index, 0);
8085 rtx xiop1 = XEXP (index, 1);
8086
8087 return ((arm_address_register_rtx_p (xiop0, strict_p)
8088 && thumb2_index_mul_operand (xiop1))
8089 || (arm_address_register_rtx_p (xiop1, strict_p)
8090 && thumb2_index_mul_operand (xiop0)));
8091 }
8092 else if (code == ASHIFT)
8093 {
8094 rtx op = XEXP (index, 1);
8095
8096 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8097 && CONST_INT_P (op)
8098 && INTVAL (op) > 0
8099 && INTVAL (op) <= 3);
8100 }
8101
8102 return (code == CONST_INT
8103 && INTVAL (index) < 4096
8104 && INTVAL (index) > -256);
8105 }
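
/* Representative Thumb-2 forms accepted above (again leaving the
   coprocessor cases aside):

     ldr   r0, [r1, r2, lsl #3]     shift amount limited to 0-3
     ldr   r0, [r1, #4095]          positive offsets up to 12 bits
     ldr   r0, [r1, #-255]          negative offsets limited to 8 bits
     ldrd  r0, r1, [r2, #1020]      ldrd: -1020..1020, word-aligned

   A shift of #4, or an offset of #-256, for example, is rejected and must
   be legitimized some other way.  */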
8106
8107 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8108 static int
8109 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8110 {
8111 int regno;
8112
8113 if (!REG_P (x))
8114 return 0;
8115
8116 regno = REGNO (x);
8117
8118 if (strict_p)
8119 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8120
8121 return (regno <= LAST_LO_REGNUM
8122 || regno > LAST_VIRTUAL_REGISTER
8123 || regno == FRAME_POINTER_REGNUM
8124 || (GET_MODE_SIZE (mode) >= 4
8125 && (regno == STACK_POINTER_REGNUM
8126 || regno >= FIRST_PSEUDO_REGISTER
8127 || x == hard_frame_pointer_rtx
8128 || x == arg_pointer_rtx)));
8129 }
8130
8131 /* Return nonzero if x is a legitimate index register. This is the case
8132 for any base register that can access a QImode object. */
8133 inline static int
8134 thumb1_index_register_rtx_p (rtx x, int strict_p)
8135 {
8136 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8137 }
8138
8139 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8140
8141 The AP may be eliminated to either the SP or the FP, so we use the
8142 least common denominator, e.g. SImode, and offsets from 0 to 64.
8143
8144 ??? Verify whether the above is the right approach.
8145
8146 ??? Also, the FP may be eliminated to the SP, so perhaps that
8147 needs special handling also.
8148
8149 ??? Look at how the mips16 port solves this problem. It probably uses
8150 better ways to solve some of these problems.
8151
8152 Although it is not incorrect, we don't accept QImode and HImode
8153 addresses based on the frame pointer or arg pointer until the
8154 reload pass starts. This is so that eliminating such addresses
8155 into stack based ones won't produce impossible code. */
8156 int
8157 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8158 {
8159 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8160 return 0;
8161
8162 /* ??? Not clear if this is right. Experiment. */
8163 if (GET_MODE_SIZE (mode) < 4
8164 && !(reload_in_progress || reload_completed)
8165 && (reg_mentioned_p (frame_pointer_rtx, x)
8166 || reg_mentioned_p (arg_pointer_rtx, x)
8167 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8168 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8169 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8170 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8171 return 0;
8172
8173 /* Accept any base register. SP only in SImode or larger. */
8174 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8175 return 1;
8176
8177 /* This is PC relative data before arm_reorg runs. */
8178 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8179 && GET_CODE (x) == SYMBOL_REF
8180 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8181 return 1;
8182
8183 /* This is PC relative data after arm_reorg runs. */
8184 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8185 && reload_completed
8186 && (GET_CODE (x) == LABEL_REF
8187 || (GET_CODE (x) == CONST
8188 && GET_CODE (XEXP (x, 0)) == PLUS
8189 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8190 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8191 return 1;
8192
8193 /* Post-inc indexing only supported for SImode and larger. */
8194 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8195 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8196 return 1;
8197
8198 else if (GET_CODE (x) == PLUS)
8199 {
8200 /* REG+REG address can be any two index registers. */
8201 /* We disallow FRAME+REG addressing since we know that FRAME
8202 will be replaced with STACK, and SP relative addressing only
8203 permits SP+OFFSET. */
8204 if (GET_MODE_SIZE (mode) <= 4
8205 && XEXP (x, 0) != frame_pointer_rtx
8206 && XEXP (x, 1) != frame_pointer_rtx
8207 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8208 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8209 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8210 return 1;
8211
8212 /* REG+const has 5-7 bit offset for non-SP registers. */
8213 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8214 || XEXP (x, 0) == arg_pointer_rtx)
8215 && CONST_INT_P (XEXP (x, 1))
8216 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8217 return 1;
8218
8219 /* REG+const has 10-bit offset for SP, but only SImode and
8220 larger is supported. */
8221 /* ??? Should probably check for DI/DFmode overflow here
8222 just like GO_IF_LEGITIMATE_OFFSET does. */
8223 else if (REG_P (XEXP (x, 0))
8224 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8225 && GET_MODE_SIZE (mode) >= 4
8226 && CONST_INT_P (XEXP (x, 1))
8227 && INTVAL (XEXP (x, 1)) >= 0
8228 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8229 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8230 return 1;
8231
8232 else if (REG_P (XEXP (x, 0))
8233 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8234 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8235 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8236 && REGNO (XEXP (x, 0))
8237 <= LAST_VIRTUAL_POINTER_REGISTER))
8238 && GET_MODE_SIZE (mode) >= 4
8239 && CONST_INT_P (XEXP (x, 1))
8240 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8241 return 1;
8242 }
8243
8244 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8245 && GET_MODE_SIZE (mode) == 4
8246 && GET_CODE (x) == SYMBOL_REF
8247 && CONSTANT_POOL_ADDRESS_P (x)
8248 && ! (flag_pic
8249 && symbol_mentioned_p (get_pool_constant (x))
8250 && ! pcrel_constant_p (get_pool_constant (x))))
8251 return 1;
8252
8253 return 0;
8254 }
8255
8256 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8257 instruction of mode MODE. */
8258 int
8259 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8260 {
8261 switch (GET_MODE_SIZE (mode))
8262 {
8263 case 1:
8264 return val >= 0 && val < 32;
8265
8266 case 2:
8267 return val >= 0 && val < 64 && (val & 1) == 0;
8268
8269 default:
8270 return (val >= 0
8271 && (val + GET_MODE_SIZE (mode)) <= 128
8272 && (val & 3) == 0);
8273 }
8274 }
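
/* In other words, the Thumb-1 immediate offsets accepted here are:

     byte accesses       0 .. 31
     halfword accesses   0 .. 62, even
     word and larger     0 .. 128 - size, multiples of 4

   matching the scaled 5-bit offset field of the 16-bit load/store
   encodings.  */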
8275
8276 bool
8277 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8278 {
8279 if (TARGET_ARM)
8280 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8281 else if (TARGET_THUMB2)
8282 return thumb2_legitimate_address_p (mode, x, strict_p);
8283 else /* if (TARGET_THUMB1) */
8284 return thumb1_legitimate_address_p (mode, x, strict_p);
8285 }
8286
8287 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8288
8289 Given an rtx X being reloaded into a reg required to be
8290 in class CLASS, return the class of reg to actually use.
8291 In general this is just CLASS, but for the Thumb core registers and
8292 immediate constants we prefer a LO_REGS class or a subset. */
8293
8294 static reg_class_t
8295 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8296 {
8297 if (TARGET_32BIT)
8298 return rclass;
8299 else
8300 {
8301 if (rclass == GENERAL_REGS)
8302 return LO_REGS;
8303 else
8304 return rclass;
8305 }
8306 }
8307
8308 /* Build the SYMBOL_REF for __tls_get_addr. */
8309
8310 static GTY(()) rtx tls_get_addr_libfunc;
8311
8312 static rtx
8313 get_tls_get_addr (void)
8314 {
8315 if (!tls_get_addr_libfunc)
8316 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8317 return tls_get_addr_libfunc;
8318 }
8319
8320 rtx
8321 arm_load_tp (rtx target)
8322 {
8323 if (!target)
8324 target = gen_reg_rtx (SImode);
8325
8326 if (TARGET_HARD_TP)
8327 {
8328 /* Can return in any reg. */
8329 emit_insn (gen_load_tp_hard (target));
8330 }
8331 else
8332 {
8333 /* Always returned in r0. Immediately copy the result into a pseudo,
8334 otherwise other uses of r0 (e.g. setting up function arguments) may
8335 clobber the value. */
8336
8337 rtx tmp;
8338
8339 emit_insn (gen_load_tp_soft ());
8340
8341 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8342 emit_move_insn (target, tmp);
8343 }
8344 return target;
8345 }
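
/* A quick sketch of the two cases: with a hardware thread pointer
   (-mtp=cp15) the value is read directly from the CP15 thread ID
   register, roughly

     mrc     p15, 0, r0, c13, c0, 3

   whereas the soft variant emits a call to the __aeabi_read_tp helper,
   which returns the thread pointer in r0; hence the immediate copy out
   of r0 into a pseudo above.  */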
8346
8347 static rtx
8348 load_tls_operand (rtx x, rtx reg)
8349 {
8350 rtx tmp;
8351
8352 if (reg == NULL_RTX)
8353 reg = gen_reg_rtx (SImode);
8354
8355 tmp = gen_rtx_CONST (SImode, x);
8356
8357 emit_move_insn (reg, tmp);
8358
8359 return reg;
8360 }
8361
8362 static rtx_insn *
8363 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8364 {
8365 rtx label, labelno, sum;
8366
8367 gcc_assert (reloc != TLS_DESCSEQ);
8368 start_sequence ();
8369
8370 labelno = GEN_INT (pic_labelno++);
8371 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8372 label = gen_rtx_CONST (VOIDmode, label);
8373
8374 sum = gen_rtx_UNSPEC (Pmode,
8375 gen_rtvec (4, x, GEN_INT (reloc), label,
8376 GEN_INT (TARGET_ARM ? 8 : 4)),
8377 UNSPEC_TLS);
8378 reg = load_tls_operand (sum, reg);
8379
8380 if (TARGET_ARM)
8381 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8382 else
8383 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8384
8385 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8386 LCT_PURE, /* LCT_CONST? */
8387 Pmode, reg, Pmode);
8388
8389 rtx_insn *insns = get_insns ();
8390 end_sequence ();
8391
8392 return insns;
8393 }
8394
8395 static rtx
8396 arm_tls_descseq_addr (rtx x, rtx reg)
8397 {
8398 rtx labelno = GEN_INT (pic_labelno++);
8399 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8400 rtx sum = gen_rtx_UNSPEC (Pmode,
8401 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8402 gen_rtx_CONST (VOIDmode, label),
8403 GEN_INT (!TARGET_ARM)),
8404 UNSPEC_TLS);
8405 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8406
8407 emit_insn (gen_tlscall (x, labelno));
8408 if (!reg)
8409 reg = gen_reg_rtx (SImode);
8410 else
8411 gcc_assert (REGNO (reg) != R0_REGNUM);
8412
8413 emit_move_insn (reg, reg0);
8414
8415 return reg;
8416 }
8417
8418 rtx
8419 legitimize_tls_address (rtx x, rtx reg)
8420 {
8421 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8422 rtx_insn *insns;
8423 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8424
8425 switch (model)
8426 {
8427 case TLS_MODEL_GLOBAL_DYNAMIC:
8428 if (TARGET_GNU2_TLS)
8429 {
8430 reg = arm_tls_descseq_addr (x, reg);
8431
8432 tp = arm_load_tp (NULL_RTX);
8433
8434 dest = gen_rtx_PLUS (Pmode, tp, reg);
8435 }
8436 else
8437 {
8438 /* Original scheme */
8439 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8440 dest = gen_reg_rtx (Pmode);
8441 emit_libcall_block (insns, dest, ret, x);
8442 }
8443 return dest;
8444
8445 case TLS_MODEL_LOCAL_DYNAMIC:
8446 if (TARGET_GNU2_TLS)
8447 {
8448 reg = arm_tls_descseq_addr (x, reg);
8449
8450 tp = arm_load_tp (NULL_RTX);
8451
8452 dest = gen_rtx_PLUS (Pmode, tp, reg);
8453 }
8454 else
8455 {
8456 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8457
8458 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8459 share the LDM result with other LD model accesses. */
8460 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8461 UNSPEC_TLS);
8462 dest = gen_reg_rtx (Pmode);
8463 emit_libcall_block (insns, dest, ret, eqv);
8464
8465 /* Load the addend. */
8466 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8467 GEN_INT (TLS_LDO32)),
8468 UNSPEC_TLS);
8469 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8470 dest = gen_rtx_PLUS (Pmode, dest, addend);
8471 }
8472 return dest;
8473
8474 case TLS_MODEL_INITIAL_EXEC:
8475 labelno = GEN_INT (pic_labelno++);
8476 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8477 label = gen_rtx_CONST (VOIDmode, label);
8478 sum = gen_rtx_UNSPEC (Pmode,
8479 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8480 GEN_INT (TARGET_ARM ? 8 : 4)),
8481 UNSPEC_TLS);
8482 reg = load_tls_operand (sum, reg);
8483
8484 if (TARGET_ARM)
8485 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8486 else if (TARGET_THUMB2)
8487 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8488 else
8489 {
8490 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8491 emit_move_insn (reg, gen_const_mem (SImode, reg));
8492 }
8493
8494 tp = arm_load_tp (NULL_RTX);
8495
8496 return gen_rtx_PLUS (Pmode, tp, reg);
8497
8498 case TLS_MODEL_LOCAL_EXEC:
8499 tp = arm_load_tp (NULL_RTX);
8500
8501 reg = gen_rtx_UNSPEC (Pmode,
8502 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8503 UNSPEC_TLS);
8504 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8505
8506 return gen_rtx_PLUS (Pmode, tp, reg);
8507
8508 default:
8509 abort ();
8510 }
8511 }
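
/* By way of example, for a thread-local variable such as

     __thread int tls_counter;

   a reference from PIC code normally takes the global dynamic path above
   (a call to __tls_get_addr, or the descriptor sequence when compiling
   with -mtls-dialect=gnu2), while a non-PIC executable can use the
   initial-exec or local-exec paths, which simply add the variable's
   offset to the thread pointer obtained from arm_load_tp.  The model is
   chosen earlier and recorded in SYMBOL_REF_TLS_MODEL.  */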
8512
8513 /* Try machine-dependent ways of modifying an illegitimate address
8514 to be legitimate. If we find one, return the new, valid address. */
8515 rtx
8516 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8517 {
8518 if (arm_tls_referenced_p (x))
8519 {
8520 rtx addend = NULL;
8521
8522 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8523 {
8524 addend = XEXP (XEXP (x, 0), 1);
8525 x = XEXP (XEXP (x, 0), 0);
8526 }
8527
8528 if (GET_CODE (x) != SYMBOL_REF)
8529 return x;
8530
8531 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8532
8533 x = legitimize_tls_address (x, NULL_RTX);
8534
8535 if (addend)
8536 {
8537 x = gen_rtx_PLUS (SImode, x, addend);
8538 orig_x = x;
8539 }
8540 else
8541 return x;
8542 }
8543
8544 if (!TARGET_ARM)
8545 {
8546 /* TODO: legitimize_address for Thumb2. */
8547 if (TARGET_THUMB2)
8548 return x;
8549 return thumb_legitimize_address (x, orig_x, mode);
8550 }
8551
8552 if (GET_CODE (x) == PLUS)
8553 {
8554 rtx xop0 = XEXP (x, 0);
8555 rtx xop1 = XEXP (x, 1);
8556
8557 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8558 xop0 = force_reg (SImode, xop0);
8559
8560 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8561 && !symbol_mentioned_p (xop1))
8562 xop1 = force_reg (SImode, xop1);
8563
8564 if (ARM_BASE_REGISTER_RTX_P (xop0)
8565 && CONST_INT_P (xop1))
8566 {
8567 HOST_WIDE_INT n, low_n;
8568 rtx base_reg, val;
8569 n = INTVAL (xop1);
8570
8571 /* VFP addressing modes actually allow greater offsets, but for
8572 now we just stick with the lowest common denominator. */
8573 if (mode == DImode || mode == DFmode)
8574 {
8575 low_n = n & 0x0f;
8576 n &= ~0x0f;
8577 if (low_n > 4)
8578 {
8579 n += 16;
8580 low_n -= 16;
8581 }
8582 }
8583 else
8584 {
8585 low_n = ((mode) == TImode ? 0
8586 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8587 n -= low_n;
8588 }
8589
8590 base_reg = gen_reg_rtx (SImode);
8591 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8592 emit_move_insn (base_reg, val);
8593 x = plus_constant (Pmode, base_reg, low_n);
8594 }
8595 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8596 x = gen_rtx_PLUS (SImode, xop0, xop1);
8597 }
8598
8599 /* XXX We don't allow MINUS any more -- see comment in
8600 arm_legitimate_address_outer_p (). */
8601 else if (GET_CODE (x) == MINUS)
8602 {
8603 rtx xop0 = XEXP (x, 0);
8604 rtx xop1 = XEXP (x, 1);
8605
8606 if (CONSTANT_P (xop0))
8607 xop0 = force_reg (SImode, xop0);
8608
8609 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8610 xop1 = force_reg (SImode, xop1);
8611
8612 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8613 x = gen_rtx_MINUS (SImode, xop0, xop1);
8614 }
8615
8616 /* Make sure to take full advantage of the pre-indexed addressing mode
8617 with absolute addresses, which often allows the base register to be
8618 shared between multiple adjacent memory references, and might even
8619 allow the minipool to be avoided entirely. */
8620 else if (CONST_INT_P (x) && optimize > 0)
8621 {
8622 unsigned int bits;
8623 HOST_WIDE_INT mask, base, index;
8624 rtx base_reg;
8625
8626 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8627 use an 8-bit index. So let's use a 12-bit index for SImode only and
8628 hope that arm_gen_constant will enable ldrb to use more bits. */
8629 bits = (mode == SImode) ? 12 : 8;
8630 mask = (1 << bits) - 1;
8631 base = INTVAL (x) & ~mask;
8632 index = INTVAL (x) & mask;
8633 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8634 {
8635 /* It'll most probably be more efficient to generate the base
8636 with more bits set and use a negative index instead. */
8637 base |= mask;
8638 index -= mask;
8639 }
8640 base_reg = force_reg (SImode, GEN_INT (base));
8641 x = plus_constant (Pmode, base_reg, index);
8642 }
8643
8644 if (flag_pic)
8645 {
8646 /* We need to find and carefully transform any SYMBOL and LABEL
8647 references; so go back to the original address expression. */
8648 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8649
8650 if (new_x != orig_x)
8651 x = new_x;
8652 }
8653
8654 return x;
8655 }
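
/* As a concrete example of the constant-address case above: for an SImode
   access to the absolute address 0x12345678, BITS is 12, so the address
   is split into

     base  = 0x12345000   (forced into a register once)
     index = 0x678        (folded into the load/store offset)

   which lets several neighbouring absolute accesses share the same base
   register.  */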
8656
8657
8658 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8659 to be legitimate. If we find one, return the new, valid address. */
8660 rtx
8661 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8662 {
8663 if (GET_CODE (x) == PLUS
8664 && CONST_INT_P (XEXP (x, 1))
8665 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8666 || INTVAL (XEXP (x, 1)) < 0))
8667 {
8668 rtx xop0 = XEXP (x, 0);
8669 rtx xop1 = XEXP (x, 1);
8670 HOST_WIDE_INT offset = INTVAL (xop1);
8671
8672 /* Try to fold the offset into a biasing of the base register and
8673 then offsetting that. Don't do this when optimizing for space
8674 since it can cause too many CSEs. */
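 /* Illustrative trace (hypothetical offset): for SImode and offset = 300,
    offset >= 256, so delta = 300 - (256 - 4) = 48 and the address is
    rewritten as (xop0 + 252) + 48.  */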
8675 if (optimize_size && offset >= 0
8676 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8677 {
8678 HOST_WIDE_INT delta;
8679
8680 if (offset >= 256)
8681 delta = offset - (256 - GET_MODE_SIZE (mode));
8682 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8683 delta = 31 * GET_MODE_SIZE (mode);
8684 else
8685 delta = offset & (~31 * GET_MODE_SIZE (mode));
8686
8687 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8688 NULL_RTX);
8689 x = plus_constant (Pmode, xop0, delta);
8690 }
8691 else if (offset < 0 && offset > -256)
8692 /* Small negative offsets are best done with a subtract before the
8693 dereference; forcing these into a register normally takes two
8694 instructions. */
8695 x = force_operand (x, NULL_RTX);
8696 else
8697 {
8698 /* For the remaining cases, force the constant into a register. */
8699 xop1 = force_reg (SImode, xop1);
8700 x = gen_rtx_PLUS (SImode, xop0, xop1);
8701 }
8702 }
8703 else if (GET_CODE (x) == PLUS
8704 && s_register_operand (XEXP (x, 1), SImode)
8705 && !s_register_operand (XEXP (x, 0), SImode))
8706 {
8707 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8708
8709 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8710 }
8711
8712 if (flag_pic)
8713 {
8714 /* We need to find and carefully transform any SYMBOL and LABEL
8715 references, so go back to the original address expression. */
8716 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8717
8718 if (new_x != orig_x)
8719 x = new_x;
8720 }
8721
8722 return x;
8723 }
8724
8725 /* Return TRUE if X contains any TLS symbol references. */
8726
8727 bool
8728 arm_tls_referenced_p (rtx x)
8729 {
8730 if (! TARGET_HAVE_TLS)
8731 return false;
8732
8733 subrtx_iterator::array_type array;
8734 FOR_EACH_SUBRTX (iter, array, x, ALL)
8735 {
8736 const_rtx x = *iter;
8737 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8738 {
8739 /* ARM currently provides relocations to encode TLS variables only in
8740 data, not in AArch32 instructions, so there is currently no way to
8741 implement these if the literal pool is disabled. */
8742 if (arm_disable_literal_pool)
8743 sorry ("accessing thread-local storage is not currently supported "
8744 "with -mpure-code or -mslow-flash-data");
8745
8746 return true;
8747 }
8748
8749 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8750 TLS offsets, not real symbol references. */
8751 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8752 iter.skip_subrtxes ();
8753 }
8754 return false;
8755 }
8756
8757 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8758
8759 On the ARM, allow any integer (invalid ones are removed later by insn
8760 patterns), nice doubles and symbol_refs which refer to the function's
8761 constant pool XXX.
8762
8763 When generating PIC, allow anything. */
8764
8765 static bool
8766 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8767 {
8768 return flag_pic || !label_mentioned_p (x);
8769 }
8770
8771 static bool
8772 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8773 {
8774 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8775 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
8776 for ARMv8-M Baseline or later the result is valid. */
8777 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8778 x = XEXP (x, 0);
8779
8780 return (CONST_INT_P (x)
8781 || CONST_DOUBLE_P (x)
8782 || CONSTANT_ADDRESS_P (x)
8783 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8784 || flag_pic);
8785 }
8786
8787 static bool
8788 arm_legitimate_constant_p (machine_mode mode, rtx x)
8789 {
8790 return (!arm_cannot_force_const_mem (mode, x)
8791 && (TARGET_32BIT
8792 ? arm_legitimate_constant_p_1 (mode, x)
8793 : thumb_legitimate_constant_p (mode, x)));
8794 }
8795
8796 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8797
8798 static bool
8799 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8800 {
8801 rtx base, offset;
8802
8803 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8804 {
8805 split_const (x, &base, &offset);
8806 if (GET_CODE (base) == SYMBOL_REF
8807 && !offset_within_block_p (base, INTVAL (offset)))
8808 return true;
8809 }
8810 return arm_tls_referenced_p (x);
8811 }
8812 \f
8813 #define REG_OR_SUBREG_REG(X) \
8814 (REG_P (X) \
8815 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8816
8817 #define REG_OR_SUBREG_RTX(X) \
8818 (REG_P (X) ? (X) : SUBREG_REG (X))
8819
8820 static inline int
8821 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8822 {
8823 machine_mode mode = GET_MODE (x);
8824 int total, words;
8825
8826 switch (code)
8827 {
8828 case ASHIFT:
8829 case ASHIFTRT:
8830 case LSHIFTRT:
8831 case ROTATERT:
8832 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8833
8834 case PLUS:
8835 case MINUS:
8836 case COMPARE:
8837 case NEG:
8838 case NOT:
8839 return COSTS_N_INSNS (1);
8840
8841 case MULT:
8842 if (arm_arch6m && arm_m_profile_small_mul)
8843 return COSTS_N_INSNS (32);
8844
8845 if (CONST_INT_P (XEXP (x, 1)))
8846 {
8847 int cycles = 0;
8848 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8849
8850 while (i)
8851 {
8852 i >>= 2;
8853 cycles++;
8854 }
8855 return COSTS_N_INSNS (2) + cycles;
8856 }
8857 return COSTS_N_INSNS (1) + 16;
8858
8859 case SET:
8860 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8861 the mode. */
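 /* Illustrative example (assuming ARM_NUM_INTS (4) is 1): a SET that loads
    an SImode register from memory is costed as COSTS_N_INSNS (1) + 4,
    since exactly one of the two operands is a MEM.  */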
8862 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8863 return (COSTS_N_INSNS (words)
8864 + 4 * ((MEM_P (SET_SRC (x)))
8865 + MEM_P (SET_DEST (x))));
8866
8867 case CONST_INT:
8868 if (outer == SET)
8869 {
8870 if (UINTVAL (x) < 256
8871 /* 16-bit constant. */
8872 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8873 return 0;
8874 if (thumb_shiftable_const (INTVAL (x)))
8875 return COSTS_N_INSNS (2);
8876 return COSTS_N_INSNS (3);
8877 }
8878 else if ((outer == PLUS || outer == COMPARE)
8879 && INTVAL (x) < 256 && INTVAL (x) > -256)
8880 return 0;
8881 else if ((outer == IOR || outer == XOR || outer == AND)
8882 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8883 return COSTS_N_INSNS (1);
8884 else if (outer == AND)
8885 {
8886 int i;
8887 /* This duplicates the tests in the andsi3 expander. */
8888 for (i = 9; i <= 31; i++)
8889 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8890 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8891 return COSTS_N_INSNS (2);
8892 }
8893 else if (outer == ASHIFT || outer == ASHIFTRT
8894 || outer == LSHIFTRT)
8895 return 0;
8896 return COSTS_N_INSNS (2);
8897
8898 case CONST:
8899 case CONST_DOUBLE:
8900 case LABEL_REF:
8901 case SYMBOL_REF:
8902 return COSTS_N_INSNS (3);
8903
8904 case UDIV:
8905 case UMOD:
8906 case DIV:
8907 case MOD:
8908 return 100;
8909
8910 case TRUNCATE:
8911 return 99;
8912
8913 case AND:
8914 case XOR:
8915 case IOR:
8916 /* XXX guess. */
8917 return 8;
8918
8919 case MEM:
8920 /* XXX another guess. */
8921 /* Memory costs quite a lot for the first word, but subsequent words
8922 load at the equivalent of a single insn each. */
8923 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8924 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8925 ? 4 : 0));
8926
8927 case IF_THEN_ELSE:
8928 /* XXX a guess. */
8929 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8930 return 14;
8931 return 2;
8932
8933 case SIGN_EXTEND:
8934 case ZERO_EXTEND:
8935 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8936 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8937
8938 if (mode == SImode)
8939 return total;
8940
8941 if (arm_arch6)
8942 return total + COSTS_N_INSNS (1);
8943
8944 /* Assume a two-shift sequence. Increase the cost slightly so
8945 we prefer actual shifts over an extend operation. */
8946 return total + 1 + COSTS_N_INSNS (2);
8947
8948 default:
8949 return 99;
8950 }
8951 }
8952
8953 /* Estimates the size cost of thumb1 instructions.
8954 For now most of the code is copied from thumb1_rtx_costs. We need more
8955 fine-grained tuning when we have more related test cases. */
8956 static inline int
8957 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8958 {
8959 machine_mode mode = GET_MODE (x);
8960 int words, cost;
8961
8962 switch (code)
8963 {
8964 case ASHIFT:
8965 case ASHIFTRT:
8966 case LSHIFTRT:
8967 case ROTATERT:
8968 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8969
8970 case PLUS:
8971 case MINUS:
8972 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8973 defined by RTL expansion, especially for the expansion of
8974 multiplication. */
8975 if ((GET_CODE (XEXP (x, 0)) == MULT
8976 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8977 || (GET_CODE (XEXP (x, 1)) == MULT
8978 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8979 return COSTS_N_INSNS (2);
8980 /* Fall through. */
8981 case COMPARE:
8982 case NEG:
8983 case NOT:
8984 return COSTS_N_INSNS (1);
8985
8986 case MULT:
8987 if (CONST_INT_P (XEXP (x, 1)))
8988 {
8989 /* The Thumb-1 mul instruction can't operate on a constant; we must
8990 load it into a register first. */
8991 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8992 /* For targets that have a very small and high-latency multiply
8993 unit, we prefer to synthesize the mult with up to 5 instructions,
8994 giving a good balance between size and performance. */
8995 if (arm_arch6m && arm_m_profile_small_mul)
8996 return COSTS_N_INSNS (5);
8997 else
8998 return COSTS_N_INSNS (1) + const_size;
8999 }
9000 return COSTS_N_INSNS (1);
9001
9002 case SET:
9003 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9004 the mode. */
9005 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9006 cost = COSTS_N_INSNS (words);
9007 if (satisfies_constraint_J (SET_SRC (x))
9008 || satisfies_constraint_K (SET_SRC (x))
9009 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9010 || (CONST_INT_P (SET_SRC (x))
9011 && UINTVAL (SET_SRC (x)) >= 256
9012 && TARGET_HAVE_MOVT
9013 && satisfies_constraint_j (SET_SRC (x)))
9014 /* thumb1_movdi_insn. */
9015 || ((words > 1) && MEM_P (SET_SRC (x))))
9016 cost += COSTS_N_INSNS (1);
9017 return cost;
9018
9019 case CONST_INT:
9020 if (outer == SET)
9021 {
9022 if (UINTVAL (x) < 256)
9023 return COSTS_N_INSNS (1);
9024 /* movw is 4 bytes long. */
9025 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9026 return COSTS_N_INSNS (2);
9027 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9028 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9029 return COSTS_N_INSNS (2);
9030 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9031 if (thumb_shiftable_const (INTVAL (x)))
9032 return COSTS_N_INSNS (2);
9033 return COSTS_N_INSNS (3);
9034 }
9035 else if ((outer == PLUS || outer == COMPARE)
9036 && INTVAL (x) < 256 && INTVAL (x) > -256)
9037 return 0;
9038 else if ((outer == IOR || outer == XOR || outer == AND)
9039 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9040 return COSTS_N_INSNS (1);
9041 else if (outer == AND)
9042 {
9043 int i;
9044 /* This duplicates the tests in the andsi3 expander. */
9045 for (i = 9; i <= 31; i++)
9046 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9047 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9048 return COSTS_N_INSNS (2);
9049 }
9050 else if (outer == ASHIFT || outer == ASHIFTRT
9051 || outer == LSHIFTRT)
9052 return 0;
9053 return COSTS_N_INSNS (2);
9054
9055 case CONST:
9056 case CONST_DOUBLE:
9057 case LABEL_REF:
9058 case SYMBOL_REF:
9059 return COSTS_N_INSNS (3);
9060
9061 case UDIV:
9062 case UMOD:
9063 case DIV:
9064 case MOD:
9065 return 100;
9066
9067 case TRUNCATE:
9068 return 99;
9069
9070 case AND:
9071 case XOR:
9072 case IOR:
9073 return COSTS_N_INSNS (1);
9074
9075 case MEM:
9076 return (COSTS_N_INSNS (1)
9077 + COSTS_N_INSNS (1)
9078 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9079 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9080 ? COSTS_N_INSNS (1) : 0));
9081
9082 case IF_THEN_ELSE:
9083 /* XXX a guess. */
9084 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9085 return 14;
9086 return 2;
9087
9088 case ZERO_EXTEND:
9089 /* XXX still guessing. */
9090 switch (GET_MODE (XEXP (x, 0)))
9091 {
9092 case E_QImode:
9093 return (1 + (mode == DImode ? 4 : 0)
9094 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9095
9096 case E_HImode:
9097 return (4 + (mode == DImode ? 4 : 0)
9098 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9099
9100 case E_SImode:
9101 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9102
9103 default:
9104 return 99;
9105 }
9106
9107 default:
9108 return 99;
9109 }
9110 }
9111
9112 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9113 operand, then return the operand that is being shifted. If the shift
9114 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
9115 Return NULL if OP is not a shifter operand. */
9116 static rtx
9117 shifter_op_p (rtx op, rtx *shift_reg)
9118 {
9119 enum rtx_code code = GET_CODE (op);
9120
9121 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9122 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9123 return XEXP (op, 0);
9124 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9125 return XEXP (op, 0);
9126 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9127 || code == ASHIFTRT)
9128 {
9129 if (!CONST_INT_P (XEXP (op, 1)))
9130 *shift_reg = XEXP (op, 1);
9131 return XEXP (op, 0);
9132 }
9133
9134 return NULL;
9135 }
9136
9137 static bool
9138 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9139 {
9140 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9141 rtx_code code = GET_CODE (x);
9142 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9143
9144 switch (XINT (x, 1))
9145 {
9146 case UNSPEC_UNALIGNED_LOAD:
9147 /* We can only do unaligned loads into the integer unit, and we can't
9148 use LDM or LDRD. */
9149 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9150 if (speed_p)
9151 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9152 + extra_cost->ldst.load_unaligned);
9153
9154 #ifdef NOT_YET
9155 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9156 ADDR_SPACE_GENERIC, speed_p);
9157 #endif
9158 return true;
9159
9160 case UNSPEC_UNALIGNED_STORE:
9161 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9162 if (speed_p)
9163 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9164 + extra_cost->ldst.store_unaligned);
9165
9166 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9167 #ifdef NOT_YET
9168 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9169 ADDR_SPACE_GENERIC, speed_p);
9170 #endif
9171 return true;
9172
9173 case UNSPEC_VRINTZ:
9174 case UNSPEC_VRINTP:
9175 case UNSPEC_VRINTM:
9176 case UNSPEC_VRINTR:
9177 case UNSPEC_VRINTX:
9178 case UNSPEC_VRINTA:
9179 if (speed_p)
9180 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9181
9182 return true;
9183 default:
9184 *cost = COSTS_N_INSNS (2);
9185 break;
9186 }
9187 return true;
9188 }
9189
9190 /* Cost of a libcall. We assume one insn per argument, an amount for the
9191 call (one insn for -Os) and then one for processing the result. */
9192 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
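/* For example, LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 18) when
   optimizing for speed and to COSTS_N_INSNS (2 + 2) when optimizing for
   size.  */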
9193
9194 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9195 do \
9196 { \
9197 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9198 if (shift_op != NULL \
9199 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9200 { \
9201 if (shift_reg) \
9202 { \
9203 if (speed_p) \
9204 *cost += extra_cost->alu.arith_shift_reg; \
9205 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9206 ASHIFT, 1, speed_p); \
9207 } \
9208 else if (speed_p) \
9209 *cost += extra_cost->alu.arith_shift; \
9210 \
9211 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9212 ASHIFT, 0, speed_p) \
9213 + rtx_cost (XEXP (x, 1 - IDX), \
9214 GET_MODE (shift_op), \
9215 OP, 1, speed_p)); \
9216 return true; \
9217 } \
9218 } \
9219 while (0);
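/* Illustrative example (hypothetical RTL, assuming arm_rtx_shift_left_p
   accepts the ashift): for a narrow-mode
   (plus:HI (ashift:HI r1 (const_int 2)) r2), shifter_op_p returns r1 and
   leaves shift_reg as NULL, so only the arith_shift cost is added rather
   than arith_shift_reg.  */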
9220
9221 /* RTX costs. Make an estimate of the cost of executing the operation
9222 X, which is contained within an operation with code OUTER_CODE.
9223 SPEED_P indicates whether the cost desired is the performance cost,
9224 or the size cost. The estimate is stored in COST and the return
9225 value is TRUE if the cost calculation is final, or FALSE if the
9226 caller should recurse through the operands of X to add additional
9227 costs.
9228
9229 We currently make no attempt to model the size savings of Thumb-2
9230 16-bit instructions. At the normal points in compilation where
9231 this code is called we have no measure of whether the condition
9232 flags are live or not, and thus no realistic way to determine what
9233 the size will eventually be. */
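/* For example, a SET between two SImode registers is costed below as
   COSTS_N_INSNS (1) and the function returns true (final), while a SET
   from an arbitrary expression returns false so the caller costs the
   operands recursively.  */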
9234 static bool
9235 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9236 const struct cpu_cost_table *extra_cost,
9237 int *cost, bool speed_p)
9238 {
9239 machine_mode mode = GET_MODE (x);
9240
9241 *cost = COSTS_N_INSNS (1);
9242
9243 if (TARGET_THUMB1)
9244 {
9245 if (speed_p)
9246 *cost = thumb1_rtx_costs (x, code, outer_code);
9247 else
9248 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9249 return true;
9250 }
9251
9252 switch (code)
9253 {
9254 case SET:
9255 *cost = 0;
9256 /* SET RTXs don't have a mode so we get it from the destination. */
9257 mode = GET_MODE (SET_DEST (x));
9258
9259 if (REG_P (SET_SRC (x))
9260 && REG_P (SET_DEST (x)))
9261 {
9262 /* Assume that most copies can be done with a single insn,
9263 unless we don't have HW FP, in which case everything
9264 larger than word mode will require two insns. */
9265 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9266 && GET_MODE_SIZE (mode) > 4)
9267 || mode == DImode)
9268 ? 2 : 1);
9269 /* Conditional register moves can be encoded
9270 in 16 bits in Thumb mode. */
9271 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9272 *cost >>= 1;
9273
9274 return true;
9275 }
9276
9277 if (CONST_INT_P (SET_SRC (x)))
9278 {
9279 /* Handle CONST_INT here, since the value doesn't have a mode
9280 and we would otherwise be unable to work out the true cost. */
9281 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9282 0, speed_p);
9283 outer_code = SET;
9284 /* Slightly lower the cost of setting a core reg to a constant.
9285 This helps break up chains and allows for better scheduling. */
9286 if (REG_P (SET_DEST (x))
9287 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9288 *cost -= 1;
9289 x = SET_SRC (x);
9290 /* Immediate moves with an immediate in the range [0, 255] can be
9291 encoded in 16 bits in Thumb mode. */
9292 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9293 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9294 *cost >>= 1;
9295 goto const_int_cost;
9296 }
9297
9298 return false;
9299
9300 case MEM:
9301 /* A memory access costs 1 insn if the mode is small or the address is
9302 a single register; otherwise it costs one insn per word. */
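 /* E.g. a DImode load through a reg+reg address would be costed here at
    COSTS_N_INSNS (ARM_NUM_REGS (DImode)) == COSTS_N_INSNS (2)
    (illustrative; the exact value depends on the address form).  */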
9303 if (REG_P (XEXP (x, 0)))
9304 *cost = COSTS_N_INSNS (1);
9305 else if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions.
9309 See arm.md:calculate_pic_address. */
9310 *cost = COSTS_N_INSNS (2);
9311 else
9312 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9313
9314 /* For speed optimizations, add the costs of the address and
9315 accessing memory. */
9316 if (speed_p)
9317 #ifdef NOT_YET
9318 *cost += (extra_cost->ldst.load
9319 + arm_address_cost (XEXP (x, 0), mode,
9320 ADDR_SPACE_GENERIC, speed_p));
9321 #else
9322 *cost += extra_cost->ldst.load;
9323 #endif
9324 return true;
9325
9326 case PARALLEL:
9327 {
9328 /* Calculations of LDM costs are complex. We assume an initial cost
9329 (ldm_1st) which will load the number of registers mentioned in
9330 ldm_regs_per_insn_1st registers; then each additional
9331 ldm_regs_per_insn_subsequent registers cost one more insn. The
9332 formula for N regs is thus:
9333
9334 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9335 + ldm_regs_per_insn_subsequent - 1)
9336 / ldm_regs_per_insn_subsequent).
9337
9338 Additional costs may also be added for addressing. A similar
9339 formula is used for STM. */
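 /* Worked example (hypothetical tuning values): with
    ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an
    8-register LDM adds ldm_1st + COSTS_N_INSNS ((6 + 2 - 1) / 2)
    = ldm_1st + COSTS_N_INSNS (3).  */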
9340
9341 bool is_ldm = load_multiple_operation (x, SImode);
9342 bool is_stm = store_multiple_operation (x, SImode);
9343
9344 if (is_ldm || is_stm)
9345 {
9346 if (speed_p)
9347 {
9348 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9349 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9350 ? extra_cost->ldst.ldm_regs_per_insn_1st
9351 : extra_cost->ldst.stm_regs_per_insn_1st;
9352 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9353 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9354 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9355
9356 *cost += regs_per_insn_1st
9357 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9358 + regs_per_insn_sub - 1)
9359 / regs_per_insn_sub);
9360 return true;
9361 }
9362
9363 }
9364 return false;
9365 }
9366 case DIV:
9367 case UDIV:
9368 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9369 && (mode == SFmode || !TARGET_VFP_SINGLE))
9370 *cost += COSTS_N_INSNS (speed_p
9371 ? extra_cost->fp[mode != SFmode].div : 0);
9372 else if (mode == SImode && TARGET_IDIV)
9373 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9374 else
9375 *cost = LIBCALL_COST (2);
9376
9377 /* Make sdiv more expensive so that when both sdiv and udiv are
9378 possible, udiv is preferred. */
9379 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9380 return false; /* All arguments must be in registers. */
9381
9382 case MOD:
9383 /* MOD by a power of 2 can be expanded as:
9384 rsbs r1, r0, #0
9385 and r0, r0, #(n - 1)
9386 and r1, r1, #(n - 1)
9387 rsbpl r0, r1, #0. */
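 /* For example, x % 8 (n == 8) uses #(n - 1) == #7 as the mask in both
    AND instructions of the sequence above.  */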
9388 if (CONST_INT_P (XEXP (x, 1))
9389 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9390 && mode == SImode)
9391 {
9392 *cost += COSTS_N_INSNS (3);
9393
9394 if (speed_p)
9395 *cost += 2 * extra_cost->alu.logical
9396 + extra_cost->alu.arith;
9397 return true;
9398 }
9399
9400 /* Fall-through. */
9401 case UMOD:
9402 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9403 are possible, udiv is preferred. */
9404 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9405 return false; /* All arguments must be in registers. */
9406
9407 case ROTATE:
9408 if (mode == SImode && REG_P (XEXP (x, 1)))
9409 {
9410 *cost += (COSTS_N_INSNS (1)
9411 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9412 if (speed_p)
9413 *cost += extra_cost->alu.shift_reg;
9414 return true;
9415 }
9416 /* Fall through */
9417 case ROTATERT:
9418 case ASHIFT:
9419 case LSHIFTRT:
9420 case ASHIFTRT:
9421 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9422 {
9423 *cost += (COSTS_N_INSNS (2)
9424 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9425 if (speed_p)
9426 *cost += 2 * extra_cost->alu.shift;
9427 return true;
9428 }
9429 else if (mode == SImode)
9430 {
9431 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9432 /* Slightly disparage register shifts at -Os, but not by much. */
9433 if (!CONST_INT_P (XEXP (x, 1)))
9434 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9435 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9436 return true;
9437 }
9438 else if (GET_MODE_CLASS (mode) == MODE_INT
9439 && GET_MODE_SIZE (mode) < 4)
9440 {
9441 if (code == ASHIFT)
9442 {
9443 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9444 /* Slightly disparage register shifts at -Os, but not by
9445 much. */
9446 if (!CONST_INT_P (XEXP (x, 1)))
9447 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9448 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9449 }
9450 else if (code == LSHIFTRT || code == ASHIFTRT)
9451 {
9452 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9453 {
9454 /* Can use SBFX/UBFX. */
9455 if (speed_p)
9456 *cost += extra_cost->alu.bfx;
9457 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9458 }
9459 else
9460 {
9461 *cost += COSTS_N_INSNS (1);
9462 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9463 if (speed_p)
9464 {
9465 if (CONST_INT_P (XEXP (x, 1)))
9466 *cost += 2 * extra_cost->alu.shift;
9467 else
9468 *cost += (extra_cost->alu.shift
9469 + extra_cost->alu.shift_reg);
9470 }
9471 else
9472 /* Slightly disparage register shifts. */
9473 *cost += !CONST_INT_P (XEXP (x, 1));
9474 }
9475 }
9476 else /* Rotates. */
9477 {
9478 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9479 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9480 if (speed_p)
9481 {
9482 if (CONST_INT_P (XEXP (x, 1)))
9483 *cost += (2 * extra_cost->alu.shift
9484 + extra_cost->alu.log_shift);
9485 else
9486 *cost += (extra_cost->alu.shift
9487 + extra_cost->alu.shift_reg
9488 + extra_cost->alu.log_shift_reg);
9489 }
9490 }
9491 return true;
9492 }
9493
9494 *cost = LIBCALL_COST (2);
9495 return false;
9496
9497 case BSWAP:
9498 if (arm_arch6)
9499 {
9500 if (mode == SImode)
9501 {
9502 if (speed_p)
9503 *cost += extra_cost->alu.rev;
9504
9505 return false;
9506 }
9507 }
9508 else
9509 {
9510 /* No rev instruction available. Look at arm_legacy_rev
9511 and thumb_legacy_rev for the form of RTL used then. */
9512 if (TARGET_THUMB)
9513 {
9514 *cost += COSTS_N_INSNS (9);
9515
9516 if (speed_p)
9517 {
9518 *cost += 6 * extra_cost->alu.shift;
9519 *cost += 3 * extra_cost->alu.logical;
9520 }
9521 }
9522 else
9523 {
9524 *cost += COSTS_N_INSNS (4);
9525
9526 if (speed_p)
9527 {
9528 *cost += 2 * extra_cost->alu.shift;
9529 *cost += extra_cost->alu.arith_shift;
9530 *cost += 2 * extra_cost->alu.logical;
9531 }
9532 }
9533 return true;
9534 }
9535 return false;
9536
9537 case MINUS:
9538 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9539 && (mode == SFmode || !TARGET_VFP_SINGLE))
9540 {
9541 if (GET_CODE (XEXP (x, 0)) == MULT
9542 || GET_CODE (XEXP (x, 1)) == MULT)
9543 {
9544 rtx mul_op0, mul_op1, sub_op;
9545
9546 if (speed_p)
9547 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9548
9549 if (GET_CODE (XEXP (x, 0)) == MULT)
9550 {
9551 mul_op0 = XEXP (XEXP (x, 0), 0);
9552 mul_op1 = XEXP (XEXP (x, 0), 1);
9553 sub_op = XEXP (x, 1);
9554 }
9555 else
9556 {
9557 mul_op0 = XEXP (XEXP (x, 1), 0);
9558 mul_op1 = XEXP (XEXP (x, 1), 1);
9559 sub_op = XEXP (x, 0);
9560 }
9561
9562 /* The first operand of the multiply may be optionally
9563 negated. */
9564 if (GET_CODE (mul_op0) == NEG)
9565 mul_op0 = XEXP (mul_op0, 0);
9566
9567 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9568 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9569 + rtx_cost (sub_op, mode, code, 0, speed_p));
9570
9571 return true;
9572 }
9573
9574 if (speed_p)
9575 *cost += extra_cost->fp[mode != SFmode].addsub;
9576 return false;
9577 }
9578
9579 if (mode == SImode)
9580 {
9581 rtx shift_by_reg = NULL;
9582 rtx shift_op;
9583 rtx non_shift_op;
9584
9585 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9586 if (shift_op == NULL)
9587 {
9588 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9589 non_shift_op = XEXP (x, 0);
9590 }
9591 else
9592 non_shift_op = XEXP (x, 1);
9593
9594 if (shift_op != NULL)
9595 {
9596 if (shift_by_reg != NULL)
9597 {
9598 if (speed_p)
9599 *cost += extra_cost->alu.arith_shift_reg;
9600 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9601 }
9602 else if (speed_p)
9603 *cost += extra_cost->alu.arith_shift;
9604
9605 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9606 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9607 return true;
9608 }
9609
9610 if (arm_arch_thumb2
9611 && GET_CODE (XEXP (x, 1)) == MULT)
9612 {
9613 /* MLS. */
9614 if (speed_p)
9615 *cost += extra_cost->mult[0].add;
9616 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9617 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9618 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9619 return true;
9620 }
9621
9622 if (CONST_INT_P (XEXP (x, 0)))
9623 {
9624 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9625 INTVAL (XEXP (x, 0)), NULL_RTX,
9626 NULL_RTX, 1, 0);
9627 *cost = COSTS_N_INSNS (insns);
9628 if (speed_p)
9629 *cost += insns * extra_cost->alu.arith;
9630 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9631 return true;
9632 }
9633 else if (speed_p)
9634 *cost += extra_cost->alu.arith;
9635
9636 return false;
9637 }
9638
9639 if (GET_MODE_CLASS (mode) == MODE_INT
9640 && GET_MODE_SIZE (mode) < 4)
9641 {
9642 rtx shift_op, shift_reg;
9643 shift_reg = NULL;
9644
9645 /* We check both sides of the MINUS for shifter operands since,
9646 unlike PLUS, it's not commutative. */
9647
9648 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9649 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9650
9651 /* Slightly disparage, as we might need to widen the result. */
9652 *cost += 1;
9653 if (speed_p)
9654 *cost += extra_cost->alu.arith;
9655
9656 if (CONST_INT_P (XEXP (x, 0)))
9657 {
9658 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9659 return true;
9660 }
9661
9662 return false;
9663 }
9664
9665 if (mode == DImode)
9666 {
9667 *cost += COSTS_N_INSNS (1);
9668
9669 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9670 {
9671 rtx op1 = XEXP (x, 1);
9672
9673 if (speed_p)
9674 *cost += 2 * extra_cost->alu.arith;
9675
9676 if (GET_CODE (op1) == ZERO_EXTEND)
9677 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9678 0, speed_p);
9679 else
9680 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9681 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9682 0, speed_p);
9683 return true;
9684 }
9685 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9686 {
9687 if (speed_p)
9688 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9689 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9690 0, speed_p)
9691 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9692 return true;
9693 }
9694 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9695 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9696 {
9697 if (speed_p)
9698 *cost += (extra_cost->alu.arith
9699 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9700 ? extra_cost->alu.arith
9701 : extra_cost->alu.arith_shift));
9702 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9703 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9704 GET_CODE (XEXP (x, 1)), 0, speed_p));
9705 return true;
9706 }
9707
9708 if (speed_p)
9709 *cost += 2 * extra_cost->alu.arith;
9710 return false;
9711 }
9712
9713 /* Vector mode? */
9714
9715 *cost = LIBCALL_COST (2);
9716 return false;
9717
9718 case PLUS:
9719 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9720 && (mode == SFmode || !TARGET_VFP_SINGLE))
9721 {
9722 if (GET_CODE (XEXP (x, 0)) == MULT)
9723 {
9724 rtx mul_op0, mul_op1, add_op;
9725
9726 if (speed_p)
9727 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9728
9729 mul_op0 = XEXP (XEXP (x, 0), 0);
9730 mul_op1 = XEXP (XEXP (x, 0), 1);
9731 add_op = XEXP (x, 1);
9732
9733 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9734 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9735 + rtx_cost (add_op, mode, code, 0, speed_p));
9736
9737 return true;
9738 }
9739
9740 if (speed_p)
9741 *cost += extra_cost->fp[mode != SFmode].addsub;
9742 return false;
9743 }
9744 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9745 {
9746 *cost = LIBCALL_COST (2);
9747 return false;
9748 }
9749
9750 /* Narrow modes can be synthesized in SImode, but the range
9751 of useful sub-operations is limited. Check for shift operations
9752 on one of the operands. Only left shifts can be used in the
9753 narrow modes. */
9754 if (GET_MODE_CLASS (mode) == MODE_INT
9755 && GET_MODE_SIZE (mode) < 4)
9756 {
9757 rtx shift_op, shift_reg;
9758 shift_reg = NULL;
9759
9760 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9761
9762 if (CONST_INT_P (XEXP (x, 1)))
9763 {
9764 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9765 INTVAL (XEXP (x, 1)), NULL_RTX,
9766 NULL_RTX, 1, 0);
9767 *cost = COSTS_N_INSNS (insns);
9768 if (speed_p)
9769 *cost += insns * extra_cost->alu.arith;
9770 /* Slightly penalize a narrow operation as the result may
9771 need widening. */
9772 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9773 return true;
9774 }
9775
9776 /* Slightly penalize a narrow operation as the result may
9777 need widening. */
9778 *cost += 1;
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith;
9781
9782 return false;
9783 }
9784
9785 if (mode == SImode)
9786 {
9787 rtx shift_op, shift_reg;
9788
9789 if (TARGET_INT_SIMD
9790 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9791 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9792 {
9793 /* UXTA[BH] or SXTA[BH]. */
9794 if (speed_p)
9795 *cost += extra_cost->alu.extend_arith;
9796 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9797 0, speed_p)
9798 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9799 return true;
9800 }
9801
9802 shift_reg = NULL;
9803 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9804 if (shift_op != NULL)
9805 {
9806 if (shift_reg)
9807 {
9808 if (speed_p)
9809 *cost += extra_cost->alu.arith_shift_reg;
9810 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9811 }
9812 else if (speed_p)
9813 *cost += extra_cost->alu.arith_shift;
9814
9815 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9816 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9817 return true;
9818 }
9819 if (GET_CODE (XEXP (x, 0)) == MULT)
9820 {
9821 rtx mul_op = XEXP (x, 0);
9822
9823 if (TARGET_DSP_MULTIPLY
9824 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9825 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9826 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9827 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9828 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9829 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9830 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9831 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9832 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9833 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9834 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9835 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9836 == 16))))))
9837 {
9838 /* SMLA[BT][BT]. */
9839 if (speed_p)
9840 *cost += extra_cost->mult[0].extend_add;
9841 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9842 SIGN_EXTEND, 0, speed_p)
9843 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9844 SIGN_EXTEND, 0, speed_p)
9845 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9846 return true;
9847 }
9848
9849 if (speed_p)
9850 *cost += extra_cost->mult[0].add;
9851 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9852 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9853 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9854 return true;
9855 }
9856 if (CONST_INT_P (XEXP (x, 1)))
9857 {
9858 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9859 INTVAL (XEXP (x, 1)), NULL_RTX,
9860 NULL_RTX, 1, 0);
9861 *cost = COSTS_N_INSNS (insns);
9862 if (speed_p)
9863 *cost += insns * extra_cost->alu.arith;
9864 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9865 return true;
9866 }
9867 else if (speed_p)
9868 *cost += extra_cost->alu.arith;
9869
9870 return false;
9871 }
9872
9873 if (mode == DImode)
9874 {
9875 if (arm_arch3m
9876 && GET_CODE (XEXP (x, 0)) == MULT
9877 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9878 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9879 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9880 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9881 {
9882 if (speed_p)
9883 *cost += extra_cost->mult[1].extend_add;
9884 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9885 ZERO_EXTEND, 0, speed_p)
9886 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9887 ZERO_EXTEND, 0, speed_p)
9888 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9889 return true;
9890 }
9891
9892 *cost += COSTS_N_INSNS (1);
9893
9894 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9895 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9896 {
9897 if (speed_p)
9898 *cost += (extra_cost->alu.arith
9899 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9900 ? extra_cost->alu.arith
9901 : extra_cost->alu.arith_shift));
9902
9903 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9904 0, speed_p)
9905 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9906 return true;
9907 }
9908
9909 if (speed_p)
9910 *cost += 2 * extra_cost->alu.arith;
9911 return false;
9912 }
9913
9914 /* Vector mode? */
9915 *cost = LIBCALL_COST (2);
9916 return false;
9917 case IOR:
9918 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9919 {
9920 if (speed_p)
9921 *cost += extra_cost->alu.rev;
9922
9923 return true;
9924 }
9925 /* Fall through. */
9926 case AND: case XOR:
9927 if (mode == SImode)
9928 {
9929 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9930 rtx op0 = XEXP (x, 0);
9931 rtx shift_op, shift_reg;
9932
9933 if (subcode == NOT
9934 && (code == AND
9935 || (code == IOR && TARGET_THUMB2)))
9936 op0 = XEXP (op0, 0);
9937
9938 shift_reg = NULL;
9939 shift_op = shifter_op_p (op0, &shift_reg);
9940 if (shift_op != NULL)
9941 {
9942 if (shift_reg)
9943 {
9944 if (speed_p)
9945 *cost += extra_cost->alu.log_shift_reg;
9946 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9947 }
9948 else if (speed_p)
9949 *cost += extra_cost->alu.log_shift;
9950
9951 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9952 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9953 return true;
9954 }
9955
9956 if (CONST_INT_P (XEXP (x, 1)))
9957 {
9958 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9959 INTVAL (XEXP (x, 1)), NULL_RTX,
9960 NULL_RTX, 1, 0);
9961
9962 *cost = COSTS_N_INSNS (insns);
9963 if (speed_p)
9964 *cost += insns * extra_cost->alu.logical;
9965 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9966 return true;
9967 }
9968
9969 if (speed_p)
9970 *cost += extra_cost->alu.logical;
9971 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9972 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9973 return true;
9974 }
9975
9976 if (mode == DImode)
9977 {
9978 rtx op0 = XEXP (x, 0);
9979 enum rtx_code subcode = GET_CODE (op0);
9980
9981 *cost += COSTS_N_INSNS (1);
9982
9983 if (subcode == NOT
9984 && (code == AND
9985 || (code == IOR && TARGET_THUMB2)))
9986 op0 = XEXP (op0, 0);
9987
9988 if (GET_CODE (op0) == ZERO_EXTEND)
9989 {
9990 if (speed_p)
9991 *cost += 2 * extra_cost->alu.logical;
9992
9993 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9994 0, speed_p)
9995 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9996 return true;
9997 }
9998 else if (GET_CODE (op0) == SIGN_EXTEND)
9999 {
10000 if (speed_p)
10001 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10002
10003 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10004 0, speed_p)
10005 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10006 return true;
10007 }
10008
10009 if (speed_p)
10010 *cost += 2 * extra_cost->alu.logical;
10011
10012 return true;
10013 }
10014 /* Vector mode? */
10015
10016 *cost = LIBCALL_COST (2);
10017 return false;
10018
10019 case MULT:
10020 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10021 && (mode == SFmode || !TARGET_VFP_SINGLE))
10022 {
10023 rtx op0 = XEXP (x, 0);
10024
10025 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10026 op0 = XEXP (op0, 0);
10027
10028 if (speed_p)
10029 *cost += extra_cost->fp[mode != SFmode].mult;
10030
10031 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10032 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10033 return true;
10034 }
10035 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10036 {
10037 *cost = LIBCALL_COST (2);
10038 return false;
10039 }
10040
10041 if (mode == SImode)
10042 {
10043 if (TARGET_DSP_MULTIPLY
10044 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10045 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10046 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10047 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10048 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10049 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10050 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10051 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10052 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10053 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10054 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10055 && (INTVAL (XEXP (XEXP (x, 1), 1))
10056 == 16))))))
10057 {
10058 /* SMUL[TB][TB]. */
10059 if (speed_p)
10060 *cost += extra_cost->mult[0].extend;
10061 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10062 SIGN_EXTEND, 0, speed_p);
10063 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10064 SIGN_EXTEND, 1, speed_p);
10065 return true;
10066 }
10067 if (speed_p)
10068 *cost += extra_cost->mult[0].simple;
10069 return false;
10070 }
10071
10072 if (mode == DImode)
10073 {
10074 if (arm_arch3m
10075 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10076 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10077 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10078 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10079 {
10080 if (speed_p)
10081 *cost += extra_cost->mult[1].extend;
10082 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10083 ZERO_EXTEND, 0, speed_p)
10084 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10085 ZERO_EXTEND, 0, speed_p));
10086 return true;
10087 }
10088
10089 *cost = LIBCALL_COST (2);
10090 return false;
10091 }
10092
10093 /* Vector mode? */
10094 *cost = LIBCALL_COST (2);
10095 return false;
10096
10097 case NEG:
10098 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10099 && (mode == SFmode || !TARGET_VFP_SINGLE))
10100 {
10101 if (GET_CODE (XEXP (x, 0)) == MULT)
10102 {
10103 /* VNMUL. */
10104 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10105 return true;
10106 }
10107
10108 if (speed_p)
10109 *cost += extra_cost->fp[mode != SFmode].neg;
10110
10111 return false;
10112 }
10113 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10114 {
10115 *cost = LIBCALL_COST (1);
10116 return false;
10117 }
10118
10119 if (mode == SImode)
10120 {
10121 if (GET_CODE (XEXP (x, 0)) == ABS)
10122 {
10123 *cost += COSTS_N_INSNS (1);
10124 /* Assume the non-flag-changing variant. */
10125 if (speed_p)
10126 *cost += (extra_cost->alu.log_shift
10127 + extra_cost->alu.arith_shift);
10128 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10129 return true;
10130 }
10131
10132 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10133 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10134 {
10135 *cost += COSTS_N_INSNS (1);
10136 /* No extra cost for MOV imm and MVN imm. */
10137 /* If the comparison op is using the flags, there's no further
10138 cost; otherwise we need to add the cost of the comparison. */
10139 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10140 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10141 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10142 {
10143 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10144 *cost += (COSTS_N_INSNS (1)
10145 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10146 0, speed_p)
10147 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10148 1, speed_p));
10149 if (speed_p)
10150 *cost += extra_cost->alu.arith;
10151 }
10152 return true;
10153 }
10154
10155 if (speed_p)
10156 *cost += extra_cost->alu.arith;
10157 return false;
10158 }
10159
10160 if (GET_MODE_CLASS (mode) == MODE_INT
10161 && GET_MODE_SIZE (mode) < 4)
10162 {
10163 /* Slightly disparage, as we might need an extend operation. */
10164 *cost += 1;
10165 if (speed_p)
10166 *cost += extra_cost->alu.arith;
10167 return false;
10168 }
10169
10170 if (mode == DImode)
10171 {
10172 *cost += COSTS_N_INSNS (1);
10173 if (speed_p)
10174 *cost += 2 * extra_cost->alu.arith;
10175 return false;
10176 }
10177
10178 /* Vector mode? */
10179 *cost = LIBCALL_COST (1);
10180 return false;
10181
10182 case NOT:
10183 if (mode == SImode)
10184 {
10185 rtx shift_op;
10186 rtx shift_reg = NULL;
10187
10188 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10189
10190 if (shift_op)
10191 {
10192 if (shift_reg != NULL)
10193 {
10194 if (speed_p)
10195 *cost += extra_cost->alu.log_shift_reg;
10196 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10197 }
10198 else if (speed_p)
10199 *cost += extra_cost->alu.log_shift;
10200 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10201 return true;
10202 }
10203
10204 if (speed_p)
10205 *cost += extra_cost->alu.logical;
10206 return false;
10207 }
10208 if (mode == DImode)
10209 {
10210 *cost += COSTS_N_INSNS (1);
10211 return false;
10212 }
10213
10214 /* Vector mode? */
10215
10216 *cost += LIBCALL_COST (1);
10217 return false;
10218
10219 case IF_THEN_ELSE:
10220 {
10221 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10222 {
10223 *cost += COSTS_N_INSNS (3);
10224 return true;
10225 }
10226 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10227 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10228
10229 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10230 /* Assume that if one arm of the if_then_else is a register,
10231 that it will be tied with the result and eliminate the
10232 conditional insn. */
10233 if (REG_P (XEXP (x, 1)))
10234 *cost += op2cost;
10235 else if (REG_P (XEXP (x, 2)))
10236 *cost += op1cost;
10237 else
10238 {
10239 if (speed_p)
10240 {
10241 if (extra_cost->alu.non_exec_costs_exec)
10242 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10243 else
10244 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10245 }
10246 else
10247 *cost += op1cost + op2cost;
10248 }
10249 }
10250 return true;
10251
10252 case COMPARE:
10253 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10254 *cost = 0;
10255 else
10256 {
10257 machine_mode op0mode;
10258 /* We'll mostly assume that the cost of a compare is the cost of the
10259 LHS. However, there are some notable exceptions. */
10260
10261 /* Floating point compares are never done as side-effects. */
10262 op0mode = GET_MODE (XEXP (x, 0));
10263 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10264 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10265 {
10266 if (speed_p)
10267 *cost += extra_cost->fp[op0mode != SFmode].compare;
10268
10269 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10270 {
10271 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10272 return true;
10273 }
10274
10275 return false;
10276 }
10277 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10278 {
10279 *cost = LIBCALL_COST (2);
10280 return false;
10281 }
10282
10283 /* DImode compares normally take two insns. */
10284 if (op0mode == DImode)
10285 {
10286 *cost += COSTS_N_INSNS (1);
10287 if (speed_p)
10288 *cost += 2 * extra_cost->alu.arith;
10289 return false;
10290 }
10291
10292 if (op0mode == SImode)
10293 {
10294 rtx shift_op;
10295 rtx shift_reg;
10296
10297 if (XEXP (x, 1) == const0_rtx
10298 && !(REG_P (XEXP (x, 0))
10299 || (GET_CODE (XEXP (x, 0)) == SUBREG
10300 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10301 {
10302 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10303
10304 /* Multiply operations that set the flags are often
10305 significantly more expensive. */
10306 if (speed_p
10307 && GET_CODE (XEXP (x, 0)) == MULT
10308 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10309 *cost += extra_cost->mult[0].flag_setting;
10310
10311 if (speed_p
10312 && GET_CODE (XEXP (x, 0)) == PLUS
10313 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10314 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10315 0), 1), mode))
10316 *cost += extra_cost->mult[0].flag_setting;
10317 return true;
10318 }
10319
10320 shift_reg = NULL;
10321 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10322 if (shift_op != NULL)
10323 {
10324 if (shift_reg != NULL)
10325 {
10326 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10327 1, speed_p);
10328 if (speed_p)
10329 *cost += extra_cost->alu.arith_shift_reg;
10330 }
10331 else if (speed_p)
10332 *cost += extra_cost->alu.arith_shift;
10333 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10334 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10335 return true;
10336 }
10337
10338 if (speed_p)
10339 *cost += extra_cost->alu.arith;
10340 if (CONST_INT_P (XEXP (x, 1))
10341 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10342 {
10343 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10344 return true;
10345 }
10346 return false;
10347 }
10348
10349 /* Vector mode? */
10350
10351 *cost = LIBCALL_COST (2);
10352 return false;
10353 }
10354 return true;
10355
10356 case EQ:
10357 case NE:
10358 case LT:
10359 case LE:
10360 case GT:
10361 case GE:
10362 case LTU:
10363 case LEU:
10364 case GEU:
10365 case GTU:
10366 case ORDERED:
10367 case UNORDERED:
10368 case UNEQ:
10369 case UNLE:
10370 case UNLT:
10371 case UNGE:
10372 case UNGT:
10373 case LTGT:
10374 if (outer_code == SET)
10375 {
10376 /* Is it a store-flag operation? */
10377 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10378 && XEXP (x, 1) == const0_rtx)
10379 {
10380 /* Thumb also needs an IT insn. */
10381 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10382 return true;
10383 }
10384 if (XEXP (x, 1) == const0_rtx)
10385 {
10386 switch (code)
10387 {
10388 case LT:
10389 /* LSR Rd, Rn, #31. */
10390 if (speed_p)
10391 *cost += extra_cost->alu.shift;
10392 break;
10393
10394 case EQ:
10395 /* RSBS T1, Rn, #0
10396 ADC Rd, Rn, T1. */
10397
10398 case NE:
10399 /* SUBS T1, Rn, #1
10400 SBC Rd, Rn, T1. */
10401 *cost += COSTS_N_INSNS (1);
10402 break;
10403
10404 case LE:
10405 /* RSBS T1, Rn, Rn, LSR #31
10406 ADC Rd, Rn, T1. */
10407 *cost += COSTS_N_INSNS (1);
10408 if (speed_p)
10409 *cost += extra_cost->alu.arith_shift;
10410 break;
10411
10412 case GT:
10413 /* RSB Rd, Rn, Rn, ASR #1
10414 LSR Rd, Rd, #31. */
10415 *cost += COSTS_N_INSNS (1);
10416 if (speed_p)
10417 *cost += (extra_cost->alu.arith_shift
10418 + extra_cost->alu.shift);
10419 break;
10420
10421 case GE:
10422 /* ASR Rd, Rn, #31
10423 ADD Rd, Rn, #1. */
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += extra_cost->alu.shift;
10427 break;
10428
10429 default:
10430 /* Remaining cases are either meaningless or would take
10431 three insns anyway. */
10432 *cost = COSTS_N_INSNS (3);
10433 break;
10434 }
10435 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10436 return true;
10437 }
10438 else
10439 {
10440 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10441 if (CONST_INT_P (XEXP (x, 1))
10442 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10443 {
10444 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10445 return true;
10446 }
10447
10448 return false;
10449 }
10450 }
10451 /* Not directly inside a set. If it involves the condition code
10452 register it must be the condition for a branch, cond_exec or
10453 I_T_E operation. Since the comparison is performed elsewhere,
10454 this is just the control part, which has no additional
10455 cost. */
10456 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10457 && XEXP (x, 1) == const0_rtx)
10458 {
10459 *cost = 0;
10460 return true;
10461 }
10462 return false;
10463
10464 case ABS:
10465 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10466 && (mode == SFmode || !TARGET_VFP_SINGLE))
10467 {
10468 if (speed_p)
10469 *cost += extra_cost->fp[mode != SFmode].neg;
10470
10471 return false;
10472 }
10473 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10474 {
10475 *cost = LIBCALL_COST (1);
10476 return false;
10477 }
10478
10479 if (mode == SImode)
10480 {
10481 if (speed_p)
10482 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10483 return false;
10484 }
10485 /* Vector mode? */
10486 *cost = LIBCALL_COST (1);
10487 return false;
10488
10489 case SIGN_EXTEND:
10490 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10491 && MEM_P (XEXP (x, 0)))
10492 {
10493 if (mode == DImode)
10494 *cost += COSTS_N_INSNS (1);
10495
10496 if (!speed_p)
10497 return true;
10498
10499 if (GET_MODE (XEXP (x, 0)) == SImode)
10500 *cost += extra_cost->ldst.load;
10501 else
10502 *cost += extra_cost->ldst.load_sign_extend;
10503
10504 if (mode == DImode)
10505 *cost += extra_cost->alu.shift;
10506
10507 return true;
10508 }
10509
10510 /* Widening from less than 32-bits requires an extend operation. */
10511 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10512 {
10513 /* We have SXTB/SXTH. */
10514 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10515 if (speed_p)
10516 *cost += extra_cost->alu.extend;
10517 }
10518 else if (GET_MODE (XEXP (x, 0)) != SImode)
10519 {
10520 /* Needs two shifts. */
10521 *cost += COSTS_N_INSNS (1);
10522 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10523 if (speed_p)
10524 *cost += 2 * extra_cost->alu.shift;
10525 }
10526
10527 /* Widening beyond 32-bits requires one more insn. */
10528 if (mode == DImode)
10529 {
10530 *cost += COSTS_N_INSNS (1);
10531 if (speed_p)
10532 *cost += extra_cost->alu.shift;
10533 }
10534
10535 return true;
10536
10537 case ZERO_EXTEND:
10538 if ((arm_arch4
10539 || GET_MODE (XEXP (x, 0)) == SImode
10540 || GET_MODE (XEXP (x, 0)) == QImode)
10541 && MEM_P (XEXP (x, 0)))
10542 {
10543 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10544
10545 if (mode == DImode)
10546 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10547
10548 return true;
10549 }
10550
10551 /* Widening from less than 32-bits requires an extend operation. */
10552 if (GET_MODE (XEXP (x, 0)) == QImode)
10553 {
10554 /* UXTB can be a shorter instruction in Thumb2, but it might
10555 be slower than the AND Rd, Rn, #255 alternative. When
10556 optimizing for speed it should never be slower to use
10557 AND, and we don't really model 16-bit vs 32-bit insns
10558 here. */
10559 if (speed_p)
10560 *cost += extra_cost->alu.logical;
10561 }
10562 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10563 {
10564 /* We have UXTB/UXTH. */
10565 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10566 if (speed_p)
10567 *cost += extra_cost->alu.extend;
10568 }
10569 else if (GET_MODE (XEXP (x, 0)) != SImode)
10570 {
10571 /* Needs two shifts. It's marginally preferable to use
10572 shifts rather than two BIC instructions as the second
10573 shift may merge with a subsequent insn as a shifter
10574 op. */
10575 *cost = COSTS_N_INSNS (2);
10576 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10577 if (speed_p)
10578 *cost += 2 * extra_cost->alu.shift;
10579 }
10580
10581 /* Widening beyond 32-bits requires one more insn. */
10582 if (mode == DImode)
10583 {
10584 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10585 }
10586
10587 return true;
10588
10589 case CONST_INT:
10590 *cost = 0;
10591 /* CONST_INT has no mode, so we cannot tell for sure how many
10592 insns are really going to be needed. The best we can do is
10593 look at the value passed. If it fits in SImode, then assume
10594 that's the mode it will be used for. Otherwise assume it
10595 will be used in DImode. */
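 /* Illustrative (hypothetical values): 0x12345 survives truncation to
    SImode, so it is costed as an SImode constant; 0x123456789 does not,
    so it is costed as the sum of arm_gen_constant for its low and high
    32-bit halves.  */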
10596 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10597 mode = SImode;
10598 else
10599 mode = DImode;
10600
10601 /* Avoid blowing up in arm_gen_constant (). */
10602 if (!(outer_code == PLUS
10603 || outer_code == AND
10604 || outer_code == IOR
10605 || outer_code == XOR
10606 || outer_code == MINUS))
10607 outer_code = SET;
10608
10609 const_int_cost:
10610 if (mode == SImode)
10611 {
10612 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10613 INTVAL (x), NULL, NULL,
10614 0, 0));
10615 /* Extra costs? */
10616 }
10617 else
10618 {
10619 *cost += COSTS_N_INSNS (arm_gen_constant
10620 (outer_code, SImode, NULL,
10621 trunc_int_for_mode (INTVAL (x), SImode),
10622 NULL, NULL, 0, 0)
10623 + arm_gen_constant (outer_code, SImode, NULL,
10624 INTVAL (x) >> 32, NULL,
10625 NULL, 0, 0));
10626 /* Extra costs? */
10627 }
10628
10629 return true;
10630
10631 case CONST:
10632 case LABEL_REF:
10633 case SYMBOL_REF:
10634 if (speed_p)
10635 {
10636 if (arm_arch_thumb2 && !flag_pic)
10637 *cost += COSTS_N_INSNS (1);
10638 else
10639 *cost += extra_cost->ldst.load;
10640 }
10641 else
10642 *cost += COSTS_N_INSNS (1);
10643
10644 if (flag_pic)
10645 {
10646 *cost += COSTS_N_INSNS (1);
10647 if (speed_p)
10648 *cost += extra_cost->alu.arith;
10649 }
10650
10651 return true;
10652
10653 case CONST_FIXED:
10654 *cost = COSTS_N_INSNS (4);
10655 /* Fixme. */
10656 return true;
10657
10658 case CONST_DOUBLE:
10659 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10660 && (mode == SFmode || !TARGET_VFP_SINGLE))
10661 {
10662 if (vfp3_const_double_rtx (x))
10663 {
10664 if (speed_p)
10665 *cost += extra_cost->fp[mode == DFmode].fpconst;
10666 return true;
10667 }
10668
10669 if (speed_p)
10670 {
10671 if (mode == DFmode)
10672 *cost += extra_cost->ldst.loadd;
10673 else
10674 *cost += extra_cost->ldst.loadf;
10675 }
10676 else
10677 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10678
10679 return true;
10680 }
10681 *cost = COSTS_N_INSNS (4);
10682 return true;
10683
10684 case CONST_VECTOR:
10685 /* Fixme. */
10686 if (TARGET_NEON
10687 && TARGET_HARD_FLOAT
10688 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10689 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10690 *cost = COSTS_N_INSNS (1);
10691 else
10692 *cost = COSTS_N_INSNS (4);
10693 return true;
10694
10695 case HIGH:
10696 case LO_SUM:
10697 /* When optimizing for size, we prefer constant pool entries to
10698 MOVW/MOVT pairs, so bump the cost of these slightly. */
10699 if (!speed_p)
10700 *cost += 1;
10701 return true;
10702
10703 case CLZ:
10704 if (speed_p)
10705 *cost += extra_cost->alu.clz;
10706 return false;
10707
10708 case SMIN:
10709 if (XEXP (x, 1) == const0_rtx)
10710 {
10711 if (speed_p)
10712 *cost += extra_cost->alu.log_shift;
10713 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10714 return true;
10715 }
10716 /* Fall through. */
10717 case SMAX:
10718 case UMIN:
10719 case UMAX:
10720 *cost += COSTS_N_INSNS (1);
10721 return false;
10722
10723 case TRUNCATE:
10724 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10725 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10726 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10728 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10729 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10730 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10731 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10732 == ZERO_EXTEND))))
10733 {
10734 if (speed_p)
10735 *cost += extra_cost->mult[1].extend;
10736 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10737 ZERO_EXTEND, 0, speed_p)
10738 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10739 ZERO_EXTEND, 0, speed_p));
10740 return true;
10741 }
10742 *cost = LIBCALL_COST (1);
10743 return false;
10744
10745 case UNSPEC_VOLATILE:
10746 case UNSPEC:
10747 return arm_unspec_cost (x, outer_code, speed_p, cost);
10748
10749 case PC:
10750 /* Reading the PC is like reading any other register. Writing it
10751 is more expensive, but we take that into account elsewhere. */
10752 *cost = 0;
10753 return true;
10754
10755 case ZERO_EXTRACT:
10756 /* TODO: Simple zero_extract of bottom bits using AND. */
10757 /* Fall through. */
10758 case SIGN_EXTRACT:
10759 if (arm_arch6
10760 && mode == SImode
10761 && CONST_INT_P (XEXP (x, 1))
10762 && CONST_INT_P (XEXP (x, 2)))
10763 {
10764 if (speed_p)
10765 *cost += extra_cost->alu.bfx;
10766 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10767 return true;
10768 }
10769 /* Without UBFX/SBFX, need to resort to shift operations. */
10770 *cost += COSTS_N_INSNS (1);
10771 if (speed_p)
10772 *cost += 2 * extra_cost->alu.shift;
10773 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10774 return true;
10775
10776 case FLOAT_EXTEND:
10777 if (TARGET_HARD_FLOAT)
10778 {
10779 if (speed_p)
10780 *cost += extra_cost->fp[mode == DFmode].widen;
10781 if (!TARGET_VFP5
10782 && GET_MODE (XEXP (x, 0)) == HFmode)
10783 {
10784 /* Pre v8, widening HF->DF is a two-step process, first
10785 widening to SFmode. */
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].widen;
10789 }
10790 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10791 return true;
10792 }
10793
10794 *cost = LIBCALL_COST (1);
10795 return false;
10796
10797 case FLOAT_TRUNCATE:
10798 if (TARGET_HARD_FLOAT)
10799 {
10800 if (speed_p)
10801 *cost += extra_cost->fp[mode == DFmode].narrow;
10802 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10803 return true;
10804 /* Vector modes? */
10805 }
10806 *cost = LIBCALL_COST (1);
10807 return false;
10808
10809 case FMA:
10810 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10811 {
10812 rtx op0 = XEXP (x, 0);
10813 rtx op1 = XEXP (x, 1);
10814 rtx op2 = XEXP (x, 2);
10815
10816
10817 /* vfms or vfnma. */
10818 if (GET_CODE (op0) == NEG)
10819 op0 = XEXP (op0, 0);
10820
10821 /* vfnms or vfnma. */
10822 if (GET_CODE (op2) == NEG)
10823 op2 = XEXP (op2, 0);
10824
10825 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10826 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10827 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10828
10829 if (speed_p)
10830 *cost += extra_cost->fp[mode == DFmode].fma;
10831
10832 return true;
10833 }
10834
10835 *cost = LIBCALL_COST (3);
10836 return false;
10837
10838 case FIX:
10839 case UNSIGNED_FIX:
10840 if (TARGET_HARD_FLOAT)
10841 {
10842 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10843 a vcvt fixed-point conversion. */
10844 if (code == FIX && mode == SImode
10845 && GET_CODE (XEXP (x, 0)) == FIX
10846 && GET_MODE (XEXP (x, 0)) == SFmode
10847 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10848 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10849 > 0)
10850 {
10851 if (speed_p)
10852 *cost += extra_cost->fp[0].toint;
10853
10854 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10855 code, 0, speed_p);
10856 return true;
10857 }
10858
10859 if (GET_MODE_CLASS (mode) == MODE_INT)
10860 {
10861 mode = GET_MODE (XEXP (x, 0));
10862 if (speed_p)
10863 *cost += extra_cost->fp[mode == DFmode].toint;
10864 /* Strip off the 'cost' of rounding towards zero. */
10865 if (GET_CODE (XEXP (x, 0)) == FIX)
10866 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10867 0, speed_p);
10868 else
10869 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10870 /* ??? Increase the cost to deal with transferring from
10871 FP -> CORE registers? */
10872 return true;
10873 }
10874 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10875 && TARGET_VFP5)
10876 {
10877 if (speed_p)
10878 *cost += extra_cost->fp[mode == DFmode].roundint;
10879 return false;
10880 }
10881 /* Vector costs? */
10882 }
10883 *cost = LIBCALL_COST (1);
10884 return false;
10885
10886 case FLOAT:
10887 case UNSIGNED_FLOAT:
10888 if (TARGET_HARD_FLOAT)
10889 {
10890 /* ??? Increase the cost to deal with transferring from CORE
10891 -> FP registers? */
10892 if (speed_p)
10893 *cost += extra_cost->fp[mode == DFmode].fromint;
10894 return false;
10895 }
10896 *cost = LIBCALL_COST (1);
10897 return false;
10898
10899 case CALL:
10900 return true;
10901
10902 case ASM_OPERANDS:
10903 {
10904 /* Just a guess: estimate the number of instructions in the asm
10905 template, plus one insn per input, but always a minimum of
10906 COSTS_N_INSNS (1) (see PR60663). */
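/* For example, a template that asm_str_count sees as two instructions
   with three inputs would be costed COSTS_N_INSNS (2 + 3); an empty
   template with no inputs still costs COSTS_N_INSNS (1). */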
10907 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10908 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10909
10910 *cost = COSTS_N_INSNS (asm_length + num_operands);
10911 return true;
10912 }
10913 default:
10914 if (mode != VOIDmode)
10915 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10916 else
10917 *cost = COSTS_N_INSNS (4); /* Who knows? */
10918 return false;
10919 }
10920 }
10921
10922 #undef HANDLE_NARROW_SHIFT_ARITH
10923
10924 /* RTX costs entry point. */
10925
10926 static bool
10927 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10928 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10929 {
10930 bool result;
10931 int code = GET_CODE (x);
10932 gcc_assert (current_tune->insn_extra_cost);
10933
10934 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10935 (enum rtx_code) outer_code,
10936 current_tune->insn_extra_cost,
10937 total, speed);
10938
10939 if (dump_file && (dump_flags & TDF_DETAILS))
10940 {
10941 print_rtl_single (dump_file, x);
10942 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10943 *total, result ? "final" : "partial");
10944 }
10945 return result;
10946 }
10947
10948 /* All address computations that can be done are free, but rtx cost returns
10949 the same for practically all of them. So we weight the different types
10950 of address here in the order (most pref first):
10951 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
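/* Read against the code below: pre/post increment or decrement costs 0,
   (plus reg (const_int N)) costs 2, a sum involving another arithmetic
   term costs 3, (plus reg reg) costs 4, a bare REG costs 6, and a MEM,
   LABEL_REF or SYMBOL_REF address costs 10. */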
10952 static inline int
10953 arm_arm_address_cost (rtx x)
10954 {
10955 enum rtx_code c = GET_CODE (x);
10956
10957 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10958 return 0;
10959 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10960 return 10;
10961
10962 if (c == PLUS)
10963 {
10964 if (CONST_INT_P (XEXP (x, 1)))
10965 return 2;
10966
10967 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10968 return 3;
10969
10970 return 4;
10971 }
10972
10973 return 6;
10974 }
10975
10976 static inline int
10977 arm_thumb_address_cost (rtx x)
10978 {
10979 enum rtx_code c = GET_CODE (x);
10980
10981 if (c == REG)
10982 return 1;
10983 if (c == PLUS
10984 && REG_P (XEXP (x, 0))
10985 && CONST_INT_P (XEXP (x, 1)))
10986 return 1;
10987
10988 return 2;
10989 }
10990
10991 static int
10992 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10993 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10994 {
10995 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10996 }
10997
10998 /* Adjust cost hook for XScale. */
10999 static bool
11000 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11001 int * cost)
11002 {
11003 /* Some true dependencies can have a higher cost depending
11004 on precisely how certain input operands are used. */
11005 if (dep_type == 0
11006 && recog_memoized (insn) >= 0
11007 && recog_memoized (dep) >= 0)
11008 {
11009 int shift_opnum = get_attr_shift (insn);
11010 enum attr_type attr_type = get_attr_type (dep);
11011
11012 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11013 operand for INSN. If we have a shifted input operand and the
11014 instruction we depend on is another ALU instruction, then we may
11015 have to account for an additional stall. */
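/* An illustrative (hypothetical) pairing: if INSN is
   add r0, r1, r2, lsl #2 and DEP is a shift-type ALU insn such as
   mov r2, r3, lsl #1 that writes r2, the code below bumps the
   dependency cost to 2. */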
11016 if (shift_opnum != 0
11017 && (attr_type == TYPE_ALU_SHIFT_IMM
11018 || attr_type == TYPE_ALUS_SHIFT_IMM
11019 || attr_type == TYPE_LOGIC_SHIFT_IMM
11020 || attr_type == TYPE_LOGICS_SHIFT_IMM
11021 || attr_type == TYPE_ALU_SHIFT_REG
11022 || attr_type == TYPE_ALUS_SHIFT_REG
11023 || attr_type == TYPE_LOGIC_SHIFT_REG
11024 || attr_type == TYPE_LOGICS_SHIFT_REG
11025 || attr_type == TYPE_MOV_SHIFT
11026 || attr_type == TYPE_MVN_SHIFT
11027 || attr_type == TYPE_MOV_SHIFT_REG
11028 || attr_type == TYPE_MVN_SHIFT_REG))
11029 {
11030 rtx shifted_operand;
11031 int opno;
11032
11033 /* Get the shifted operand. */
11034 extract_insn (insn);
11035 shifted_operand = recog_data.operand[shift_opnum];
11036
11037 /* Iterate over all the operands in DEP. If we write an operand
11038 that overlaps with SHIFTED_OPERAND, then we have to increase the
11039 cost of this dependency. */
11040 extract_insn (dep);
11041 preprocess_constraints (dep);
11042 for (opno = 0; opno < recog_data.n_operands; opno++)
11043 {
11044 /* We can ignore strict inputs. */
11045 if (recog_data.operand_type[opno] == OP_IN)
11046 continue;
11047
11048 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11049 shifted_operand))
11050 {
11051 *cost = 2;
11052 return false;
11053 }
11054 }
11055 }
11056 }
11057 return true;
11058 }
11059
11060 /* Adjust cost hook for Cortex A9. */
11061 static bool
11062 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11063 int * cost)
11064 {
11065 switch (dep_type)
11066 {
11067 case REG_DEP_ANTI:
11068 *cost = 0;
11069 return false;
11070
11071 case REG_DEP_TRUE:
11072 case REG_DEP_OUTPUT:
11073 if (recog_memoized (insn) >= 0
11074 && recog_memoized (dep) >= 0)
11075 {
11076 if (GET_CODE (PATTERN (insn)) == SET)
11077 {
11078 if (GET_MODE_CLASS
11079 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11080 || GET_MODE_CLASS
11081 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11082 {
11083 enum attr_type attr_type_insn = get_attr_type (insn);
11084 enum attr_type attr_type_dep = get_attr_type (dep);
11085
11086 /* By default all dependencies of the form
11087 s0 = s0 <op> s1
11088 s0 = s0 <op> s2
11089 have an extra latency of 1 cycle because
11090 of the input and output dependency in this
11091 case. However this gets modeled as a true
11092 dependency and hence all these checks. */
11093 if (REG_P (SET_DEST (PATTERN (insn)))
11094 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11095 {
11096 /* FMACS is a special case where the dependent
11097 instruction can be issued 3 cycles before
11098 the normal latency in case of an output
11099 dependency. */
11100 if ((attr_type_insn == TYPE_FMACS
11101 || attr_type_insn == TYPE_FMACD)
11102 && (attr_type_dep == TYPE_FMACS
11103 || attr_type_dep == TYPE_FMACD))
11104 {
11105 if (dep_type == REG_DEP_OUTPUT)
11106 *cost = insn_default_latency (dep) - 3;
11107 else
11108 *cost = insn_default_latency (dep);
11109 return false;
11110 }
11111 else
11112 {
11113 if (dep_type == REG_DEP_OUTPUT)
11114 *cost = insn_default_latency (dep) + 1;
11115 else
11116 *cost = insn_default_latency (dep);
11117 }
11118 return false;
11119 }
11120 }
11121 }
11122 }
11123 break;
11124
11125 default:
11126 gcc_unreachable ();
11127 }
11128
11129 return true;
11130 }
11131
11132 /* Adjust cost hook for FA726TE. */
11133 static bool
11134 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11135 int * cost)
11136 {
11137 /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn
11138 followed by a predicated one) has a penalty of 3. */
11139 if (dep_type == REG_DEP_TRUE
11140 && recog_memoized (insn) >= 0
11141 && recog_memoized (dep) >= 0
11142 && get_attr_conds (dep) == CONDS_SET)
11143 {
11144 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11145 if (get_attr_conds (insn) == CONDS_USE
11146 && get_attr_type (insn) != TYPE_BRANCH)
11147 {
11148 *cost = 3;
11149 return false;
11150 }
11151
11152 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11153 || get_attr_conds (insn) == CONDS_USE)
11154 {
11155 *cost = 0;
11156 return false;
11157 }
11158 }
11159
11160 return true;
11161 }
11162
11163 /* Implement TARGET_REGISTER_MOVE_COST.
11164
11165 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11166 such a move is typically more expensive than a single memory access. We set
11167 the cost to less than two memory accesses so that floating
11168 point to integer conversion does not go through memory. */
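/* With arm_memory_move_cost returning 10 for TARGET_32BIT (see below),
   the cost of 15 used here for VFP<->core moves stays under
   2 * 10 = 20, as intended. */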
11169
11170 int
11171 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11172 reg_class_t from, reg_class_t to)
11173 {
11174 if (TARGET_32BIT)
11175 {
11176 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11177 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11178 return 15;
11179 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11180 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11181 return 4;
11182 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11183 return 20;
11184 else
11185 return 2;
11186 }
11187 else
11188 {
11189 if (from == HI_REGS || to == HI_REGS)
11190 return 4;
11191 else
11192 return 2;
11193 }
11194 }
11195
11196 /* Implement TARGET_MEMORY_MOVE_COST. */
11197
11198 int
11199 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11200 bool in ATTRIBUTE_UNUSED)
11201 {
11202 if (TARGET_32BIT)
11203 return 10;
11204 else
11205 {
11206 if (GET_MODE_SIZE (mode) < 4)
11207 return 8;
11208 else
11209 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11210 }
11211 }
11212
11213 /* Vectorizer cost model implementation. */
11214
11215 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11216 static int
11217 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11218 tree vectype,
11219 int misalign ATTRIBUTE_UNUSED)
11220 {
11221 unsigned elements;
11222
11223 switch (type_of_cost)
11224 {
11225 case scalar_stmt:
11226 return current_tune->vec_costs->scalar_stmt_cost;
11227
11228 case scalar_load:
11229 return current_tune->vec_costs->scalar_load_cost;
11230
11231 case scalar_store:
11232 return current_tune->vec_costs->scalar_store_cost;
11233
11234 case vector_stmt:
11235 return current_tune->vec_costs->vec_stmt_cost;
11236
11237 case vector_load:
11238 return current_tune->vec_costs->vec_align_load_cost;
11239
11240 case vector_store:
11241 return current_tune->vec_costs->vec_store_cost;
11242
11243 case vec_to_scalar:
11244 return current_tune->vec_costs->vec_to_scalar_cost;
11245
11246 case scalar_to_vec:
11247 return current_tune->vec_costs->scalar_to_vec_cost;
11248
11249 case unaligned_load:
11250 return current_tune->vec_costs->vec_unalign_load_cost;
11251
11252 case unaligned_store:
11253 return current_tune->vec_costs->vec_unalign_store_cost;
11254
11255 case cond_branch_taken:
11256 return current_tune->vec_costs->cond_taken_branch_cost;
11257
11258 case cond_branch_not_taken:
11259 return current_tune->vec_costs->cond_not_taken_branch_cost;
11260
11261 case vec_perm:
11262 case vec_promote_demote:
11263 return current_tune->vec_costs->vec_stmt_cost;
11264
11265 case vec_construct:
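/* A rough estimate; e.g. a four-element vector gives 4 / 2 + 1 = 3. */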
11266 elements = TYPE_VECTOR_SUBPARTS (vectype);
11267 return elements / 2 + 1;
11268
11269 default:
11270 gcc_unreachable ();
11271 }
11272 }
11273
11274 /* Implement targetm.vectorize.add_stmt_cost. */
11275
11276 static unsigned
11277 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11278 struct _stmt_vec_info *stmt_info, int misalign,
11279 enum vect_cost_model_location where)
11280 {
11281 unsigned *cost = (unsigned *) data;
11282 unsigned retval = 0;
11283
11284 if (flag_vect_cost_model)
11285 {
11286 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11287 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11288
11289 /* Statements in an inner loop relative to the loop being
11290 vectorized are weighted more heavily. The value here is
11291 arbitrary and could potentially be improved with analysis. */
11292 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11293 count *= 50; /* FIXME. */
11294
11295 retval = (unsigned) (count * stmt_cost);
11296 cost[where] += retval;
11297 }
11298
11299 return retval;
11300 }
11301
11302 /* Return true if and only if this insn can dual-issue only as older. */
11303 static bool
11304 cortexa7_older_only (rtx_insn *insn)
11305 {
11306 if (recog_memoized (insn) < 0)
11307 return false;
11308
11309 switch (get_attr_type (insn))
11310 {
11311 case TYPE_ALU_DSP_REG:
11312 case TYPE_ALU_SREG:
11313 case TYPE_ALUS_SREG:
11314 case TYPE_LOGIC_REG:
11315 case TYPE_LOGICS_REG:
11316 case TYPE_ADC_REG:
11317 case TYPE_ADCS_REG:
11318 case TYPE_ADR:
11319 case TYPE_BFM:
11320 case TYPE_REV:
11321 case TYPE_MVN_REG:
11322 case TYPE_SHIFT_IMM:
11323 case TYPE_SHIFT_REG:
11324 case TYPE_LOAD_BYTE:
11325 case TYPE_LOAD_4:
11326 case TYPE_STORE_4:
11327 case TYPE_FFARITHS:
11328 case TYPE_FADDS:
11329 case TYPE_FFARITHD:
11330 case TYPE_FADDD:
11331 case TYPE_FMOV:
11332 case TYPE_F_CVT:
11333 case TYPE_FCMPS:
11334 case TYPE_FCMPD:
11335 case TYPE_FCONSTS:
11336 case TYPE_FCONSTD:
11337 case TYPE_FMULS:
11338 case TYPE_FMACS:
11339 case TYPE_FMULD:
11340 case TYPE_FMACD:
11341 case TYPE_FDIVS:
11342 case TYPE_FDIVD:
11343 case TYPE_F_MRC:
11344 case TYPE_F_MRRC:
11345 case TYPE_F_FLAG:
11346 case TYPE_F_LOADS:
11347 case TYPE_F_STORES:
11348 return true;
11349 default:
11350 return false;
11351 }
11352 }
11353
11354 /* Return true if and only if this insn can dual-issue as younger. */
11355 static bool
11356 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11357 {
11358 if (recog_memoized (insn) < 0)
11359 {
11360 if (verbose > 5)
11361 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11362 return false;
11363 }
11364
11365 switch (get_attr_type (insn))
11366 {
11367 case TYPE_ALU_IMM:
11368 case TYPE_ALUS_IMM:
11369 case TYPE_LOGIC_IMM:
11370 case TYPE_LOGICS_IMM:
11371 case TYPE_EXTEND:
11372 case TYPE_MVN_IMM:
11373 case TYPE_MOV_IMM:
11374 case TYPE_MOV_REG:
11375 case TYPE_MOV_SHIFT:
11376 case TYPE_MOV_SHIFT_REG:
11377 case TYPE_BRANCH:
11378 case TYPE_CALL:
11379 return true;
11380 default:
11381 return false;
11382 }
11383 }
11384
11385
11386 /* Look for an instruction that can dual issue only as an older
11387 instruction, and move it in front of any instructions that can
11388 dual-issue as younger, while preserving the relative order of all
11389 other instructions in the ready list. This is a heuristic to help
11390 dual-issue in later cycles, by postponing issue of more flexible
11391 instructions. This heuristic may affect dual issue opportunities
11392 in the current cycle. */
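/* For example, if the ready list (next-to-issue first) is
   { Y1, X, O, Y2 } where O is older-only and Y1/Y2 are younger, the
   code below moves O into Y1's slot, giving { O, Y1, X, Y2 }. */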
11393 static void
11394 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11395 int *n_readyp, int clock)
11396 {
11397 int i;
11398 int first_older_only = -1, first_younger = -1;
11399
11400 if (verbose > 5)
11401 fprintf (file,
11402 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11403 clock,
11404 *n_readyp);
11405
11406 /* Traverse the ready list from the head (the instruction to issue
11407 first), and looking for the first instruction that can issue as
11408 younger and the first instruction that can dual-issue only as
11409 older. */
11410 for (i = *n_readyp - 1; i >= 0; i--)
11411 {
11412 rtx_insn *insn = ready[i];
11413 if (cortexa7_older_only (insn))
11414 {
11415 first_older_only = i;
11416 if (verbose > 5)
11417 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11418 break;
11419 }
11420 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11421 first_younger = i;
11422 }
11423
11424 /* Nothing to reorder because either no younger insn found or insn
11425 that can dual-issue only as older appears before any insn that
11426 can dual-issue as younger. */
11427 if (first_younger == -1)
11428 {
11429 if (verbose > 5)
11430 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11431 return;
11432 }
11433
11434 /* Nothing to reorder because no older-only insn in the ready list. */
11435 if (first_older_only == -1)
11436 {
11437 if (verbose > 5)
11438 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11439 return;
11440 }
11441
11442 /* Move first_older_only insn before first_younger. */
11443 if (verbose > 5)
11444 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11445 INSN_UID (ready[first_older_only]),
11446 INSN_UID (ready[first_younger]));
11447 rtx_insn *first_older_only_insn = ready [first_older_only];
11448 for (i = first_older_only; i < first_younger; i++)
11449 {
11450 ready[i] = ready[i+1];
11451 }
11452
11453 ready[i] = first_older_only_insn;
11454 return;
11455 }
11456
11457 /* Implement TARGET_SCHED_REORDER. */
11458 static int
11459 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11460 int clock)
11461 {
11462 switch (arm_tune)
11463 {
11464 case TARGET_CPU_cortexa7:
11465 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11466 break;
11467 default:
11468 /* Do nothing for other cores. */
11469 break;
11470 }
11471
11472 return arm_issue_rate ();
11473 }
11474
11475 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11476 It corrects the value of COST based on the relationship between
11477 INSN and DEP through the dependence LINK. It returns the new
11478 value. There is a per-core adjust_cost hook to adjust scheduler costs
11479 and the per-core hook can choose to completely override the generic
11480 adjust_cost function. Only put bits of code into arm_adjust_cost that
11481 are common across all cores. */
11482 static int
11483 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11484 unsigned int)
11485 {
11486 rtx i_pat, d_pat;
11487
11488 /* When generating Thumb-1 code, we want to place flag-setting operations
11489 close to a conditional branch which depends on them, so that we can
11490 omit the comparison. */
11491 if (TARGET_THUMB1
11492 && dep_type == 0
11493 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11494 && recog_memoized (dep) >= 0
11495 && get_attr_conds (dep) == CONDS_SET)
11496 return 0;
11497
11498 if (current_tune->sched_adjust_cost != NULL)
11499 {
11500 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11501 return cost;
11502 }
11503
11504 /* XXX Is this strictly true? */
11505 if (dep_type == REG_DEP_ANTI
11506 || dep_type == REG_DEP_OUTPUT)
11507 return 0;
11508
11509 /* Call insns don't incur a stall, even if they follow a load. */
11510 if (dep_type == 0
11511 && CALL_P (insn))
11512 return 1;
11513
11514 if ((i_pat = single_set (insn)) != NULL
11515 && MEM_P (SET_SRC (i_pat))
11516 && (d_pat = single_set (dep)) != NULL
11517 && MEM_P (SET_DEST (d_pat)))
11518 {
11519 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11520 /* This is a load after a store; there is no conflict if the load reads
11521 from a cached area. Assume that loads from the stack, and from the
11522 constant pool are cached, and that others will miss. This is a
11523 hack. */
11524
11525 if ((GET_CODE (src_mem) == SYMBOL_REF
11526 && CONSTANT_POOL_ADDRESS_P (src_mem))
11527 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11528 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11529 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11530 return 1;
11531 }
11532
11533 return cost;
11534 }
11535
11536 int
11537 arm_max_conditional_execute (void)
11538 {
11539 return max_insns_skipped;
11540 }
11541
11542 static int
11543 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11544 {
11545 if (TARGET_32BIT)
11546 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11547 else
11548 return (optimize > 0) ? 2 : 0;
11549 }
11550
11551 static int
11552 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11553 {
11554 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11555 }
11556
11557 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11558 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11559 sequences of non-executed instructions in IT blocks probably take the same
11560 amount of time as executed instructions (and the IT instruction itself takes
11561 space in icache). This function was experimentally determined to give good
11562 results on a popular embedded benchmark. */
11563
11564 static int
11565 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11566 {
11567 return (TARGET_32BIT && speed_p) ? 1
11568 : arm_default_branch_cost (speed_p, predictable_p);
11569 }
11570
11571 static int
11572 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11573 {
11574 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11575 }
11576
11577 static bool fp_consts_inited = false;
11578
11579 static REAL_VALUE_TYPE value_fp0;
11580
11581 static void
11582 init_fp_table (void)
11583 {
11584 REAL_VALUE_TYPE r;
11585
11586 r = REAL_VALUE_ATOF ("0", DFmode);
11587 value_fp0 = r;
11588 fp_consts_inited = true;
11589 }
11590
11591 /* Return TRUE if rtx X is a valid immediate FP constant. */
11592 int
11593 arm_const_double_rtx (rtx x)
11594 {
11595 const REAL_VALUE_TYPE *r;
11596
11597 if (!fp_consts_inited)
11598 init_fp_table ();
11599
11600 r = CONST_DOUBLE_REAL_VALUE (x);
11601 if (REAL_VALUE_MINUS_ZERO (*r))
11602 return 0;
11603
11604 if (real_equal (r, &value_fp0))
11605 return 1;
11606
11607 return 0;
11608 }
11609
11610 /* VFPv3 has a fairly wide range of representable immediates, formed from
11611 "quarter-precision" floating-point values. These can be evaluated using this
11612 formula (with ^ for exponentiation):
11613
11614 -1^s * n * 2^-r
11615
11616 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11617 16 <= n <= 31 and 0 <= r <= 7.
11618
11619 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11620
11621 - A (most-significant) is the sign bit.
11622 - BCD are the exponent (encoded as r XOR 3).
11623 - EFGH are the mantissa (encoded as n - 16).
11624 */
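/* Worked example: 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4,
   which encodes as A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000,
   giving the 8-bit value 0b01110000 (0x70); -1.0 differs only in the
   sign bit (0xf0). */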
11625
11626 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11627 fconst[sd] instruction, or -1 if X isn't suitable. */
11628 static int
11629 vfp3_const_double_index (rtx x)
11630 {
11631 REAL_VALUE_TYPE r, m;
11632 int sign, exponent;
11633 unsigned HOST_WIDE_INT mantissa, mant_hi;
11634 unsigned HOST_WIDE_INT mask;
11635 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11636 bool fail;
11637
11638 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11639 return -1;
11640
11641 r = *CONST_DOUBLE_REAL_VALUE (x);
11642
11643 /* We can't represent these things, so detect them first. */
11644 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11645 return -1;
11646
11647 /* Extract sign, exponent and mantissa. */
11648 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11649 r = real_value_abs (&r);
11650 exponent = REAL_EXP (&r);
11651 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11652 highest (sign) bit, with a fixed binary point at bit point_pos.
11653 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11654 bits for the mantissa, this may fail (low bits would be lost). */
11655 real_ldexp (&m, &r, point_pos - exponent);
11656 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11657 mantissa = w.elt (0);
11658 mant_hi = w.elt (1);
11659
11660 /* If there are bits set in the low part of the mantissa, we can't
11661 represent this value. */
11662 if (mantissa != 0)
11663 return -1;
11664
11665 /* Now make it so that mantissa contains the most-significant bits, and move
11666 the point_pos to indicate that the least-significant bits have been
11667 discarded. */
11668 point_pos -= HOST_BITS_PER_WIDE_INT;
11669 mantissa = mant_hi;
11670
11671 /* We can permit four significant bits of mantissa only, plus a high bit
11672 which is always 1. */
11673 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11674 if ((mantissa & mask) != 0)
11675 return -1;
11676
11677 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11678 mantissa >>= point_pos - 5;
11679
11680 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11681 floating-point immediate zero with Neon using an integer-zero load, but
11682 that case is handled elsewhere.) */
11683 if (mantissa == 0)
11684 return -1;
11685
11686 gcc_assert (mantissa >= 16 && mantissa <= 31);
11687
11688 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11689 normalized significands are in the range [1, 2). (Our mantissa is shifted
11690 left 4 places at this point relative to normalized IEEE754 values). GCC
11691 internally uses [0.5, 1) (see real.c), so the exponent returned from
11692 REAL_EXP must be altered. */
11693 exponent = 5 - exponent;
11694
11695 if (exponent < 0 || exponent > 7)
11696 return -1;
11697
11698 /* Sign, mantissa and exponent are now in the correct form to plug into the
11699 formula described in the comment above. */
11700 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11701 }
11702
11703 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11704 int
11705 vfp3_const_double_rtx (rtx x)
11706 {
11707 if (!TARGET_VFP3)
11708 return 0;
11709
11710 return vfp3_const_double_index (x) != -1;
11711 }
11712
11713 /* Recognize immediates which can be used in various Neon instructions. Legal
11714 immediates are described by the following table (for VMVN variants, the
11715 bitwise inverse of the constant shown is recognized. In either case, VMOV
11716 is output and the correct instruction to use for a given constant is chosen
11717 by the assembler). The constant shown is replicated across all elements of
11718 the destination vector.
11719
11720 insn elems variant constant (binary)
11721 ---- ----- ------- -----------------
11722 vmov i32 0 00000000 00000000 00000000 abcdefgh
11723 vmov i32 1 00000000 00000000 abcdefgh 00000000
11724 vmov i32 2 00000000 abcdefgh 00000000 00000000
11725 vmov i32 3 abcdefgh 00000000 00000000 00000000
11726 vmov i16 4 00000000 abcdefgh
11727 vmov i16 5 abcdefgh 00000000
11728 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11729 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11730 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11731 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11732 vmvn i16 10 00000000 abcdefgh
11733 vmvn i16 11 abcdefgh 00000000
11734 vmov i32 12 00000000 00000000 abcdefgh 11111111
11735 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11736 vmov i32 14 00000000 abcdefgh 11111111 11111111
11737 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11738 vmov i8 16 abcdefgh
11739 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11740 eeeeeeee ffffffff gggggggg hhhhhhhh
11741 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11742 vmov f32 19 00000000 00000000 00000000 00000000
11743
11744 For case 18, B = !b. Representable values are exactly those accepted by
11745 vfp3_const_double_index, but are output as floating-point numbers rather
11746 than indices.
11747
11748 For case 19, we will change it to vmov.i32 when assembling.
11749
11750 Variants 0-5 (inclusive) may also be used as immediates for the second
11751 operand of VORR/VBIC instructions.
11752
11753 The INVERSE argument causes the bitwise inverse of the given operand to be
11754 recognized instead (used for recognizing legal immediates for the VAND/VORN
11755 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11756 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11757 output, rather than the real insns vbic/vorr).
11758
11759 INVERSE makes no difference to the recognition of float vectors.
11760
11761 The return value is the variant of immediate as shown in the above table, or
11762 -1 if the given value doesn't match any of the listed patterns.
11763 */
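/* For instance, a V4SImode constant with every element equal to
   0x0000ab00 matches variant 1 above: the return value is 1,
   *ELEMENTWIDTH is set to 32 and *MODCONST to 0x0000ab00. */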
11764 static int
11765 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11766 rtx *modconst, int *elementwidth)
11767 {
11768 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11769 matches = 1; \
11770 for (i = 0; i < idx; i += (STRIDE)) \
11771 if (!(TEST)) \
11772 matches = 0; \
11773 if (matches) \
11774 { \
11775 immtype = (CLASS); \
11776 elsize = (ELSIZE); \
11777 break; \
11778 }
11779
11780 unsigned int i, elsize = 0, idx = 0, n_elts;
11781 unsigned int innersize;
11782 unsigned char bytes[16];
11783 int immtype = -1, matches;
11784 unsigned int invmask = inverse ? 0xff : 0;
11785 bool vector = GET_CODE (op) == CONST_VECTOR;
11786
11787 if (vector)
11788 n_elts = CONST_VECTOR_NUNITS (op);
11789 else
11790 {
11791 n_elts = 1;
11792 if (mode == VOIDmode)
11793 mode = DImode;
11794 }
11795
11796 innersize = GET_MODE_UNIT_SIZE (mode);
11797
11798 /* Vectors of float constants. */
11799 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11800 {
11801 rtx el0 = CONST_VECTOR_ELT (op, 0);
11802
11803 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11804 return -1;
11805
11806 /* FP16 vectors cannot be represented. */
11807 if (GET_MODE_INNER (mode) == HFmode)
11808 return -1;
11809
11810 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11811 are distinct in this context. */
11812 if (!const_vec_duplicate_p (op))
11813 return -1;
11814
11815 if (modconst)
11816 *modconst = CONST_VECTOR_ELT (op, 0);
11817
11818 if (elementwidth)
11819 *elementwidth = 0;
11820
11821 if (el0 == CONST0_RTX (GET_MODE (el0)))
11822 return 19;
11823 else
11824 return 18;
11825 }
11826
11827 /* The tricks done in the code below apply for little-endian vector layout.
11828 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11829 FIXME: Implement logic for big-endian vectors. */
11830 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11831 return -1;
11832
11833 /* Splat vector constant out into a byte vector. */
11834 for (i = 0; i < n_elts; i++)
11835 {
11836 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11837 unsigned HOST_WIDE_INT elpart;
11838
11839 gcc_assert (CONST_INT_P (el));
11840 elpart = INTVAL (el);
11841
11842 for (unsigned int byte = 0; byte < innersize; byte++)
11843 {
11844 bytes[idx++] = (elpart & 0xff) ^ invmask;
11845 elpart >>= BITS_PER_UNIT;
11846 }
11847 }
11848
11849 /* Sanity check. */
11850 gcc_assert (idx == GET_MODE_SIZE (mode));
11851
11852 do
11853 {
11854 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11855 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11856
11857 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11858 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11859
11860 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11861 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11862
11863 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11864 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11865
11866 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11867
11868 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11869
11870 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11871 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11872
11873 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11874 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11875
11876 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11877 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11878
11879 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11880 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11881
11882 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11883
11884 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11885
11886 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11887 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11888
11889 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11890 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11891
11892 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11893 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11894
11895 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11896 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11897
11898 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11899
11900 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11901 && bytes[i] == bytes[(i + 8) % idx]);
11902 }
11903 while (0);
11904
11905 if (immtype == -1)
11906 return -1;
11907
11908 if (elementwidth)
11909 *elementwidth = elsize;
11910
11911 if (modconst)
11912 {
11913 unsigned HOST_WIDE_INT imm = 0;
11914
11915 /* Un-invert bytes of recognized vector, if necessary. */
11916 if (invmask != 0)
11917 for (i = 0; i < idx; i++)
11918 bytes[i] ^= invmask;
11919
11920 if (immtype == 17)
11921 {
11922 /* FIXME: Broken on 32-bit H_W_I hosts. */
11923 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11924
11925 for (i = 0; i < 8; i++)
11926 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11927 << (i * BITS_PER_UNIT);
11928
11929 *modconst = GEN_INT (imm);
11930 }
11931 else
11932 {
11933 unsigned HOST_WIDE_INT imm = 0;
11934
11935 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11936 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11937
11938 *modconst = GEN_INT (imm);
11939 }
11940 }
11941
11942 return immtype;
11943 #undef CHECK
11944 }
11945
11946 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11947 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11948 float elements), and a modified constant (whatever should be output for a
11949 VMOV) in *MODCONST. */
11950
11951 int
11952 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11953 rtx *modconst, int *elementwidth)
11954 {
11955 rtx tmpconst;
11956 int tmpwidth;
11957 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11958
11959 if (retval == -1)
11960 return 0;
11961
11962 if (modconst)
11963 *modconst = tmpconst;
11964
11965 if (elementwidth)
11966 *elementwidth = tmpwidth;
11967
11968 return 1;
11969 }
11970
11971 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11972 the immediate is valid, write a constant suitable for using as an operand
11973 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11974 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11975
11976 int
11977 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11978 rtx *modconst, int *elementwidth)
11979 {
11980 rtx tmpconst;
11981 int tmpwidth;
11982 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11983
11984 if (retval < 0 || retval > 5)
11985 return 0;
11986
11987 if (modconst)
11988 *modconst = tmpconst;
11989
11990 if (elementwidth)
11991 *elementwidth = tmpwidth;
11992
11993 return 1;
11994 }
11995
11996 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11997 the immediate is valid, write a constant suitable for using as an operand
11998 to VSHR/VSHL to *MODCONST and the corresponding element width to
11999 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12000 because the two have different immediate limitations. */
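/* For 8-bit elements, for example, a valid left-shift count is 0..7
   while a valid right-shift count is 1..8, matching the checks below. */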
12001
12002 int
12003 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12004 rtx *modconst, int *elementwidth,
12005 bool isleftshift)
12006 {
12007 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12008 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12009 unsigned HOST_WIDE_INT last_elt = 0;
12010 unsigned HOST_WIDE_INT maxshift;
12011
12012 /* Split vector constant out into a byte vector. */
12013 for (i = 0; i < n_elts; i++)
12014 {
12015 rtx el = CONST_VECTOR_ELT (op, i);
12016 unsigned HOST_WIDE_INT elpart;
12017
12018 if (CONST_INT_P (el))
12019 elpart = INTVAL (el);
12020 else if (CONST_DOUBLE_P (el))
12021 return 0;
12022 else
12023 gcc_unreachable ();
12024
12025 if (i != 0 && elpart != last_elt)
12026 return 0;
12027
12028 last_elt = elpart;
12029 }
12030
12031 /* Shift less than element size. */
12032 maxshift = innersize * 8;
12033
12034 if (isleftshift)
12035 {
12036 /* Left shift immediate value can be from 0 to <size>-1. */
12037 if (last_elt >= maxshift)
12038 return 0;
12039 }
12040 else
12041 {
12042 /* Right shift immediate value can be from 1 to <size>. */
12043 if (last_elt == 0 || last_elt > maxshift)
12044 return 0;
12045 }
12046
12047 if (elementwidth)
12048 *elementwidth = innersize * 8;
12049
12050 if (modconst)
12051 *modconst = CONST_VECTOR_ELT (op, 0);
12052
12053 return 1;
12054 }
12055
12056 /* Return a string suitable for output of Neon immediate logic operation
12057 MNEM. */
12058
12059 char *
12060 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12061 int inverse, int quad)
12062 {
12063 int width, is_valid;
12064 static char templ[40];
12065
12066 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12067
12068 gcc_assert (is_valid != 0);
12069
12070 if (quad)
12071 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12072 else
12073 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12074
12075 return templ;
12076 }
12077
12078 /* Return a string suitable for output of Neon immediate shift operation
12079 (VSHR or VSHL) MNEM. */
12080
12081 char *
12082 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12083 machine_mode mode, int quad,
12084 bool isleftshift)
12085 {
12086 int width, is_valid;
12087 static char templ[40];
12088
12089 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12090 gcc_assert (is_valid != 0);
12091
12092 if (quad)
12093 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12094 else
12095 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12096
12097 return templ;
12098 }
12099
12100 /* Output a sequence of pairwise operations to implement a reduction.
12101 NOTE: We do "too much work" here, because pairwise operations work on two
12102 registers-worth of operands in one go. Unfortunately I don't think we can
12103 exploit those extra calculations to do the full operation in fewer steps.
12104 Although all vector elements of the result but the first are ignored, we
12105 actually calculate the same result in each of the elements. An alternative
12106 such as initially loading a vector with zero to use as each of the second
12107 operands would use up an additional register and take an extra instruction,
12108 for no particular gain. */
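/* Concretely, the loop below halves I each time, so a four-element
   vector needs two pairwise steps (I = 2, then I = 1), the last of
   which writes directly to OP0. */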
12109
12110 void
12111 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12112 rtx (*reduc) (rtx, rtx, rtx))
12113 {
12114 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12115 rtx tmpsum = op1;
12116
12117 for (i = parts / 2; i >= 1; i /= 2)
12118 {
12119 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12120 emit_insn (reduc (dest, tmpsum, tmpsum));
12121 tmpsum = dest;
12122 }
12123 }
12124
12125 /* If VALS is a vector constant that can be loaded into a register
12126 using VDUP, generate instructions to do so and return an RTX to
12127 assign to the register. Otherwise return NULL_RTX. */
12128
12129 static rtx
12130 neon_vdup_constant (rtx vals)
12131 {
12132 machine_mode mode = GET_MODE (vals);
12133 machine_mode inner_mode = GET_MODE_INNER (mode);
12134 rtx x;
12135
12136 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12137 return NULL_RTX;
12138
12139 if (!const_vec_duplicate_p (vals, &x))
12140 /* The elements are not all the same. We could handle repeating
12141 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12142 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12143 vdup.i16). */
12144 return NULL_RTX;
12145
12146 /* We can load this constant by using VDUP and a constant in a
12147 single ARM register. This will be cheaper than a vector
12148 load. */
12149
12150 x = copy_to_mode_reg (inner_mode, x);
12151 return gen_rtx_VEC_DUPLICATE (mode, x);
12152 }
12153
12154 /* Generate code to load VALS, which is a PARALLEL containing only
12155 constants (for vec_init) or CONST_VECTOR, efficiently into a
12156 register. Returns an RTX to copy into the register, or NULL_RTX
12157 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12158
12159 rtx
12160 neon_make_constant (rtx vals)
12161 {
12162 machine_mode mode = GET_MODE (vals);
12163 rtx target;
12164 rtx const_vec = NULL_RTX;
12165 int n_elts = GET_MODE_NUNITS (mode);
12166 int n_const = 0;
12167 int i;
12168
12169 if (GET_CODE (vals) == CONST_VECTOR)
12170 const_vec = vals;
12171 else if (GET_CODE (vals) == PARALLEL)
12172 {
12173 /* A CONST_VECTOR must contain only CONST_INTs and
12174 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12175 Only store valid constants in a CONST_VECTOR. */
12176 for (i = 0; i < n_elts; ++i)
12177 {
12178 rtx x = XVECEXP (vals, 0, i);
12179 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12180 n_const++;
12181 }
12182 if (n_const == n_elts)
12183 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12184 }
12185 else
12186 gcc_unreachable ();
12187
12188 if (const_vec != NULL
12189 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12190 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12191 return const_vec;
12192 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12193 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12194 pipeline cycle; creating the constant takes one or two ARM
12195 pipeline cycles. */
12196 return target;
12197 else if (const_vec != NULL_RTX)
12198 /* Load from constant pool. On Cortex-A8 this takes two cycles
12199 (for either double or quad vectors). We can not take advantage
12200 of single-cycle VLD1 because we need a PC-relative addressing
12201 mode. */
12202 return const_vec;
12203 else
12204 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12205 We can not construct an initializer. */
12206 return NULL_RTX;
12207 }
12208
12209 /* Initialize vector TARGET to VALS. */
12210
12211 void
12212 neon_expand_vector_init (rtx target, rtx vals)
12213 {
12214 machine_mode mode = GET_MODE (target);
12215 machine_mode inner_mode = GET_MODE_INNER (mode);
12216 int n_elts = GET_MODE_NUNITS (mode);
12217 int n_var = 0, one_var = -1;
12218 bool all_same = true;
12219 rtx x, mem;
12220 int i;
12221
12222 for (i = 0; i < n_elts; ++i)
12223 {
12224 x = XVECEXP (vals, 0, i);
12225 if (!CONSTANT_P (x))
12226 ++n_var, one_var = i;
12227
12228 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12229 all_same = false;
12230 }
12231
12232 if (n_var == 0)
12233 {
12234 rtx constant = neon_make_constant (vals);
12235 if (constant != NULL_RTX)
12236 {
12237 emit_move_insn (target, constant);
12238 return;
12239 }
12240 }
12241
12242 /* Splat a single non-constant element if we can. */
12243 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12244 {
12245 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12246 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12247 return;
12248 }
12249
12250 /* One field is non-constant. Load constant then overwrite varying
12251 field. This is more efficient than using the stack. */
12252 if (n_var == 1)
12253 {
12254 rtx copy = copy_rtx (vals);
12255 rtx index = GEN_INT (one_var);
12256
12257 /* Load constant part of vector, substitute neighboring value for
12258 varying element. */
12259 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12260 neon_expand_vector_init (target, copy);
12261
12262 /* Insert variable. */
12263 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12264 switch (mode)
12265 {
12266 case E_V8QImode:
12267 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12268 break;
12269 case E_V16QImode:
12270 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12271 break;
12272 case E_V4HImode:
12273 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12274 break;
12275 case E_V8HImode:
12276 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12277 break;
12278 case E_V2SImode:
12279 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12280 break;
12281 case E_V4SImode:
12282 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12283 break;
12284 case E_V2SFmode:
12285 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12286 break;
12287 case E_V4SFmode:
12288 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12289 break;
12290 case E_V2DImode:
12291 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12292 break;
12293 default:
12294 gcc_unreachable ();
12295 }
12296 return;
12297 }
12298
12299 /* Construct the vector in memory one field at a time
12300 and load the whole vector. */
12301 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12302 for (i = 0; i < n_elts; i++)
12303 emit_move_insn (adjust_address_nv (mem, inner_mode,
12304 i * GET_MODE_SIZE (inner_mode)),
12305 XVECEXP (vals, 0, i));
12306 emit_move_insn (target, mem);
12307 }
12308
12309 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12310 an error if it doesn't. EXP indicates the source location, which includes the
12311 inlining history for intrinsics. */
12312
12313 static void
12314 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12315 const_tree exp, const char *desc)
12316 {
12317 HOST_WIDE_INT lane;
12318
12319 gcc_assert (CONST_INT_P (operand));
12320
12321 lane = INTVAL (operand);
12322
12323 if (lane < low || lane >= high)
12324 {
12325 if (exp)
12326 error ("%K%s %wd out of range %wd - %wd",
12327 exp, desc, lane, low, high - 1);
12328 else
12329 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12330 }
12331 }
12332
12333 /* Bounds-check lanes. */
12334
12335 void
12336 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12337 const_tree exp)
12338 {
12339 bounds_check (operand, low, high, exp, "lane");
12340 }
12341
12342 /* Bounds-check constants. */
12343
12344 void
12345 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12346 {
12347 bounds_check (operand, low, high, NULL_TREE, "constant");
12348 }
12349
12350 HOST_WIDE_INT
12351 neon_element_bits (machine_mode mode)
12352 {
12353 return GET_MODE_UNIT_BITSIZE (mode);
12354 }
12355
12356 \f
12357 /* Predicates for `match_operand' and `match_operator'. */
12358
12359 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12360 WB is true if full writeback address modes are allowed and is false
12361 if limited writeback address modes (POST_INC and PRE_DEC) are
12362 allowed. */
12363
12364 int
12365 arm_coproc_mem_operand (rtx op, bool wb)
12366 {
12367 rtx ind;
12368
12369 /* Reject eliminable registers. */
12370 if (! (reload_in_progress || reload_completed || lra_in_progress)
12371 && ( reg_mentioned_p (frame_pointer_rtx, op)
12372 || reg_mentioned_p (arg_pointer_rtx, op)
12373 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12374 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12375 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12376 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12377 return FALSE;
12378
12379 /* Constants are converted into offsets from labels. */
12380 if (!MEM_P (op))
12381 return FALSE;
12382
12383 ind = XEXP (op, 0);
12384
12385 if (reload_completed
12386 && (GET_CODE (ind) == LABEL_REF
12387 || (GET_CODE (ind) == CONST
12388 && GET_CODE (XEXP (ind, 0)) == PLUS
12389 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12390 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12391 return TRUE;
12392
12393 /* Match: (mem (reg)). */
12394 if (REG_P (ind))
12395 return arm_address_register_rtx_p (ind, 0);
12396
12397 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12398 acceptable in any case (subject to verification by
12399 arm_address_register_rtx_p). We need WB to be true to accept
12400 PRE_INC and POST_DEC. */
12401 if (GET_CODE (ind) == POST_INC
12402 || GET_CODE (ind) == PRE_DEC
12403 || (wb
12404 && (GET_CODE (ind) == PRE_INC
12405 || GET_CODE (ind) == POST_DEC)))
12406 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12407
12408 if (wb
12409 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12410 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12411 && GET_CODE (XEXP (ind, 1)) == PLUS
12412 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12413 ind = XEXP (ind, 1);
12414
12415 /* Match:
12416 (plus (reg)
12417 (const)). */
12418 if (GET_CODE (ind) == PLUS
12419 && REG_P (XEXP (ind, 0))
12420 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12421 && CONST_INT_P (XEXP (ind, 1))
12422 && INTVAL (XEXP (ind, 1)) > -1024
12423 && INTVAL (XEXP (ind, 1)) < 1024
12424 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12425 return TRUE;
12426
12427 return FALSE;
12428 }
12429
12430 /* Return TRUE if OP is a memory operand which we can load or store a vector
12431 to/from. TYPE is one of the following values:
12432 0 - Vector load/store (vldr)
12433 1 - Core registers (ldm)
12434 2 - Element/structure loads (vld1)
12435 */
12436 int
12437 neon_vector_mem_operand (rtx op, int type, bool strict)
12438 {
12439 rtx ind;
12440
12441 /* Reject eliminable registers. */
12442 if (strict && ! (reload_in_progress || reload_completed)
12443 && (reg_mentioned_p (frame_pointer_rtx, op)
12444 || reg_mentioned_p (arg_pointer_rtx, op)
12445 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12446 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12447 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12448 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12449 return FALSE;
12450
12451 /* Constants are converted into offsets from labels. */
12452 if (!MEM_P (op))
12453 return FALSE;
12454
12455 ind = XEXP (op, 0);
12456
12457 if (reload_completed
12458 && (GET_CODE (ind) == LABEL_REF
12459 || (GET_CODE (ind) == CONST
12460 && GET_CODE (XEXP (ind, 0)) == PLUS
12461 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12462 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12463 return TRUE;
12464
12465 /* Match: (mem (reg)). */
12466 if (REG_P (ind))
12467 return arm_address_register_rtx_p (ind, 0);
12468
12469 /* Allow post-increment with Neon registers. */
12470 if ((type != 1 && GET_CODE (ind) == POST_INC)
12471 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12472 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12473
12474 /* Allow post-increment by register for VLDn. */
12475 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12476 && GET_CODE (XEXP (ind, 1)) == PLUS
12477 && REG_P (XEXP (XEXP (ind, 1), 1)))
12478 return true;
12479
12480 /* Match:
12481 (plus (reg)
12482 (const)). */
12483 if (type == 0
12484 && GET_CODE (ind) == PLUS
12485 && REG_P (XEXP (ind, 0))
12486 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12487 && CONST_INT_P (XEXP (ind, 1))
12488 && INTVAL (XEXP (ind, 1)) > -1024
12489 /* For quad modes, we restrict the constant offset to be slightly less
12490 than what the instruction format permits. We have no such constraint
12491 on double mode offsets. (This must match arm_legitimate_index_p.) */
12492 && (INTVAL (XEXP (ind, 1))
12493 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12494 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12495 return TRUE;
12496
12497 return FALSE;
12498 }
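
/* A rough summary of the address forms accepted above, per TYPE (an
   illustrative sketch, not an exhaustive restatement of the code):

     TYPE 0 (vldr):  (reg), (post_inc (reg)), (pre_dec (reg)), and
                     (plus (reg) (const_int n)) with n word-aligned and in
                     (-1024, 1024), capped at 1016 for quad modes;
     TYPE 1 (ldm):   (reg) only -- no auto-increment forms;
     TYPE 2 (vld1):  (reg), (post_inc (reg)), and post-modify by register,
                     e.g. (post_modify (reg r0) (plus (reg r0) (reg r1))).

   After reload, label and label+offset constant addresses are accepted for
   all types.  */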
12499
12500 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12501 type. */
12502 int
12503 neon_struct_mem_operand (rtx op)
12504 {
12505 rtx ind;
12506
12507 /* Reject eliminable registers. */
12508 if (! (reload_in_progress || reload_completed)
12509 && ( reg_mentioned_p (frame_pointer_rtx, op)
12510 || reg_mentioned_p (arg_pointer_rtx, op)
12511 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12512 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12513 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12514 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12515 return FALSE;
12516
12517 /* Constants are converted into offsets from labels. */
12518 if (!MEM_P (op))
12519 return FALSE;
12520
12521 ind = XEXP (op, 0);
12522
12523 if (reload_completed
12524 && (GET_CODE (ind) == LABEL_REF
12525 || (GET_CODE (ind) == CONST
12526 && GET_CODE (XEXP (ind, 0)) == PLUS
12527 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12528 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12529 return TRUE;
12530
12531 /* Match: (mem (reg)). */
12532 if (REG_P (ind))
12533 return arm_address_register_rtx_p (ind, 0);
12534
12535   /* vldm/vstm allow POST_INC (ia) and PRE_DEC (db).  */
12536 if (GET_CODE (ind) == POST_INC
12537 || GET_CODE (ind) == PRE_DEC)
12538 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12539
12540 return FALSE;
12541 }
12542
12543 /* Return true if X is a register that will be eliminated later on. */
12544 int
12545 arm_eliminable_register (rtx x)
12546 {
12547 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12548 || REGNO (x) == ARG_POINTER_REGNUM
12549 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12550 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12551 }
12552
12553 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12554    coprocessor registers.  Otherwise return NO_REGS.  */
12555
12556 enum reg_class
12557 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12558 {
12559 if (mode == HFmode)
12560 {
12561 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12562 return GENERAL_REGS;
12563 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12564 return NO_REGS;
12565 return GENERAL_REGS;
12566 }
12567
12568 /* The neon move patterns handle all legitimate vector and struct
12569 addresses. */
12570 if (TARGET_NEON
12571 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12572 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12573 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12574 || VALID_NEON_STRUCT_MODE (mode)))
12575 return NO_REGS;
12576
12577 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12578 return NO_REGS;
12579
12580 return GENERAL_REGS;
12581 }
12582
12583 /* Values which must be returned in the most-significant end of the return
12584 register. */
12585
12586 static bool
12587 arm_return_in_msb (const_tree valtype)
12588 {
12589 return (TARGET_AAPCS_BASED
12590 && BYTES_BIG_ENDIAN
12591 && (AGGREGATE_TYPE_P (valtype)
12592 || TREE_CODE (valtype) == COMPLEX_TYPE
12593 || FIXED_POINT_TYPE_P (valtype)));
12594 }
12595
12596 /* Return TRUE if X references a SYMBOL_REF. */
12597 int
12598 symbol_mentioned_p (rtx x)
12599 {
12600 const char * fmt;
12601 int i;
12602
12603 if (GET_CODE (x) == SYMBOL_REF)
12604 return 1;
12605
12606 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12607 are constant offsets, not symbols. */
12608 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12609 return 0;
12610
12611 fmt = GET_RTX_FORMAT (GET_CODE (x));
12612
12613 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12614 {
12615 if (fmt[i] == 'E')
12616 {
12617 int j;
12618
12619 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12620 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12621 return 1;
12622 }
12623 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12624 return 1;
12625 }
12626
12627 return 0;
12628 }
12629
12630 /* Return TRUE if X references a LABEL_REF. */
12631 int
12632 label_mentioned_p (rtx x)
12633 {
12634 const char * fmt;
12635 int i;
12636
12637 if (GET_CODE (x) == LABEL_REF)
12638 return 1;
12639
12640 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12641 instruction, but they are constant offsets, not symbols. */
12642 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12643 return 0;
12644
12645 fmt = GET_RTX_FORMAT (GET_CODE (x));
12646 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12647 {
12648 if (fmt[i] == 'E')
12649 {
12650 int j;
12651
12652 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12653 if (label_mentioned_p (XVECEXP (x, i, j)))
12654 return 1;
12655 }
12656 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12657 return 1;
12658 }
12659
12660 return 0;
12661 }
12662
12663 int
12664 tls_mentioned_p (rtx x)
12665 {
12666 switch (GET_CODE (x))
12667 {
12668 case CONST:
12669 return tls_mentioned_p (XEXP (x, 0));
12670
12671 case UNSPEC:
12672 if (XINT (x, 1) == UNSPEC_TLS)
12673 return 1;
12674
12675 /* Fall through. */
12676 default:
12677 return 0;
12678 }
12679 }
12680
12681 /* Must not copy any rtx that uses a pc-relative address.
12682 Also, disallow copying of load-exclusive instructions that
12683 may appear after splitting of compare-and-swap-style operations
12684 so as to prevent those loops from being transformed away from their
12685 canonical forms (see PR 69904). */
12686
12687 static bool
12688 arm_cannot_copy_insn_p (rtx_insn *insn)
12689 {
12690 /* The tls call insn cannot be copied, as it is paired with a data
12691 word. */
12692 if (recog_memoized (insn) == CODE_FOR_tlscall)
12693 return true;
12694
12695 subrtx_iterator::array_type array;
12696 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12697 {
12698 const_rtx x = *iter;
12699 if (GET_CODE (x) == UNSPEC
12700 && (XINT (x, 1) == UNSPEC_PIC_BASE
12701 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12702 return true;
12703 }
12704
12705 rtx set = single_set (insn);
12706 if (set)
12707 {
12708 rtx src = SET_SRC (set);
12709 if (GET_CODE (src) == ZERO_EXTEND)
12710 src = XEXP (src, 0);
12711
12712 /* Catch the load-exclusive and load-acquire operations. */
12713 if (GET_CODE (src) == UNSPEC_VOLATILE
12714 && (XINT (src, 1) == VUNSPEC_LL
12715 || XINT (src, 1) == VUNSPEC_LAX))
12716 return true;
12717 }
12718 return false;
12719 }
12720
12721 enum rtx_code
12722 minmax_code (rtx x)
12723 {
12724 enum rtx_code code = GET_CODE (x);
12725
12726 switch (code)
12727 {
12728 case SMAX:
12729 return GE;
12730 case SMIN:
12731 return LE;
12732 case UMIN:
12733 return LEU;
12734 case UMAX:
12735 return GEU;
12736 default:
12737 gcc_unreachable ();
12738 }
12739 }
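
/* Worked example (register names arbitrary): smax is mapped to GE because
   "rd = smax (rn, rm)" keeps RN exactly when rn >= rm, e.g.

     cmp   rn, rm
     movlt rn, rm    @ overwrite only when the GE condition fails

   The unsigned operators map analogously to GEU and LEU.  */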
12740
12741 /* Match a pair of min/max operators that can be implemented via usat/ssat.  */
12742
12743 bool
12744 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12745 int *mask, bool *signed_sat)
12746 {
12747 /* The high bound must be a power of two minus one. */
12748 int log = exact_log2 (INTVAL (hi_bound) + 1);
12749 if (log == -1)
12750 return false;
12751
12752 /* The low bound is either zero (for usat) or one less than the
12753 negation of the high bound (for ssat). */
12754 if (INTVAL (lo_bound) == 0)
12755 {
12756 if (mask)
12757 *mask = log;
12758 if (signed_sat)
12759 *signed_sat = false;
12760
12761 return true;
12762 }
12763
12764 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12765 {
12766 if (mask)
12767 *mask = log + 1;
12768 if (signed_sat)
12769 *signed_sat = true;
12770
12771 return true;
12772 }
12773
12774 return false;
12775 }
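
/* Worked examples, assuming the usual usat/ssat semantics:

     lo_bound = 0,    hi_bound = 255  ->  *mask = 8, *signed_sat = false
                                          (i.e. "usat Rd, #8, Rm")
     lo_bound = -128, hi_bound = 127  ->  *mask = 8, *signed_sat = true
                                          (i.e. "ssat Rd, #8, Rm")
     lo_bound = 0,    hi_bound = 100  ->  no match, 101 is not a power of 2  */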
12776
12777 /* Return 1 if the memory locations referenced by A and B are adjacent,
   i.e. use the same base register at offsets exactly one word apart.  */
12778 int
12779 adjacent_mem_locations (rtx a, rtx b)
12780 {
12781 /* We don't guarantee to preserve the order of these memory refs. */
12782 if (volatile_refs_p (a) || volatile_refs_p (b))
12783 return 0;
12784
12785 if ((REG_P (XEXP (a, 0))
12786 || (GET_CODE (XEXP (a, 0)) == PLUS
12787 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12788 && (REG_P (XEXP (b, 0))
12789 || (GET_CODE (XEXP (b, 0)) == PLUS
12790 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12791 {
12792 HOST_WIDE_INT val0 = 0, val1 = 0;
12793 rtx reg0, reg1;
12794 int val_diff;
12795
12796 if (GET_CODE (XEXP (a, 0)) == PLUS)
12797 {
12798 reg0 = XEXP (XEXP (a, 0), 0);
12799 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12800 }
12801 else
12802 reg0 = XEXP (a, 0);
12803
12804 if (GET_CODE (XEXP (b, 0)) == PLUS)
12805 {
12806 reg1 = XEXP (XEXP (b, 0), 0);
12807 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12808 }
12809 else
12810 reg1 = XEXP (b, 0);
12811
12812 /* Don't accept any offset that will require multiple
12813 instructions to handle, since this would cause the
12814 arith_adjacentmem pattern to output an overlong sequence. */
12815 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12816 return 0;
12817
12818 /* Don't allow an eliminable register: register elimination can make
12819 the offset too large. */
12820 if (arm_eliminable_register (reg0))
12821 return 0;
12822
12823 val_diff = val1 - val0;
12824
12825 if (arm_ld_sched)
12826 {
12827 /* If the target has load delay slots, then there's no benefit
12828 to using an ldm instruction unless the offset is zero and
12829 we are optimizing for size. */
12830 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12831 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12832 && (val_diff == 4 || val_diff == -4));
12833 }
12834
12835 return ((REGNO (reg0) == REGNO (reg1))
12836 && (val_diff == 4 || val_diff == -4));
12837 }
12838
12839 return 0;
12840 }
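
/* Example (register numbers arbitrary): [r4, #8] and [r4, #12], i.e.
   (mem (plus (reg r4) (const_int 8))) and (mem (plus (reg r4) (const_int 12))),
   are adjacent: same base register, offsets exactly one word apart.  On cores
   with load scheduling (arm_ld_sched) the combination is additionally only
   accepted when optimizing for size and one of the two offsets is 0 or 4.  */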
12841
12842 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12843    for load operations, false for store operations.  MODE is the mode of each
12844    register in the operation.  CONSECUTIVE is true if the register numbers in
12845    the operation must be consecutive in the register bank.  RETURN_PC is true
   if the value is to be loaded into the PC.
12846 The pattern we are trying to match for load is:
12847 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12848 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12849 :
12850 :
12851 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12852 ]
12853 where
12854 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12855 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12856 3. If consecutive is TRUE, then for kth register being loaded,
12857 REGNO (R_dk) = REGNO (R_d0) + k.
12858 The pattern for store is similar. */
12859 bool
12860 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12861 bool consecutive, bool return_pc)
12862 {
12863 HOST_WIDE_INT count = XVECLEN (op, 0);
12864 rtx reg, mem, addr;
12865 unsigned regno;
12866 unsigned first_regno;
12867 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12868 rtx elt;
12869 bool addr_reg_in_reglist = false;
12870 bool update = false;
12871 int reg_increment;
12872 int offset_adj;
12873 int regs_per_val;
12874
12875 /* If not in SImode, then registers must be consecutive
12876 (e.g., VLDM instructions for DFmode). */
12877 gcc_assert ((mode == SImode) || consecutive);
12878 /* Setting return_pc for stores is illegal. */
12879 gcc_assert (!return_pc || load);
12880
12881 /* Set up the increments and the regs per val based on the mode. */
12882 reg_increment = GET_MODE_SIZE (mode);
12883 regs_per_val = reg_increment / 4;
12884 offset_adj = return_pc ? 1 : 0;
12885
12886 if (count <= 1
12887 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12888 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12889 return false;
12890
12891 /* Check if this is a write-back. */
12892 elt = XVECEXP (op, 0, offset_adj);
12893 if (GET_CODE (SET_SRC (elt)) == PLUS)
12894 {
12895 i++;
12896 base = 1;
12897 update = true;
12898
12899 /* The offset adjustment must be the number of registers being
12900 popped times the size of a single register. */
12901 if (!REG_P (SET_DEST (elt))
12902 || !REG_P (XEXP (SET_SRC (elt), 0))
12903 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12904 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12905 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12906 ((count - 1 - offset_adj) * reg_increment))
12907 return false;
12908 }
12909
12910 i = i + offset_adj;
12911 base = base + offset_adj;
12912 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12913 success depends on the type: VLDM can do just one reg,
12914 LDM must do at least two. */
12915 if ((count <= i) && (mode == SImode))
12916 return false;
12917
12918 elt = XVECEXP (op, 0, i - 1);
12919 if (GET_CODE (elt) != SET)
12920 return false;
12921
12922 if (load)
12923 {
12924 reg = SET_DEST (elt);
12925 mem = SET_SRC (elt);
12926 }
12927 else
12928 {
12929 reg = SET_SRC (elt);
12930 mem = SET_DEST (elt);
12931 }
12932
12933 if (!REG_P (reg) || !MEM_P (mem))
12934 return false;
12935
12936 regno = REGNO (reg);
12937 first_regno = regno;
12938 addr = XEXP (mem, 0);
12939 if (GET_CODE (addr) == PLUS)
12940 {
12941 if (!CONST_INT_P (XEXP (addr, 1)))
12942 return false;
12943
12944 offset = INTVAL (XEXP (addr, 1));
12945 addr = XEXP (addr, 0);
12946 }
12947
12948 if (!REG_P (addr))
12949 return false;
12950
12951 /* Don't allow SP to be loaded unless it is also the base register. It
12952 guarantees that SP is reset correctly when an LDM instruction
12953 is interrupted. Otherwise, we might end up with a corrupt stack. */
12954 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12955 return false;
12956
12957 for (; i < count; i++)
12958 {
12959 elt = XVECEXP (op, 0, i);
12960 if (GET_CODE (elt) != SET)
12961 return false;
12962
12963 if (load)
12964 {
12965 reg = SET_DEST (elt);
12966 mem = SET_SRC (elt);
12967 }
12968 else
12969 {
12970 reg = SET_SRC (elt);
12971 mem = SET_DEST (elt);
12972 }
12973
12974 if (!REG_P (reg)
12975 || GET_MODE (reg) != mode
12976 || REGNO (reg) <= regno
12977 || (consecutive
12978 && (REGNO (reg) !=
12979 (unsigned int) (first_regno + regs_per_val * (i - base))))
12980 /* Don't allow SP to be loaded unless it is also the base register. It
12981 guarantees that SP is reset correctly when an LDM instruction
12982 is interrupted. Otherwise, we might end up with a corrupt stack. */
12983 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12984 || !MEM_P (mem)
12985 || GET_MODE (mem) != mode
12986 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12987 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12988 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12989 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12990 offset + (i - base) * reg_increment))
12991 && (!REG_P (XEXP (mem, 0))
12992 || offset + (i - base) * reg_increment != 0)))
12993 return false;
12994
12995 regno = REGNO (reg);
12996 if (regno == REGNO (addr))
12997 addr_reg_in_reglist = true;
12998 }
12999
13000 if (load)
13001 {
13002 if (update && addr_reg_in_reglist)
13003 return false;
13004
13005      /* For Thumb-1, the address register is always modified, either by
13006         write-back or by an explicit load.  If the pattern does not describe an update,
13007 then the address register must be in the list of loaded registers. */
13008 if (TARGET_THUMB1)
13009 return update || addr_reg_in_reglist;
13010 }
13011
13012 return true;
13013 }
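
/* An illustrative sketch of the PARALLEL matched for a two-register load with
   base write-back, i.e. "ldmia r0!, {r1, r2}" (register numbers arbitrary):

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r1) (mem:SI (reg:SI r0)))
                (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   The write-back element comes first, its adjustment (8) equals the number of
   registers transferred times reg_increment, and each subsequent memory
   offset grows by reg_increment (4 for SImode).  */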
13014
13015 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13016 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13017 instruction. ADD_OFFSET is nonzero if the base address register needs
13018 to be modified with an add instruction before we can use it. */
13019
13020 static bool
13021 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13022 int nops, HOST_WIDE_INT add_offset)
13023 {
13024   /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13025 if the offset isn't small enough. The reason 2 ldrs are faster
13026 is because these ARMs are able to do more than one cache access
13027 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13028 whilst the ARM8 has a double bandwidth cache. This means that
13029 these cores can do both an instruction fetch and a data fetch in
13030 a single cycle, so the trick of calculating the address into a
13031 scratch register (one of the result regs) and then doing a load
13032 multiple actually becomes slower (and no smaller in code size).
13033 That is the transformation
13034
13035 ldr rd1, [rbase + offset]
13036 ldr rd2, [rbase + offset + 4]
13037
13038 to
13039
13040 add rd1, rbase, offset
13041 ldmia rd1, {rd1, rd2}
13042
13043 produces worse code -- '3 cycles + any stalls on rd2' instead of
13044 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13045 access per cycle, the first sequence could never complete in less
13046 than 6 cycles, whereas the ldm sequence would only take 5 and
13047 would make better use of sequential accesses if not hitting the
13048 cache.
13049
13050 We cheat here and test 'arm_ld_sched' which we currently know to
13051 only be true for the ARM8, ARM9 and StrongARM. If this ever
13052 changes, then the test below needs to be reworked. */
13053 if (nops == 2 && arm_ld_sched && add_offset != 0)
13054 return false;
13055
13056 /* XScale has load-store double instructions, but they have stricter
13057 alignment requirements than load-store multiple, so we cannot
13058 use them.
13059
13060 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13061 the pipeline until completion.
13062
13063 NREGS CYCLES
13064 1 3
13065 2 4
13066 3 5
13067 4 6
13068
13069 An ldr instruction takes 1-3 cycles, but does not block the
13070 pipeline.
13071
13072 NREGS CYCLES
13073 1 1-3
13074 2 2-6
13075 3 3-9
13076 4 4-12
13077
13078 Best case ldr will always win. However, the more ldr instructions
13079 we issue, the less likely we are to be able to schedule them well.
13080 Using ldr instructions also increases code size.
13081
13082 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13083 for counts of 3 or 4 regs. */
13084 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13085 return false;
13086 return true;
13087 }
13088
13089 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13090 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13091 an array ORDER which describes the sequence to use when accessing the
13092 offsets that produces an ascending order. In this sequence, each
13093 offset must be larger by exactly 4 than the previous one. ORDER[0]
13094 must have been filled in with the lowest offset by the caller.
13095 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13096 we use to verify that ORDER produces an ascending order of registers.
13097 Return true if it was possible to construct such an order, false if
13098 not. */
13099
13100 static bool
13101 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13102 int *unsorted_regs)
13103 {
13104 int i;
13105 for (i = 1; i < nops; i++)
13106 {
13107 int j;
13108
13109 order[i] = order[i - 1];
13110 for (j = 0; j < nops; j++)
13111 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13112 {
13113 /* We must find exactly one offset that is higher than the
13114 previous one by 4. */
13115 if (order[i] != order[i - 1])
13116 return false;
13117 order[i] = j;
13118 }
13119 if (order[i] == order[i - 1])
13120 return false;
13121 /* The register numbers must be ascending. */
13122 if (unsorted_regs != NULL
13123 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13124 return false;
13125 }
13126 return true;
13127 }
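
/* Worked example: with UNSORTED_OFFSETS = {8, 0, 12, 4} and ORDER[0] = 1 (the
   index of the smallest offset, filled in by the caller), the loop produces
   ORDER = {1, 3, 0, 2}, i.e. offsets 0, 4, 8, 12.  If any offset is missing
   or duplicated (say {8, 0, 12, 16}), there is no unique "previous + 4" match
   at some step and the function returns false.  */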
13128
13129 /* Used to determine in a peephole whether a sequence of load
13130 instructions can be changed into a load-multiple instruction.
13131 NOPS is the number of separate load instructions we are examining. The
13132 first NOPS entries in OPERANDS are the destination registers, the
13133 next NOPS entries are memory operands. If this function is
13134 successful, *BASE is set to the common base register of the memory
13135 accesses; *LOAD_OFFSET is set to the first memory location's offset
13136 from that base register.
13137 REGS is an array filled in with the destination register numbers.
13138    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13139    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13140 the sequence of registers in REGS matches the loads from ascending memory
13141 locations, and the function verifies that the register numbers are
13142 themselves ascending. If CHECK_REGS is false, the register numbers
13143 are stored in the order they are found in the operands. */
13144 static int
13145 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13146 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13147 {
13148 int unsorted_regs[MAX_LDM_STM_OPS];
13149 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13150 int order[MAX_LDM_STM_OPS];
13151 rtx base_reg_rtx = NULL;
13152 int base_reg = -1;
13153 int i, ldm_case;
13154
13155 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13156 easily extended if required. */
13157 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13158
13159 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13160
13161 /* Loop over the operands and check that the memory references are
13162 suitable (i.e. immediate offsets from the same base register). At
13163 the same time, extract the target register, and the memory
13164 offsets. */
13165 for (i = 0; i < nops; i++)
13166 {
13167 rtx reg;
13168 rtx offset;
13169
13170 /* Convert a subreg of a mem into the mem itself. */
13171 if (GET_CODE (operands[nops + i]) == SUBREG)
13172 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13173
13174 gcc_assert (MEM_P (operands[nops + i]));
13175
13176 /* Don't reorder volatile memory references; it doesn't seem worth
13177 looking for the case where the order is ok anyway. */
13178 if (MEM_VOLATILE_P (operands[nops + i]))
13179 return 0;
13180
13181 offset = const0_rtx;
13182
13183 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13184 || (GET_CODE (reg) == SUBREG
13185 && REG_P (reg = SUBREG_REG (reg))))
13186 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13187 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13188 || (GET_CODE (reg) == SUBREG
13189 && REG_P (reg = SUBREG_REG (reg))))
13190 && (CONST_INT_P (offset
13191 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13192 {
13193 if (i == 0)
13194 {
13195 base_reg = REGNO (reg);
13196 base_reg_rtx = reg;
13197 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13198 return 0;
13199 }
13200 else if (base_reg != (int) REGNO (reg))
13201 /* Not addressed from the same base register. */
13202 return 0;
13203
13204 unsorted_regs[i] = (REG_P (operands[i])
13205 ? REGNO (operands[i])
13206 : REGNO (SUBREG_REG (operands[i])));
13207
13208 /* If it isn't an integer register, or if it overwrites the
13209 base register but isn't the last insn in the list, then
13210 we can't do this. */
13211 if (unsorted_regs[i] < 0
13212 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13213 || unsorted_regs[i] > 14
13214 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13215 return 0;
13216
13217 /* Don't allow SP to be loaded unless it is also the base
13218 register. It guarantees that SP is reset correctly when
13219 an LDM instruction is interrupted. Otherwise, we might
13220 end up with a corrupt stack. */
13221 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13222 return 0;
13223
13224 unsorted_offsets[i] = INTVAL (offset);
13225 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13226 order[0] = i;
13227 }
13228 else
13229 /* Not a suitable memory address. */
13230 return 0;
13231 }
13232
13233 /* All the useful information has now been extracted from the
13234 operands into unsorted_regs and unsorted_offsets; additionally,
13235 order[0] has been set to the lowest offset in the list. Sort
13236 the offsets into order, verifying that they are adjacent, and
13237 check that the register numbers are ascending. */
13238 if (!compute_offset_order (nops, unsorted_offsets, order,
13239 check_regs ? unsorted_regs : NULL))
13240 return 0;
13241
13242 if (saved_order)
13243 memcpy (saved_order, order, sizeof order);
13244
13245 if (base)
13246 {
13247 *base = base_reg;
13248
13249 for (i = 0; i < nops; i++)
13250 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13251
13252 *load_offset = unsorted_offsets[order[0]];
13253 }
13254
13255 if (TARGET_THUMB1
13256 && !peep2_reg_dead_p (nops, base_reg_rtx))
13257 return 0;
13258
13259 if (unsorted_offsets[order[0]] == 0)
13260 ldm_case = 1; /* ldmia */
13261 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13262 ldm_case = 2; /* ldmib */
13263 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13264 ldm_case = 3; /* ldmda */
13265 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13266 ldm_case = 4; /* ldmdb */
13267 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13268 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13269 ldm_case = 5;
13270 else
13271 return 0;
13272
13273 if (!multiple_operation_profitable_p (false, nops,
13274 ldm_case == 5
13275 ? unsorted_offsets[order[0]] : 0))
13276 return 0;
13277
13278 return ldm_case;
13279 }
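
/* A sketch of how the return value maps to instructions, given ascending
   word offsets O[0] < ... < O[n-1] from the common base register:

     O[0]   == 0   ->  1 (ldmia)
     O[0]   == 4   ->  2 (ldmib, ARM only)
     O[n-1] == 0   ->  3 (ldmda, ARM only)
     O[n-1] == -4  ->  4 (ldmdb, 32-bit only)
     otherwise, if O[0] or -O[0] is a valid add/sub immediate
                   ->  5 (add a scratch base first, then ldmia)

   A return value of 0 means the sequence cannot, or should not, be combined.  */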
13280
13281 /* Used to determine in a peephole whether a sequence of store instructions can
13282 be changed into a store-multiple instruction.
13283 NOPS is the number of separate store instructions we are examining.
13284 NOPS_TOTAL is the total number of instructions recognized by the peephole
13285 pattern.
13286 The first NOPS entries in OPERANDS are the source registers, the next
13287 NOPS entries are memory operands. If this function is successful, *BASE is
13288 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13289 to the first memory location's offset from that base register. REGS is an
13290 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13291 likewise filled with the corresponding rtx's.
13292    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13293 numbers to an ascending order of stores.
13294 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13295 from ascending memory locations, and the function verifies that the register
13296 numbers are themselves ascending. If CHECK_REGS is false, the register
13297 numbers are stored in the order they are found in the operands. */
13298 static int
13299 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13300 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13301 HOST_WIDE_INT *load_offset, bool check_regs)
13302 {
13303 int unsorted_regs[MAX_LDM_STM_OPS];
13304 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13305 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13306 int order[MAX_LDM_STM_OPS];
13307 int base_reg = -1;
13308 rtx base_reg_rtx = NULL;
13309 int i, stm_case;
13310
13311   /* Write-back of the base register is currently only supported for Thumb-1.  */
13312 int base_writeback = TARGET_THUMB1;
13313
13314 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13315 easily extended if required. */
13316 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13317
13318 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13319
13320 /* Loop over the operands and check that the memory references are
13321 suitable (i.e. immediate offsets from the same base register). At
13322 the same time, extract the target register, and the memory
13323 offsets. */
13324 for (i = 0; i < nops; i++)
13325 {
13326 rtx reg;
13327 rtx offset;
13328
13329 /* Convert a subreg of a mem into the mem itself. */
13330 if (GET_CODE (operands[nops + i]) == SUBREG)
13331 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13332
13333 gcc_assert (MEM_P (operands[nops + i]));
13334
13335 /* Don't reorder volatile memory references; it doesn't seem worth
13336 looking for the case where the order is ok anyway. */
13337 if (MEM_VOLATILE_P (operands[nops + i]))
13338 return 0;
13339
13340 offset = const0_rtx;
13341
13342 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13343 || (GET_CODE (reg) == SUBREG
13344 && REG_P (reg = SUBREG_REG (reg))))
13345 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13346 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13347 || (GET_CODE (reg) == SUBREG
13348 && REG_P (reg = SUBREG_REG (reg))))
13349 && (CONST_INT_P (offset
13350 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13351 {
13352 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13353 ? operands[i] : SUBREG_REG (operands[i]));
13354 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13355
13356 if (i == 0)
13357 {
13358 base_reg = REGNO (reg);
13359 base_reg_rtx = reg;
13360 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13361 return 0;
13362 }
13363 else if (base_reg != (int) REGNO (reg))
13364 /* Not addressed from the same base register. */
13365 return 0;
13366
13367 /* If it isn't an integer register, then we can't do this. */
13368 if (unsorted_regs[i] < 0
13369 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13370 /* The effects are unpredictable if the base register is
13371 both updated and stored. */
13372 || (base_writeback && unsorted_regs[i] == base_reg)
13373 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13374 || unsorted_regs[i] > 14)
13375 return 0;
13376
13377 unsorted_offsets[i] = INTVAL (offset);
13378 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13379 order[0] = i;
13380 }
13381 else
13382 /* Not a suitable memory address. */
13383 return 0;
13384 }
13385
13386 /* All the useful information has now been extracted from the
13387 operands into unsorted_regs and unsorted_offsets; additionally,
13388 order[0] has been set to the lowest offset in the list. Sort
13389 the offsets into order, verifying that they are adjacent, and
13390 check that the register numbers are ascending. */
13391 if (!compute_offset_order (nops, unsorted_offsets, order,
13392 check_regs ? unsorted_regs : NULL))
13393 return 0;
13394
13395 if (saved_order)
13396 memcpy (saved_order, order, sizeof order);
13397
13398 if (base)
13399 {
13400 *base = base_reg;
13401
13402 for (i = 0; i < nops; i++)
13403 {
13404 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13405 if (reg_rtxs)
13406 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13407 }
13408
13409 *load_offset = unsorted_offsets[order[0]];
13410 }
13411
13412 if (TARGET_THUMB1
13413 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13414 return 0;
13415
13416 if (unsorted_offsets[order[0]] == 0)
13417 stm_case = 1; /* stmia */
13418 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13419 stm_case = 2; /* stmib */
13420 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13421 stm_case = 3; /* stmda */
13422 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13423 stm_case = 4; /* stmdb */
13424 else
13425 return 0;
13426
13427 if (!multiple_operation_profitable_p (false, nops, 0))
13428 return 0;
13429
13430 return stm_case;
13431 }
13432 \f
13433 /* Routines for use in generating RTL. */
13434
13435 /* Generate a load-multiple instruction. COUNT is the number of loads in
13436 the instruction; REGS and MEMS are arrays containing the operands.
13437 BASEREG is the base register to be used in addressing the memory operands.
13438 WBACK_OFFSET is nonzero if the instruction should update the base
13439 register. */
13440
13441 static rtx
13442 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13443 HOST_WIDE_INT wback_offset)
13444 {
13445 int i = 0, j;
13446 rtx result;
13447
13448 if (!multiple_operation_profitable_p (false, count, 0))
13449 {
13450 rtx seq;
13451
13452 start_sequence ();
13453
13454 for (i = 0; i < count; i++)
13455 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13456
13457 if (wback_offset != 0)
13458 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13459
13460 seq = get_insns ();
13461 end_sequence ();
13462
13463 return seq;
13464 }
13465
13466 result = gen_rtx_PARALLEL (VOIDmode,
13467 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13468 if (wback_offset != 0)
13469 {
13470 XVECEXP (result, 0, 0)
13471 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13472 i = 1;
13473 count++;
13474 }
13475
13476 for (j = 0; i < count; i++, j++)
13477 XVECEXP (result, 0, i)
13478 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13479
13480 return result;
13481 }
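
/* Illustrative example (register numbers arbitrary): for COUNT = 2,
   REGS = {4, 5}, BASEREG = r0 and WBACK_OFFSET = 8, the profitable path
   builds

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r4) (mem:SI ...))     ; MEMS[0]
                (set (reg:SI r5) (mem:SI ...))])   ; MEMS[1]

   which is intended to match the ldm patterns ("ldmia r0!, {r4, r5}").  On
   the unprofitable path a plain sequence of single loads, plus an add for
   the write-back, is emitted instead.  */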
13482
13483 /* Generate a store-multiple instruction. COUNT is the number of stores in
13484 the instruction; REGS and MEMS are arrays containing the operands.
13485 BASEREG is the base register to be used in addressing the memory operands.
13486 WBACK_OFFSET is nonzero if the instruction should update the base
13487 register. */
13488
13489 static rtx
13490 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13491 HOST_WIDE_INT wback_offset)
13492 {
13493 int i = 0, j;
13494 rtx result;
13495
13496 if (GET_CODE (basereg) == PLUS)
13497 basereg = XEXP (basereg, 0);
13498
13499 if (!multiple_operation_profitable_p (false, count, 0))
13500 {
13501 rtx seq;
13502
13503 start_sequence ();
13504
13505 for (i = 0; i < count; i++)
13506 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13507
13508 if (wback_offset != 0)
13509 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13510
13511 seq = get_insns ();
13512 end_sequence ();
13513
13514 return seq;
13515 }
13516
13517 result = gen_rtx_PARALLEL (VOIDmode,
13518 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13519 if (wback_offset != 0)
13520 {
13521 XVECEXP (result, 0, 0)
13522 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13523 i = 1;
13524 count++;
13525 }
13526
13527 for (j = 0; i < count; i++, j++)
13528 XVECEXP (result, 0, i)
13529 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13530
13531 return result;
13532 }
13533
13534 /* Generate either a load-multiple or a store-multiple instruction. This
13535 function can be used in situations where we can start with a single MEM
13536 rtx and adjust its address upwards.
13537 COUNT is the number of operations in the instruction, not counting a
13538 possible update of the base register. REGS is an array containing the
13539 register operands.
13540 BASEREG is the base register to be used in addressing the memory operands,
13541 which are constructed from BASEMEM.
13542 WRITE_BACK specifies whether the generated instruction should include an
13543 update of the base register.
13544 OFFSETP is used to pass an offset to and from this function; this offset
13545 is not used when constructing the address (instead BASEMEM should have an
13546    appropriate offset in its address); it is used only for setting
13547    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13548
13549 static rtx
13550 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13551 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13552 {
13553 rtx mems[MAX_LDM_STM_OPS];
13554 HOST_WIDE_INT offset = *offsetp;
13555 int i;
13556
13557 gcc_assert (count <= MAX_LDM_STM_OPS);
13558
13559 if (GET_CODE (basereg) == PLUS)
13560 basereg = XEXP (basereg, 0);
13561
13562 for (i = 0; i < count; i++)
13563 {
13564 rtx addr = plus_constant (Pmode, basereg, i * 4);
13565 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13566 offset += 4;
13567 }
13568
13569 if (write_back)
13570 *offsetp = offset;
13571
13572 if (is_load)
13573 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13574 write_back ? 4 * count : 0);
13575 else
13576 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13577 write_back ? 4 * count : 0);
13578 }
13579
13580 rtx
13581 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13582 rtx basemem, HOST_WIDE_INT *offsetp)
13583 {
13584 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13585 offsetp);
13586 }
13587
13588 rtx
13589 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13590 rtx basemem, HOST_WIDE_INT *offsetp)
13591 {
13592 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13593 offsetp);
13594 }
13595
13596 /* Called from a peephole2 expander to turn a sequence of loads into an
13597 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13598 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13599    is true if we can reorder the registers because their subsequent uses are
13600    commutative.
13601 Returns true iff we could generate a new instruction. */
13602
13603 bool
13604 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13605 {
13606 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13607 rtx mems[MAX_LDM_STM_OPS];
13608 int i, j, base_reg;
13609 rtx base_reg_rtx;
13610 HOST_WIDE_INT offset;
13611 int write_back = FALSE;
13612 int ldm_case;
13613 rtx addr;
13614
13615 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13616 &base_reg, &offset, !sort_regs);
13617
13618 if (ldm_case == 0)
13619 return false;
13620
13621 if (sort_regs)
13622 for (i = 0; i < nops - 1; i++)
13623 for (j = i + 1; j < nops; j++)
13624 if (regs[i] > regs[j])
13625 {
13626 int t = regs[i];
13627 regs[i] = regs[j];
13628 regs[j] = t;
13629 }
13630 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13631
13632 if (TARGET_THUMB1)
13633 {
13634 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13635 gcc_assert (ldm_case == 1 || ldm_case == 5);
13636 write_back = TRUE;
13637 }
13638
13639 if (ldm_case == 5)
13640 {
13641 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13642 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13643 offset = 0;
13644 if (!TARGET_THUMB1)
13645 base_reg_rtx = newbase;
13646 }
13647
13648 for (i = 0; i < nops; i++)
13649 {
13650 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13651 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13652 SImode, addr, 0);
13653 }
13654 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13655 write_back ? offset + i * 4 : 0));
13656 return true;
13657 }
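
/* Illustrative transformation (register numbers arbitrary): a matched
   peephole such as

     ldr r1, [r3, #4]
     ldr r0, [r3]

   becomes, with SORT_REGS so that the destination registers may be swapped,

     ldmia r3, {r0, r1}

   For ldm_case 5 an add of the lowest offset into a scratch (or the first
   destination register) is emitted first; on Thumb-1 the base register must
   be dead afterwards so that the write-back form can be used.  */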
13658
13659 /* Called from a peephole2 expander to turn a sequence of stores into an
13660 STM instruction. OPERANDS are the operands found by the peephole matcher;
13661 NOPS indicates how many separate stores we are trying to combine.
13662 Returns true iff we could generate a new instruction. */
13663
13664 bool
13665 gen_stm_seq (rtx *operands, int nops)
13666 {
13667 int i;
13668 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13669 rtx mems[MAX_LDM_STM_OPS];
13670 int base_reg;
13671 rtx base_reg_rtx;
13672 HOST_WIDE_INT offset;
13673 int write_back = FALSE;
13674 int stm_case;
13675 rtx addr;
13676 bool base_reg_dies;
13677
13678 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13679 mem_order, &base_reg, &offset, true);
13680
13681 if (stm_case == 0)
13682 return false;
13683
13684 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13685
13686 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13687 if (TARGET_THUMB1)
13688 {
13689 gcc_assert (base_reg_dies);
13690 write_back = TRUE;
13691 }
13692
13693 if (stm_case == 5)
13694 {
13695 gcc_assert (base_reg_dies);
13696 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13697 offset = 0;
13698 }
13699
13700 addr = plus_constant (Pmode, base_reg_rtx, offset);
13701
13702 for (i = 0; i < nops; i++)
13703 {
13704 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13705 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13706 SImode, addr, 0);
13707 }
13708 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13709 write_back ? offset + i * 4 : 0));
13710 return true;
13711 }
13712
13713 /* Called from a peephole2 expander to turn a sequence of stores that are
13714 preceded by constant loads into an STM instruction. OPERANDS are the
13715 operands found by the peephole matcher; NOPS indicates how many
13716 separate stores we are trying to combine; there are 2 * NOPS
13717 instructions in the peephole.
13718 Returns true iff we could generate a new instruction. */
13719
13720 bool
13721 gen_const_stm_seq (rtx *operands, int nops)
13722 {
13723 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13724 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13725 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13726 rtx mems[MAX_LDM_STM_OPS];
13727 int base_reg;
13728 rtx base_reg_rtx;
13729 HOST_WIDE_INT offset;
13730 int write_back = FALSE;
13731 int stm_case;
13732 rtx addr;
13733 bool base_reg_dies;
13734 int i, j;
13735 HARD_REG_SET allocated;
13736
13737 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13738 mem_order, &base_reg, &offset, false);
13739
13740 if (stm_case == 0)
13741 return false;
13742
13743 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13744
13745 /* If the same register is used more than once, try to find a free
13746 register. */
13747 CLEAR_HARD_REG_SET (allocated);
13748 for (i = 0; i < nops; i++)
13749 {
13750 for (j = i + 1; j < nops; j++)
13751 if (regs[i] == regs[j])
13752 {
13753 rtx t = peep2_find_free_register (0, nops * 2,
13754 TARGET_THUMB1 ? "l" : "r",
13755 SImode, &allocated);
13756 if (t == NULL_RTX)
13757 return false;
13758 reg_rtxs[i] = t;
13759 regs[i] = REGNO (t);
13760 }
13761 }
13762
13763 /* Compute an ordering that maps the register numbers to an ascending
13764 sequence. */
13765 reg_order[0] = 0;
13766 for (i = 0; i < nops; i++)
13767 if (regs[i] < regs[reg_order[0]])
13768 reg_order[0] = i;
13769
13770 for (i = 1; i < nops; i++)
13771 {
13772 int this_order = reg_order[i - 1];
13773 for (j = 0; j < nops; j++)
13774 if (regs[j] > regs[reg_order[i - 1]]
13775 && (this_order == reg_order[i - 1]
13776 || regs[j] < regs[this_order]))
13777 this_order = j;
13778 reg_order[i] = this_order;
13779 }
13780
13781 /* Ensure that registers that must be live after the instruction end
13782 up with the correct value. */
13783 for (i = 0; i < nops; i++)
13784 {
13785 int this_order = reg_order[i];
13786 if ((this_order != mem_order[i]
13787 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13788 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13789 return false;
13790 }
13791
13792 /* Load the constants. */
13793 for (i = 0; i < nops; i++)
13794 {
13795 rtx op = operands[2 * nops + mem_order[i]];
13796 sorted_regs[i] = regs[reg_order[i]];
13797 emit_move_insn (reg_rtxs[reg_order[i]], op);
13798 }
13799
13800 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13801
13802 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13803 if (TARGET_THUMB1)
13804 {
13805 gcc_assert (base_reg_dies);
13806 write_back = TRUE;
13807 }
13808
13809 if (stm_case == 5)
13810 {
13811 gcc_assert (base_reg_dies);
13812 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13813 offset = 0;
13814 }
13815
13816 addr = plus_constant (Pmode, base_reg_rtx, offset);
13817
13818 for (i = 0; i < nops; i++)
13819 {
13820 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13821 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13822 SImode, addr, 0);
13823 }
13824 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13825 write_back ? offset + i * 4 : 0));
13826 return true;
13827 }
13828
13829 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13830 unaligned copies on processors which support unaligned semantics for those
13831 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13832 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13833 An interleave factor of 1 (the minimum) will perform no interleaving.
13834 Load/store multiple are used for aligned addresses where possible. */
13835
13836 static void
13837 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13838 HOST_WIDE_INT length,
13839 unsigned int interleave_factor)
13840 {
13841 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13842 int *regnos = XALLOCAVEC (int, interleave_factor);
13843 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13844 HOST_WIDE_INT i, j;
13845 HOST_WIDE_INT remaining = length, words;
13846 rtx halfword_tmp = NULL, byte_tmp = NULL;
13847 rtx dst, src;
13848 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13849 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13850 HOST_WIDE_INT srcoffset, dstoffset;
13851 HOST_WIDE_INT src_autoinc, dst_autoinc;
13852 rtx mem, addr;
13853
13854 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13855
13856 /* Use hard registers if we have aligned source or destination so we can use
13857 load/store multiple with contiguous registers. */
13858 if (dst_aligned || src_aligned)
13859 for (i = 0; i < interleave_factor; i++)
13860 regs[i] = gen_rtx_REG (SImode, i);
13861 else
13862 for (i = 0; i < interleave_factor; i++)
13863 regs[i] = gen_reg_rtx (SImode);
13864
13865 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13866 src = copy_addr_to_reg (XEXP (srcbase, 0));
13867
13868 srcoffset = dstoffset = 0;
13869
13870 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13871 For copying the last bytes we want to subtract this offset again. */
13872 src_autoinc = dst_autoinc = 0;
13873
13874 for (i = 0; i < interleave_factor; i++)
13875 regnos[i] = i;
13876
13877 /* Copy BLOCK_SIZE_BYTES chunks. */
13878
13879 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13880 {
13881 /* Load words. */
13882 if (src_aligned && interleave_factor > 1)
13883 {
13884 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13885 TRUE, srcbase, &srcoffset));
13886 src_autoinc += UNITS_PER_WORD * interleave_factor;
13887 }
13888 else
13889 {
13890 for (j = 0; j < interleave_factor; j++)
13891 {
13892 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13893 - src_autoinc));
13894 mem = adjust_automodify_address (srcbase, SImode, addr,
13895 srcoffset + j * UNITS_PER_WORD);
13896 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13897 }
13898 srcoffset += block_size_bytes;
13899 }
13900
13901 /* Store words. */
13902 if (dst_aligned && interleave_factor > 1)
13903 {
13904 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13905 TRUE, dstbase, &dstoffset));
13906 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13907 }
13908 else
13909 {
13910 for (j = 0; j < interleave_factor; j++)
13911 {
13912 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13913 - dst_autoinc));
13914 mem = adjust_automodify_address (dstbase, SImode, addr,
13915 dstoffset + j * UNITS_PER_WORD);
13916 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13917 }
13918 dstoffset += block_size_bytes;
13919 }
13920
13921 remaining -= block_size_bytes;
13922 }
13923
13924 /* Copy any whole words left (note these aren't interleaved with any
13925 subsequent halfword/byte load/stores in the interests of simplicity). */
13926
13927 words = remaining / UNITS_PER_WORD;
13928
13929 gcc_assert (words < interleave_factor);
13930
13931 if (src_aligned && words > 1)
13932 {
13933 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13934 &srcoffset));
13935 src_autoinc += UNITS_PER_WORD * words;
13936 }
13937 else
13938 {
13939 for (j = 0; j < words; j++)
13940 {
13941 addr = plus_constant (Pmode, src,
13942 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13943 mem = adjust_automodify_address (srcbase, SImode, addr,
13944 srcoffset + j * UNITS_PER_WORD);
13945 if (src_aligned)
13946 emit_move_insn (regs[j], mem);
13947 else
13948 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13949 }
13950 srcoffset += words * UNITS_PER_WORD;
13951 }
13952
13953 if (dst_aligned && words > 1)
13954 {
13955 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13956 &dstoffset));
13957 dst_autoinc += words * UNITS_PER_WORD;
13958 }
13959 else
13960 {
13961 for (j = 0; j < words; j++)
13962 {
13963 addr = plus_constant (Pmode, dst,
13964 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13965 mem = adjust_automodify_address (dstbase, SImode, addr,
13966 dstoffset + j * UNITS_PER_WORD);
13967 if (dst_aligned)
13968 emit_move_insn (mem, regs[j]);
13969 else
13970 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13971 }
13972 dstoffset += words * UNITS_PER_WORD;
13973 }
13974
13975 remaining -= words * UNITS_PER_WORD;
13976
13977 gcc_assert (remaining < 4);
13978
13979 /* Copy a halfword if necessary. */
13980
13981 if (remaining >= 2)
13982 {
13983 halfword_tmp = gen_reg_rtx (SImode);
13984
13985 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13986 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13987 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13988
13989 /* Either write out immediately, or delay until we've loaded the last
13990 byte, depending on interleave factor. */
13991 if (interleave_factor == 1)
13992 {
13993 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13994 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13995 emit_insn (gen_unaligned_storehi (mem,
13996 gen_lowpart (HImode, halfword_tmp)));
13997 halfword_tmp = NULL;
13998 dstoffset += 2;
13999 }
14000
14001 remaining -= 2;
14002 srcoffset += 2;
14003 }
14004
14005 gcc_assert (remaining < 2);
14006
14007 /* Copy last byte. */
14008
14009 if ((remaining & 1) != 0)
14010 {
14011 byte_tmp = gen_reg_rtx (SImode);
14012
14013 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14014 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14015 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14016
14017 if (interleave_factor == 1)
14018 {
14019 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14020 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14021 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14022 byte_tmp = NULL;
14023 dstoffset++;
14024 }
14025
14026 remaining--;
14027 srcoffset++;
14028 }
14029
14030 /* Store last halfword if we haven't done so already. */
14031
14032 if (halfword_tmp)
14033 {
14034 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14035 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14036 emit_insn (gen_unaligned_storehi (mem,
14037 gen_lowpart (HImode, halfword_tmp)));
14038 dstoffset += 2;
14039 }
14040
14041 /* Likewise for last byte. */
14042
14043 if (byte_tmp)
14044 {
14045 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14046 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14047 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14048 dstoffset++;
14049 }
14050
14051 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14052 }
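
/* Illustrative expansion, assuming LENGTH = 7, INTERLEAVE_FACTOR = 1 and both
   ends unaligned (register names arbitrary):

     ldr  rT, [src]       @ unaligned word copy
     str  rT, [dst]
     ldrh rH, [src, #4]   @ trailing halfword
     strh rH, [dst, #4]
     ldrb rB, [src, #6]   @ trailing byte
     strb rB, [dst, #6]

   With an aligned source or destination and a larger interleave factor the
   word-sized part is instead done with ldm/stm on hard registers r0 upwards.  */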
14053
14054 /* From mips_adjust_block_mem:
14055
14056 Helper function for doing a loop-based block operation on memory
14057 reference MEM. Each iteration of the loop will operate on LENGTH
14058 bytes of MEM.
14059
14060 Create a new base register for use within the loop and point it to
14061 the start of MEM. Create a new memory reference that uses this
14062 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14063
14064 static void
14065 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14066 rtx *loop_mem)
14067 {
14068 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14069
14070 /* Although the new mem does not refer to a known location,
14071 it does keep up to LENGTH bytes of alignment. */
14072 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14073 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14074 }
14075
14076 /* From mips_block_move_loop:
14077
14078 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14079 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14080 the memory regions do not overlap. */
14081
14082 static void
14083 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14084 unsigned int interleave_factor,
14085 HOST_WIDE_INT bytes_per_iter)
14086 {
14087 rtx src_reg, dest_reg, final_src, test;
14088 HOST_WIDE_INT leftover;
14089
14090 leftover = length % bytes_per_iter;
14091 length -= leftover;
14092
14093 /* Create registers and memory references for use within the loop. */
14094 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14095 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14096
14097 /* Calculate the value that SRC_REG should have after the last iteration of
14098 the loop. */
14099 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14100 0, 0, OPTAB_WIDEN);
14101
14102 /* Emit the start of the loop. */
14103 rtx_code_label *label = gen_label_rtx ();
14104 emit_label (label);
14105
14106 /* Emit the loop body. */
14107 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14108 interleave_factor);
14109
14110 /* Move on to the next block. */
14111 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14112 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14113
14114 /* Emit the loop condition. */
14115 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14116 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14117
14118 /* Mop up any left-over bytes. */
14119 if (leftover)
14120 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14121 }
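
/* Example: for LENGTH = 40 and BYTES_PER_ITER = 16 this emits a loop that
   copies 32 bytes in two iterations, advancing SRC_REG and DEST_REG by 16
   each time until SRC_REG reaches src + 32, followed by a straight-line copy
   of the remaining 8 bytes.  */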
14122
14123 /* Emit a block move when either the source or destination is unaligned (not
14124 aligned to a four-byte boundary). This may need further tuning depending on
14125 core type, optimize_size setting, etc. */
14126
14127 static int
14128 arm_movmemqi_unaligned (rtx *operands)
14129 {
14130 HOST_WIDE_INT length = INTVAL (operands[2]);
14131
14132 if (optimize_size)
14133 {
14134 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14135 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14136 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14137 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14138 or dst_aligned though: allow more interleaving in those cases since the
14139 resulting code can be smaller. */
14140 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14141 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14142
14143 if (length > 12)
14144 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14145 interleave_factor, bytes_per_iter);
14146 else
14147 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14148 interleave_factor);
14149 }
14150 else
14151 {
14152 /* Note that the loop created by arm_block_move_unaligned_loop may be
14153 subject to loop unrolling, which makes tuning this condition a little
14154 redundant. */
14155 if (length > 32)
14156 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14157 else
14158 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14159 }
14160
14161 return 1;
14162 }
14163
14164 int
14165 arm_gen_movmemqi (rtx *operands)
14166 {
14167 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14168 HOST_WIDE_INT srcoffset, dstoffset;
14169 rtx src, dst, srcbase, dstbase;
14170 rtx part_bytes_reg = NULL;
14171 rtx mem;
14172
14173 if (!CONST_INT_P (operands[2])
14174 || !CONST_INT_P (operands[3])
14175 || INTVAL (operands[2]) > 64)
14176 return 0;
14177
14178 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14179 return arm_movmemqi_unaligned (operands);
14180
14181 if (INTVAL (operands[3]) & 3)
14182 return 0;
14183
14184 dstbase = operands[0];
14185 srcbase = operands[1];
14186
14187 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14188 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14189
14190 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14191 out_words_to_go = INTVAL (operands[2]) / 4;
14192 last_bytes = INTVAL (operands[2]) & 3;
14193 dstoffset = srcoffset = 0;
14194
14195 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14196 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14197
14198 while (in_words_to_go >= 2)
14199 {
14200 if (in_words_to_go > 4)
14201 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14202 TRUE, srcbase, &srcoffset));
14203 else
14204 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14205 src, FALSE, srcbase,
14206 &srcoffset));
14207
14208 if (out_words_to_go)
14209 {
14210 if (out_words_to_go > 4)
14211 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14212 TRUE, dstbase, &dstoffset));
14213 else if (out_words_to_go != 1)
14214 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14215 out_words_to_go, dst,
14216 (last_bytes == 0
14217 ? FALSE : TRUE),
14218 dstbase, &dstoffset));
14219 else
14220 {
14221 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14222 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14223 if (last_bytes != 0)
14224 {
14225 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14226 dstoffset += 4;
14227 }
14228 }
14229 }
14230
14231 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14232 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14233 }
14234
14235 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14236 if (out_words_to_go)
14237 {
14238 rtx sreg;
14239
14240 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14241 sreg = copy_to_reg (mem);
14242
14243 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14244 emit_move_insn (mem, sreg);
14245 in_words_to_go--;
14246
14247 gcc_assert (!in_words_to_go); /* Sanity check */
14248 }
14249
14250 if (in_words_to_go)
14251 {
14252 gcc_assert (in_words_to_go > 0);
14253
14254 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14255 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14256 }
14257
14258 gcc_assert (!last_bytes || part_bytes_reg);
14259
14260 if (BYTES_BIG_ENDIAN && last_bytes)
14261 {
14262 rtx tmp = gen_reg_rtx (SImode);
14263
14264 /* The bytes we want are in the top end of the word. */
14265 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14266 GEN_INT (8 * (4 - last_bytes))));
14267 part_bytes_reg = tmp;
14268
14269 while (last_bytes)
14270 {
14271 mem = adjust_automodify_address (dstbase, QImode,
14272 plus_constant (Pmode, dst,
14273 last_bytes - 1),
14274 dstoffset + last_bytes - 1);
14275 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14276
14277 if (--last_bytes)
14278 {
14279 tmp = gen_reg_rtx (SImode);
14280 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14281 part_bytes_reg = tmp;
14282 }
14283 }
14284
14285 }
14286 else
14287 {
14288 if (last_bytes > 1)
14289 {
14290 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14291 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14292 last_bytes -= 2;
14293 if (last_bytes)
14294 {
14295 rtx tmp = gen_reg_rtx (SImode);
14296 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14297 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14298 part_bytes_reg = tmp;
14299 dstoffset += 2;
14300 }
14301 }
14302
14303 if (last_bytes)
14304 {
14305 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14306 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14307 }
14308 }
14309
14310 return 1;
14311 }
14312
14313 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14314 by mode size. */
14315 inline static rtx
14316 next_consecutive_mem (rtx mem)
14317 {
14318 machine_mode mode = GET_MODE (mem);
14319 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14320 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14321
14322 return adjust_automodify_address (mem, mode, addr, offset);
14323 }
14324
14325 /* Copy using LDRD/STRD instructions whenever possible.
14326 Returns true upon success. */
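/* As a rough illustration: for a 15-byte copy with both the source and
   destination word aligned, the loop below moves 8 bytes as one DImode
   (LDRD/STRD) access, then a 4-byte word, a 2-byte halfword and a final
   byte cover the remainder.  */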
14327 bool
14328 gen_movmem_ldrd_strd (rtx *operands)
14329 {
14330 unsigned HOST_WIDE_INT len;
14331 HOST_WIDE_INT align;
14332 rtx src, dst, base;
14333 rtx reg0;
14334 bool src_aligned, dst_aligned;
14335 bool src_volatile, dst_volatile;
14336
14337 gcc_assert (CONST_INT_P (operands[2]));
14338 gcc_assert (CONST_INT_P (operands[3]));
14339
14340 len = UINTVAL (operands[2]);
14341 if (len > 64)
14342 return false;
14343
14344 /* Maximum alignment we can assume for both src and dst buffers. */
14345 align = INTVAL (operands[3]);
14346
14347 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14348 return false;
14349
14350 /* Place src and dst addresses in registers
14351 and update the corresponding mem rtx. */
14352 dst = operands[0];
14353 dst_volatile = MEM_VOLATILE_P (dst);
14354 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14355 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14356 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14357
14358 src = operands[1];
14359 src_volatile = MEM_VOLATILE_P (src);
14360 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14361 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14362 src = adjust_automodify_address (src, VOIDmode, base, 0);
14363
14364 if (!unaligned_access && !(src_aligned && dst_aligned))
14365 return false;
14366
14367 if (src_volatile || dst_volatile)
14368 return false;
14369
14370 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14371 if (!(dst_aligned || src_aligned))
14372 return arm_gen_movmemqi (operands);
14373
14374 /* If either src or dst is unaligned we'll be accessing it as pairs
14375 of unaligned SImode accesses. Otherwise we can generate DImode
14376 ldrd/strd instructions. */
14377 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14378 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14379
14380 while (len >= 8)
14381 {
14382 len -= 8;
14383 reg0 = gen_reg_rtx (DImode);
14384 rtx low_reg = NULL_RTX;
14385 rtx hi_reg = NULL_RTX;
14386
14387 if (!src_aligned || !dst_aligned)
14388 {
14389 low_reg = gen_lowpart (SImode, reg0);
14390 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14391 }
14392 if (src_aligned)
14393 emit_move_insn (reg0, src);
14394 else
14395 {
14396 emit_insn (gen_unaligned_loadsi (low_reg, src));
14397 src = next_consecutive_mem (src);
14398 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14399 }
14400
14401 if (dst_aligned)
14402 emit_move_insn (dst, reg0);
14403 else
14404 {
14405 emit_insn (gen_unaligned_storesi (dst, low_reg));
14406 dst = next_consecutive_mem (dst);
14407 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14408 }
14409
14410 src = next_consecutive_mem (src);
14411 dst = next_consecutive_mem (dst);
14412 }
14413
14414 gcc_assert (len < 8);
14415 if (len >= 4)
14416 {
14417 /* More than a word but less than a double-word to copy. Copy a word. */
14418 reg0 = gen_reg_rtx (SImode);
14419 src = adjust_address (src, SImode, 0);
14420 dst = adjust_address (dst, SImode, 0);
14421 if (src_aligned)
14422 emit_move_insn (reg0, src);
14423 else
14424 emit_insn (gen_unaligned_loadsi (reg0, src));
14425
14426 if (dst_aligned)
14427 emit_move_insn (dst, reg0);
14428 else
14429 emit_insn (gen_unaligned_storesi (dst, reg0));
14430
14431 src = next_consecutive_mem (src);
14432 dst = next_consecutive_mem (dst);
14433 len -= 4;
14434 }
14435
14436 if (len == 0)
14437 return true;
14438
14439 /* Copy the remaining bytes. */
14440 if (len >= 2)
14441 {
14442 dst = adjust_address (dst, HImode, 0);
14443 src = adjust_address (src, HImode, 0);
14444 reg0 = gen_reg_rtx (SImode);
14445 if (src_aligned)
14446 emit_insn (gen_zero_extendhisi2 (reg0, src));
14447 else
14448 emit_insn (gen_unaligned_loadhiu (reg0, src));
14449
14450 if (dst_aligned)
14451 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14452 else
14453 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14454
14455 src = next_consecutive_mem (src);
14456 dst = next_consecutive_mem (dst);
14457 if (len == 2)
14458 return true;
14459 }
14460
14461 dst = adjust_address (dst, QImode, 0);
14462 src = adjust_address (src, QImode, 0);
14463 reg0 = gen_reg_rtx (QImode);
14464 emit_move_insn (reg0, src);
14465 emit_move_insn (dst, reg0);
14466 return true;
14467 }
14468
14469 /* Select a dominance comparison mode if possible for a test of the general
14470 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14471 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14472 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14473 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14474 In all cases OP will be either EQ or NE, but we don't need to know which
14475 here. If we are unable to support a dominance comparison we return
14476 CC mode. This will then fail to match for the RTL expressions that
14477 generate this call. */
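/* For illustration: if X is (lt r0 r1) and Y is (le r2 r3), both simple
   comparisons, then DOM_CC_X_AND_Y selects CC_DLTmode below, while
   DOM_CC_X_OR_Y selects CC_DLEmode, since anything satisfying LT also
   satisfies LE.  */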
14478 machine_mode
14479 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14480 {
14481 enum rtx_code cond1, cond2;
14482 int swapped = 0;
14483
14484 /* Currently we will probably get the wrong result if the individual
14485 comparisons are not simple. This also ensures that it is safe to
14486 reverse a comparison if necessary. */
14487 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14488 != CCmode)
14489 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14490 != CCmode))
14491 return CCmode;
14492
14493 /* The if_then_else variant of this tests the second condition if the
14494 first passes, but is true if the first fails. Reverse the first
14495 condition to get a true "inclusive-or" expression. */
14496 if (cond_or == DOM_CC_NX_OR_Y)
14497 cond1 = reverse_condition (cond1);
14498
14499 /* If the comparisons are not equal, and one doesn't dominate the other,
14500 then we can't do this. */
14501 if (cond1 != cond2
14502 && !comparison_dominates_p (cond1, cond2)
14503 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14504 return CCmode;
14505
14506 if (swapped)
14507 std::swap (cond1, cond2);
14508
14509 switch (cond1)
14510 {
14511 case EQ:
14512 if (cond_or == DOM_CC_X_AND_Y)
14513 return CC_DEQmode;
14514
14515 switch (cond2)
14516 {
14517 case EQ: return CC_DEQmode;
14518 case LE: return CC_DLEmode;
14519 case LEU: return CC_DLEUmode;
14520 case GE: return CC_DGEmode;
14521 case GEU: return CC_DGEUmode;
14522 default: gcc_unreachable ();
14523 }
14524
14525 case LT:
14526 if (cond_or == DOM_CC_X_AND_Y)
14527 return CC_DLTmode;
14528
14529 switch (cond2)
14530 {
14531 case LT:
14532 return CC_DLTmode;
14533 case LE:
14534 return CC_DLEmode;
14535 case NE:
14536 return CC_DNEmode;
14537 default:
14538 gcc_unreachable ();
14539 }
14540
14541 case GT:
14542 if (cond_or == DOM_CC_X_AND_Y)
14543 return CC_DGTmode;
14544
14545 switch (cond2)
14546 {
14547 case GT:
14548 return CC_DGTmode;
14549 case GE:
14550 return CC_DGEmode;
14551 case NE:
14552 return CC_DNEmode;
14553 default:
14554 gcc_unreachable ();
14555 }
14556
14557 case LTU:
14558 if (cond_or == DOM_CC_X_AND_Y)
14559 return CC_DLTUmode;
14560
14561 switch (cond2)
14562 {
14563 case LTU:
14564 return CC_DLTUmode;
14565 case LEU:
14566 return CC_DLEUmode;
14567 case NE:
14568 return CC_DNEmode;
14569 default:
14570 gcc_unreachable ();
14571 }
14572
14573 case GTU:
14574 if (cond_or == DOM_CC_X_AND_Y)
14575 return CC_DGTUmode;
14576
14577 switch (cond2)
14578 {
14579 case GTU:
14580 return CC_DGTUmode;
14581 case GEU:
14582 return CC_DGEUmode;
14583 case NE:
14584 return CC_DNEmode;
14585 default:
14586 gcc_unreachable ();
14587 }
14588
14589 /* The remaining cases only occur when both comparisons are the
14590 same. */
14591 case NE:
14592 gcc_assert (cond1 == cond2);
14593 return CC_DNEmode;
14594
14595 case LE:
14596 gcc_assert (cond1 == cond2);
14597 return CC_DLEmode;
14598
14599 case GE:
14600 gcc_assert (cond1 == cond2);
14601 return CC_DGEmode;
14602
14603 case LEU:
14604 gcc_assert (cond1 == cond2);
14605 return CC_DLEUmode;
14606
14607 case GEU:
14608 gcc_assert (cond1 == cond2);
14609 return CC_DGEUmode;
14610
14611 default:
14612 gcc_unreachable ();
14613 }
14614 }
14615
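/* Select the CC mode needed to compare X against Y using operator OP.
   For illustration: comparing a shifted operand such as
   (ashift r0 (const_int 3)) against a register selects CC_SWPmode, because
   the operands will have to be swapped when the comparison is output, while
   an EQ/NE test of a QImode value selects CC_Zmode.  */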
14616 machine_mode
14617 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14618 {
14619 /* All floating point compares return CCFP if it is an equality
14620 comparison, and CCFPE otherwise. */
14621 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14622 {
14623 switch (op)
14624 {
14625 case EQ:
14626 case NE:
14627 case UNORDERED:
14628 case ORDERED:
14629 case UNLT:
14630 case UNLE:
14631 case UNGT:
14632 case UNGE:
14633 case UNEQ:
14634 case LTGT:
14635 return CCFPmode;
14636
14637 case LT:
14638 case LE:
14639 case GT:
14640 case GE:
14641 return CCFPEmode;
14642
14643 default:
14644 gcc_unreachable ();
14645 }
14646 }
14647
14648 /* A compare with a shifted operand. Because of canonicalization, the
14649 comparison will have to be swapped when we emit the assembler. */
14650 if (GET_MODE (y) == SImode
14651 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14652 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14653 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14654 || GET_CODE (x) == ROTATERT))
14655 return CC_SWPmode;
14656
14657 /* This operation is performed swapped, but since we only rely on the Z
14658 flag we don't need an additional mode. */
14659 if (GET_MODE (y) == SImode
14660 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14661 && GET_CODE (x) == NEG
14662 && (op == EQ || op == NE))
14663 return CC_Zmode;
14664
14665 /* This is a special case that is used by combine to allow a
14666 comparison of a shifted byte load to be split into a zero-extend
14667 followed by a comparison of the shifted integer (only valid for
14668 equalities and unsigned inequalities). */
14669 if (GET_MODE (x) == SImode
14670 && GET_CODE (x) == ASHIFT
14671 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14672 && GET_CODE (XEXP (x, 0)) == SUBREG
14673 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14674 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14675 && (op == EQ || op == NE
14676 || op == GEU || op == GTU || op == LTU || op == LEU)
14677 && CONST_INT_P (y))
14678 return CC_Zmode;
14679
14680 /* A construct for a conditional compare, if the false arm contains
14681 0, then both conditions must be true, otherwise either condition
14682 must be true. Not all conditions are possible, so CCmode is
14683 returned if it can't be done. */
14684 if (GET_CODE (x) == IF_THEN_ELSE
14685 && (XEXP (x, 2) == const0_rtx
14686 || XEXP (x, 2) == const1_rtx)
14687 && COMPARISON_P (XEXP (x, 0))
14688 && COMPARISON_P (XEXP (x, 1)))
14689 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14690 INTVAL (XEXP (x, 2)));
14691
14692 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14693 if (GET_CODE (x) == AND
14694 && (op == EQ || op == NE)
14695 && COMPARISON_P (XEXP (x, 0))
14696 && COMPARISON_P (XEXP (x, 1)))
14697 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14698 DOM_CC_X_AND_Y);
14699
14700 if (GET_CODE (x) == IOR
14701 && (op == EQ || op == NE)
14702 && COMPARISON_P (XEXP (x, 0))
14703 && COMPARISON_P (XEXP (x, 1)))
14704 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14705 DOM_CC_X_OR_Y);
14706
14707 /* An operation (on Thumb) where we want to test for a single bit.
14708 This is done by shifting that bit up into the top bit of a
14709 scratch register; we can then branch on the sign bit. */
14710 if (TARGET_THUMB1
14711 && GET_MODE (x) == SImode
14712 && (op == EQ || op == NE)
14713 && GET_CODE (x) == ZERO_EXTRACT
14714 && XEXP (x, 1) == const1_rtx)
14715 return CC_Nmode;
14716
14717 /* For an operation that sets the condition codes as a side-effect, the
14718 V flag is not set correctly, so we can only use comparisons where
14719 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14720 instead.) */
14721 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14722 if (GET_MODE (x) == SImode
14723 && y == const0_rtx
14724 && (op == EQ || op == NE || op == LT || op == GE)
14725 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14726 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14727 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14728 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14729 || GET_CODE (x) == LSHIFTRT
14730 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14731 || GET_CODE (x) == ROTATERT
14732 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14733 return CC_NOOVmode;
14734
14735 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14736 return CC_Zmode;
14737
14738 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14739 && GET_CODE (x) == PLUS
14740 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14741 return CC_Cmode;
14742
14743 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14744 {
14745 switch (op)
14746 {
14747 case EQ:
14748 case NE:
14749 /* A DImode comparison against zero can be implemented by
14750 or'ing the two halves together. */
14751 if (y == const0_rtx)
14752 return CC_Zmode;
14753
14754 /* We can do an equality test in three Thumb instructions. */
14755 if (!TARGET_32BIT)
14756 return CC_Zmode;
14757
14758 /* FALLTHROUGH */
14759
14760 case LTU:
14761 case LEU:
14762 case GTU:
14763 case GEU:
14764 /* DImode unsigned comparisons can be implemented by cmp +
14765 cmpeq without a scratch register. Not worth doing in
14766 Thumb-2. */
14767 if (TARGET_32BIT)
14768 return CC_CZmode;
14769
14770 /* FALLTHROUGH */
14771
14772 case LT:
14773 case LE:
14774 case GT:
14775 case GE:
14776 /* DImode signed and unsigned comparisons can be implemented
14777 by cmp + sbcs with a scratch register, but that does not
14778 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14779 gcc_assert (op != EQ && op != NE);
14780 return CC_NCVmode;
14781
14782 default:
14783 gcc_unreachable ();
14784 }
14785 }
14786
14787 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14788 return GET_MODE (x);
14789
14790 return CCmode;
14791 }
14792
14793 /* X and Y are two things to compare using CODE. Emit the compare insn and
14794 return the rtx for register 0 in the proper mode. FP means this is a
14795 floating point compare: I don't think that it is needed on the arm. */
14796 rtx
14797 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14798 {
14799 machine_mode mode;
14800 rtx cc_reg;
14801 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14802
14803 /* We might have X as a constant, Y as a register because of the predicates
14804 used for cmpdi. If so, force X to a register here. */
14805 if (dimode_comparison && !REG_P (x))
14806 x = force_reg (DImode, x);
14807
14808 mode = SELECT_CC_MODE (code, x, y);
14809 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14810
14811 if (dimode_comparison
14812 && mode != CC_CZmode)
14813 {
14814 rtx clobber, set;
14815
14816 /* To compare two non-zero values for equality, XOR them and
14817 then compare against zero. Not used for ARM mode; there
14818 CC_CZmode is cheaper. */
14819 if (mode == CC_Zmode && y != const0_rtx)
14820 {
14821 gcc_assert (!reload_completed);
14822 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14823 y = const0_rtx;
14824 }
14825
14826 /* A scratch register is required. */
14827 if (reload_completed)
14828 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14829 else
14830 scratch = gen_rtx_SCRATCH (SImode);
14831
14832 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14833 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14834 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14835 }
14836 else
14837 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14838
14839 return cc_reg;
14840 }
14841
14842 /* Generate a sequence of insns that will generate the correct return
14843 address mask depending on the physical architecture that the program
14844 is running on. */
14845 rtx
14846 arm_gen_return_addr_mask (void)
14847 {
14848 rtx reg = gen_reg_rtx (Pmode);
14849
14850 emit_insn (gen_return_addr_mask (reg));
14851 return reg;
14852 }
14853
14854 void
14855 arm_reload_in_hi (rtx *operands)
14856 {
14857 rtx ref = operands[1];
14858 rtx base, scratch;
14859 HOST_WIDE_INT offset = 0;
14860
14861 if (GET_CODE (ref) == SUBREG)
14862 {
14863 offset = SUBREG_BYTE (ref);
14864 ref = SUBREG_REG (ref);
14865 }
14866
14867 if (REG_P (ref))
14868 {
14869 /* We have a pseudo which has been spilt onto the stack; there
14870 are two cases here: the first where there is a simple
14871 stack-slot replacement and a second where the stack-slot is
14872 out of range, or is used as a subreg. */
14873 if (reg_equiv_mem (REGNO (ref)))
14874 {
14875 ref = reg_equiv_mem (REGNO (ref));
14876 base = find_replacement (&XEXP (ref, 0));
14877 }
14878 else
14879 /* The slot is out of range, or was dressed up in a SUBREG. */
14880 base = reg_equiv_address (REGNO (ref));
14881
14882 /* PR 62254: If there is no equivalent memory location then just move
14883 the value as an SImode register move. This happens when the target
14884 architecture variant does not have an HImode register move. */
14885 if (base == NULL)
14886 {
14887 gcc_assert (REG_P (operands[0]));
14888 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14889 gen_rtx_SUBREG (SImode, ref, 0)));
14890 return;
14891 }
14892 }
14893 else
14894 base = find_replacement (&XEXP (ref, 0));
14895
14896 /* Handle the case where the address is too complex to be offset by 1. */
14897 if (GET_CODE (base) == MINUS
14898 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14899 {
14900 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14901
14902 emit_set_insn (base_plus, base);
14903 base = base_plus;
14904 }
14905 else if (GET_CODE (base) == PLUS)
14906 {
14907 /* The addend must be CONST_INT, or we would have dealt with it above. */
14908 HOST_WIDE_INT hi, lo;
14909
14910 offset += INTVAL (XEXP (base, 1));
14911 base = XEXP (base, 0);
14912
14913 /* Rework the address into a legal sequence of insns. */
14914 /* Valid range for lo is -4095 -> 4095 */
14915 lo = (offset >= 0
14916 ? (offset & 0xfff)
14917 : -((-offset) & 0xfff));
14918
14919 /* Corner case, if lo is the max offset then we would be out of range
14920 once we have added the additional 1 below, so bump the msb into the
14921 pre-loading insn(s). */
14922 if (lo == 4095)
14923 lo &= 0x7ff;
14924
14925 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14926 ^ (HOST_WIDE_INT) 0x80000000)
14927 - (HOST_WIDE_INT) 0x80000000);
14928
14929 gcc_assert (hi + lo == offset);
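      /* For example (illustrative values only): an offset of 0x1234 splits
         into lo = 0x234 and hi = 0x1000; an offset of 4095 has its low part
         trimmed to 2047 so that the offset + 1 access below still fits in
         the 12-bit range, giving hi = 2048.  */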
14930
14931 if (hi != 0)
14932 {
14933 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14934
14935 /* Get the base address; addsi3 knows how to handle constants
14936 that require more than one insn. */
14937 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14938 base = base_plus;
14939 offset = lo;
14940 }
14941 }
14942
14943 /* Operands[2] may overlap operands[0] (though it won't overlap
14944 operands[1]); that's why we asked for a DImode reg -- so we can
14945 use the half that does not overlap. */
14946 if (REGNO (operands[2]) == REGNO (operands[0]))
14947 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14948 else
14949 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14950
14951 emit_insn (gen_zero_extendqisi2 (scratch,
14952 gen_rtx_MEM (QImode,
14953 plus_constant (Pmode, base,
14954 offset))));
14955 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14956 gen_rtx_MEM (QImode,
14957 plus_constant (Pmode, base,
14958 offset + 1))));
14959 if (!BYTES_BIG_ENDIAN)
14960 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14961 gen_rtx_IOR (SImode,
14962 gen_rtx_ASHIFT
14963 (SImode,
14964 gen_rtx_SUBREG (SImode, operands[0], 0),
14965 GEN_INT (8)),
14966 scratch));
14967 else
14968 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14969 gen_rtx_IOR (SImode,
14970 gen_rtx_ASHIFT (SImode, scratch,
14971 GEN_INT (8)),
14972 gen_rtx_SUBREG (SImode, operands[0], 0)));
14973 }
14974
14975 /* Handle storing a half-word to memory during reload by synthesizing as two
14976 byte stores. Take care not to clobber the input values until after we
14977 have moved them somewhere safe. This code assumes that if the DImode
14978 scratch in operands[2] overlaps either the input value or output address
14979 in some way, then that value must die in this insn (we absolutely need
14980 two scratch registers for some corner cases). */
14981 void
14982 arm_reload_out_hi (rtx *operands)
14983 {
14984 rtx ref = operands[0];
14985 rtx outval = operands[1];
14986 rtx base, scratch;
14987 HOST_WIDE_INT offset = 0;
14988
14989 if (GET_CODE (ref) == SUBREG)
14990 {
14991 offset = SUBREG_BYTE (ref);
14992 ref = SUBREG_REG (ref);
14993 }
14994
14995 if (REG_P (ref))
14996 {
14997 /* We have a pseudo which has been spilt onto the stack; there
14998 are two cases here: the first where there is a simple
14999 stack-slot replacement and a second where the stack-slot is
15000 out of range, or is used as a subreg. */
15001 if (reg_equiv_mem (REGNO (ref)))
15002 {
15003 ref = reg_equiv_mem (REGNO (ref));
15004 base = find_replacement (&XEXP (ref, 0));
15005 }
15006 else
15007 /* The slot is out of range, or was dressed up in a SUBREG. */
15008 base = reg_equiv_address (REGNO (ref));
15009
15010 /* PR 62254: If there is no equivalent memory location then just move
15011 the value as an SImode register move. This happens when the target
15012 architecture variant does not have an HImode register move. */
15013 if (base == NULL)
15014 {
15015 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15016
15017 if (REG_P (outval))
15018 {
15019 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15020 gen_rtx_SUBREG (SImode, outval, 0)));
15021 }
15022 else /* SUBREG_P (outval) */
15023 {
15024 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15025 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15026 SUBREG_REG (outval)));
15027 else
15028 /* FIXME: Handle other cases ? */
15029 gcc_unreachable ();
15030 }
15031 return;
15032 }
15033 }
15034 else
15035 base = find_replacement (&XEXP (ref, 0));
15036
15037 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15038
15039 /* Handle the case where the address is too complex to be offset by 1. */
15040 if (GET_CODE (base) == MINUS
15041 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15042 {
15043 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15044
15045 /* Be careful not to destroy OUTVAL. */
15046 if (reg_overlap_mentioned_p (base_plus, outval))
15047 {
15048 /* Updating base_plus might destroy outval, see if we can
15049 swap the scratch and base_plus. */
15050 if (!reg_overlap_mentioned_p (scratch, outval))
15051 std::swap (scratch, base_plus);
15052 else
15053 {
15054 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15055
15056 /* Be conservative and copy OUTVAL into the scratch now,
15057 this should only be necessary if outval is a subreg
15058 of something larger than a word. */
15059 /* XXX Might this clobber base? I can't see how it can,
15060 since scratch is known to overlap with OUTVAL, and
15061 must be wider than a word. */
15062 emit_insn (gen_movhi (scratch_hi, outval));
15063 outval = scratch_hi;
15064 }
15065 }
15066
15067 emit_set_insn (base_plus, base);
15068 base = base_plus;
15069 }
15070 else if (GET_CODE (base) == PLUS)
15071 {
15072 /* The addend must be CONST_INT, or we would have dealt with it above. */
15073 HOST_WIDE_INT hi, lo;
15074
15075 offset += INTVAL (XEXP (base, 1));
15076 base = XEXP (base, 0);
15077
15078 /* Rework the address into a legal sequence of insns. */
15079 /* Valid range for lo is -4095 -> 4095 */
15080 lo = (offset >= 0
15081 ? (offset & 0xfff)
15082 : -((-offset) & 0xfff));
15083
15084 /* Corner case, if lo is the max offset then we would be out of range
15085 once we have added the additional 1 below, so bump the msb into the
15086 pre-loading insn(s). */
15087 if (lo == 4095)
15088 lo &= 0x7ff;
15089
15090 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15091 ^ (HOST_WIDE_INT) 0x80000000)
15092 - (HOST_WIDE_INT) 0x80000000);
15093
15094 gcc_assert (hi + lo == offset);
15095
15096 if (hi != 0)
15097 {
15098 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15099
15100 /* Be careful not to destroy OUTVAL. */
15101 if (reg_overlap_mentioned_p (base_plus, outval))
15102 {
15103 /* Updating base_plus might destroy outval, see if we
15104 can swap the scratch and base_plus. */
15105 if (!reg_overlap_mentioned_p (scratch, outval))
15106 std::swap (scratch, base_plus);
15107 else
15108 {
15109 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15110
15111 /* Be conservative and copy outval into scratch now,
15112 this should only be necessary if outval is a
15113 subreg of something larger than a word. */
15114 /* XXX Might this clobber base? I can't see how it
15115 can, since scratch is known to overlap with
15116 outval. */
15117 emit_insn (gen_movhi (scratch_hi, outval));
15118 outval = scratch_hi;
15119 }
15120 }
15121
15122 /* Get the base address; addsi3 knows how to handle constants
15123 that require more than one insn. */
15124 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15125 base = base_plus;
15126 offset = lo;
15127 }
15128 }
15129
15130 if (BYTES_BIG_ENDIAN)
15131 {
15132 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15133 plus_constant (Pmode, base,
15134 offset + 1)),
15135 gen_lowpart (QImode, outval)));
15136 emit_insn (gen_lshrsi3 (scratch,
15137 gen_rtx_SUBREG (SImode, outval, 0),
15138 GEN_INT (8)));
15139 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15140 offset)),
15141 gen_lowpart (QImode, scratch)));
15142 }
15143 else
15144 {
15145 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15146 offset)),
15147 gen_lowpart (QImode, outval)));
15148 emit_insn (gen_lshrsi3 (scratch,
15149 gen_rtx_SUBREG (SImode, outval, 0),
15150 GEN_INT (8)));
15151 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15152 plus_constant (Pmode, base,
15153 offset + 1)),
15154 gen_lowpart (QImode, scratch)));
15155 }
15156 }
15157
15158 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15159 (padded to the size of a word) should be passed in a register. */
15160
15161 static bool
15162 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15163 {
15164 if (TARGET_AAPCS_BASED)
15165 return must_pass_in_stack_var_size (mode, type);
15166 else
15167 return must_pass_in_stack_var_size_or_pad (mode, type);
15168 }
15169
15170
15171 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15172 byte of a stack argument has useful data. For legacy APCS ABIs we use
15173 the default. For AAPCS based ABIs small aggregate types are placed
15174 in the lowest memory address. */
15175
15176 static pad_direction
15177 arm_function_arg_padding (machine_mode mode, const_tree type)
15178 {
15179 if (!TARGET_AAPCS_BASED)
15180 return default_function_arg_padding (mode, type);
15181
15182 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15183 return PAD_DOWNWARD;
15184
15185 return PAD_UPWARD;
15186 }
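/* For illustration: on a big-endian AAPCS target the code above pads a
   2-byte integer argument downward, while a 2-byte structure is padded
   upward, since aggregates are not INTEGRAL_TYPE_P.  */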
15187
15188
15189 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15190 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15191 register has useful data, and return the opposite if the most
15192 significant byte does. */
15193
15194 bool
15195 arm_pad_reg_upward (machine_mode mode,
15196 tree type, int first ATTRIBUTE_UNUSED)
15197 {
15198 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15199 {
15200 /* For AAPCS, small aggregates, small fixed-point types,
15201 and small complex types are always padded upwards. */
15202 if (type)
15203 {
15204 if ((AGGREGATE_TYPE_P (type)
15205 || TREE_CODE (type) == COMPLEX_TYPE
15206 || FIXED_POINT_TYPE_P (type))
15207 && int_size_in_bytes (type) <= 4)
15208 return true;
15209 }
15210 else
15211 {
15212 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15213 && GET_MODE_SIZE (mode) <= 4)
15214 return true;
15215 }
15216 }
15217
15218 /* Otherwise, use default padding. */
15219 return !BYTES_BIG_ENDIAN;
15220 }
15221
15222 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15223 assuming that the address in the base register is word aligned. */
15224 bool
15225 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15226 {
15227 HOST_WIDE_INT max_offset;
15228
15229 /* Offset must be a multiple of 4 in Thumb mode. */
15230 if (TARGET_THUMB2 && ((offset & 3) != 0))
15231 return false;
15232
15233 if (TARGET_THUMB2)
15234 max_offset = 1020;
15235 else if (TARGET_ARM)
15236 max_offset = 255;
15237 else
15238 return false;
15239
15240 return ((offset <= max_offset) && (offset >= -max_offset));
15241 }
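/* For illustration: an offset of 1020 is accepted in Thumb-2 but rejected
   in ARM state (limit 255), while an offset of 250 is accepted in ARM state
   but rejected in Thumb-2 because it is not a multiple of four.  */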
15242
15243 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15244 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15245 Assumes that the address in the base register RN is word aligned. Pattern
15246 guarantees that both memory accesses use the same base register,
15247 the offsets are constants within the range, and the gap between the offsets is 4.
15248 If reload is complete then check that registers are legal. WBACK indicates whether
15249 address is updated. LOAD indicates whether memory access is load or store. */
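/* For illustration: after reload in ARM state the checks below accept
   ldrd r2, r3, [r5, #8] (even first register, consecutive pair, small
   offset) but reject a pair starting at an odd register such as r1, or one
   that uses the PC as the base register.  */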
15250 bool
15251 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15252 bool wback, bool load)
15253 {
15254 unsigned int t, t2, n;
15255
15256 if (!reload_completed)
15257 return true;
15258
15259 if (!offset_ok_for_ldrd_strd (offset))
15260 return false;
15261
15262 t = REGNO (rt);
15263 t2 = REGNO (rt2);
15264 n = REGNO (rn);
15265
15266 if ((TARGET_THUMB2)
15267 && ((wback && (n == t || n == t2))
15268 || (t == SP_REGNUM)
15269 || (t == PC_REGNUM)
15270 || (t2 == SP_REGNUM)
15271 || (t2 == PC_REGNUM)
15272 || (!load && (n == PC_REGNUM))
15273 || (load && (t == t2))
15274 /* Triggers Cortex-M3 LDRD errata. */
15275 || (!wback && load && fix_cm3_ldrd && (n == t))))
15276 return false;
15277
15278 if ((TARGET_ARM)
15279 && ((wback && (n == t || n == t2))
15280 || (t2 == PC_REGNUM)
15281 || (t % 2 != 0) /* First destination register is not even. */
15282 || (t2 != t + 1)
15283 /* PC can be used as base register (for offset addressing only),
15284 but it is deprecated. */
15285 || (n == PC_REGNUM)))
15286 return false;
15287
15288 return true;
15289 }
15290
15291 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15292 operand MEM's address contains an immediate offset from the base
15293 register and has no side effects, in which case it sets BASE and
15294 OFFSET accordingly. */
15295 static bool
15296 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15297 {
15298 rtx addr;
15299
15300 gcc_assert (base != NULL && offset != NULL);
15301
15302 /* TODO: Handle more general memory operand patterns, such as
15303 PRE_DEC and PRE_INC. */
15304
15305 if (side_effects_p (mem))
15306 return false;
15307
15308 /* Can't deal with subregs. */
15309 if (GET_CODE (mem) == SUBREG)
15310 return false;
15311
15312 gcc_assert (MEM_P (mem));
15313
15314 *offset = const0_rtx;
15315
15316 addr = XEXP (mem, 0);
15317
15318 /* If addr isn't valid for DImode, then we can't handle it. */
15319 if (!arm_legitimate_address_p (DImode, addr,
15320 reload_in_progress || reload_completed))
15321 return false;
15322
15323 if (REG_P (addr))
15324 {
15325 *base = addr;
15326 return true;
15327 }
15328 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15329 {
15330 *base = XEXP (addr, 0);
15331 *offset = XEXP (addr, 1);
15332 return (REG_P (*base) && CONST_INT_P (*offset));
15333 }
15334
15335 return false;
15336 }
15337
15338 /* Called from a peephole2 to replace two word-size accesses with a
15339 single LDRD/STRD instruction. Returns true iff we can generate a
15340 new instruction sequence. That is, both accesses use the same base
15341 register and the gap between constant offsets is 4. This function
15342 may reorder its operands to match ldrd/strd RTL templates.
15343 OPERANDS are the operands found by the peephole matcher;
15344 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15345 corresponding memory operands. LOAD indicates whether the access
15346 is load or store. CONST_STORE indicates a store of constant
15347 integer values held in OPERANDS[4,5] and assumes that the pattern
15348 is 4 insns long, for the purpose of checking dead registers.
15349 COMMUTE indicates that register operands may be reordered. */
15350 bool
15351 gen_operands_ldrd_strd (rtx *operands, bool load,
15352 bool const_store, bool commute)
15353 {
15354 int nops = 2;
15355 HOST_WIDE_INT offsets[2], offset;
15356 rtx base = NULL_RTX;
15357 rtx cur_base, cur_offset, tmp;
15358 int i, gap;
15359 HARD_REG_SET regset;
15360
15361 gcc_assert (!const_store || !load);
15362 /* Check that the memory references are immediate offsets from the
15363 same base register. Extract the base register, the destination
15364 registers, and the corresponding memory offsets. */
15365 for (i = 0; i < nops; i++)
15366 {
15367 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15368 return false;
15369
15370 if (i == 0)
15371 base = cur_base;
15372 else if (REGNO (base) != REGNO (cur_base))
15373 return false;
15374
15375 offsets[i] = INTVAL (cur_offset);
15376 if (GET_CODE (operands[i]) == SUBREG)
15377 {
15378 tmp = SUBREG_REG (operands[i]);
15379 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15380 operands[i] = tmp;
15381 }
15382 }
15383
15384 /* Make sure there is no dependency between the individual loads. */
15385 if (load && REGNO (operands[0]) == REGNO (base))
15386 return false; /* RAW */
15387
15388 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15389 return false; /* WAW */
15390
15391 /* If the same input register is used in both stores
15392 when storing different constants, try to find a free register.
15393 For example, the code
15394 mov r0, 0
15395 str r0, [r2]
15396 mov r0, 1
15397 str r0, [r2, #4]
15398 can be transformed into
15399 mov r1, 0
15400 mov r0, 1
15401 strd r1, r0, [r2]
15402 in Thumb mode assuming that r1 is free.
15403 For ARM mode do the same but only if the starting register
15404 can be made to be even. */
15405 if (const_store
15406 && REGNO (operands[0]) == REGNO (operands[1])
15407 && INTVAL (operands[4]) != INTVAL (operands[5]))
15408 {
15409 if (TARGET_THUMB2)
15410 {
15411 CLEAR_HARD_REG_SET (regset);
15412 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15413 if (tmp == NULL_RTX)
15414 return false;
15415
15416 /* Use the new register in the first load to ensure that
15417 if the original input register is not dead after peephole,
15418 then it will have the correct constant value. */
15419 operands[0] = tmp;
15420 }
15421 else if (TARGET_ARM)
15422 {
15423 int regno = REGNO (operands[0]);
15424 if (!peep2_reg_dead_p (4, operands[0]))
15425 {
15426 /* When the input register is even and is not dead after the
15427 pattern, it has to hold the second constant but we cannot
15428 form a legal STRD in ARM mode with this register as the second
15429 register. */
15430 if (regno % 2 == 0)
15431 return false;
15432
15433 /* Is regno-1 free? */
15434 SET_HARD_REG_SET (regset);
15435 CLEAR_HARD_REG_BIT(regset, regno - 1);
15436 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15437 if (tmp == NULL_RTX)
15438 return false;
15439
15440 operands[0] = tmp;
15441 }
15442 else
15443 {
15444 /* Find a DImode register. */
15445 CLEAR_HARD_REG_SET (regset);
15446 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15447 if (tmp != NULL_RTX)
15448 {
15449 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15450 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15451 }
15452 else
15453 {
15454 /* Can we use the input register to form a DI register? */
15455 SET_HARD_REG_SET (regset);
15456 CLEAR_HARD_REG_BIT(regset,
15457 regno % 2 == 0 ? regno + 1 : regno - 1);
15458 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15459 if (tmp == NULL_RTX)
15460 return false;
15461 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15462 }
15463 }
15464
15465 gcc_assert (operands[0] != NULL_RTX);
15466 gcc_assert (operands[1] != NULL_RTX);
15467 gcc_assert (REGNO (operands[0]) % 2 == 0);
15468 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15469 }
15470 }
15471
15472 /* Make sure the instructions are ordered with lower memory access first. */
15473 if (offsets[0] > offsets[1])
15474 {
15475 gap = offsets[0] - offsets[1];
15476 offset = offsets[1];
15477
15478 /* Swap the instructions such that lower memory is accessed first. */
15479 std::swap (operands[0], operands[1]);
15480 std::swap (operands[2], operands[3]);
15481 if (const_store)
15482 std::swap (operands[4], operands[5]);
15483 }
15484 else
15485 {
15486 gap = offsets[1] - offsets[0];
15487 offset = offsets[0];
15488 }
15489
15490 /* Make sure accesses are to consecutive memory locations. */
15491 if (gap != 4)
15492 return false;
15493
15494 /* Make sure we generate legal instructions. */
15495 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15496 false, load))
15497 return true;
15498
15499 /* In Thumb state, where registers are almost unconstrained, there
15500 is little hope of fixing it. */
15501 if (TARGET_THUMB2)
15502 return false;
15503
15504 if (load && commute)
15505 {
15506 /* Try reordering registers. */
15507 std::swap (operands[0], operands[1]);
15508 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15509 false, load))
15510 return true;
15511 }
15512
15513 if (const_store)
15514 {
15515 /* If input registers are dead after this pattern, they can be
15516 reordered or replaced by other registers that are free in the
15517 current pattern. */
15518 if (!peep2_reg_dead_p (4, operands[0])
15519 || !peep2_reg_dead_p (4, operands[1]))
15520 return false;
15521
15522 /* Try to reorder the input registers. */
15523 /* For example, the code
15524 mov r0, 0
15525 mov r1, 1
15526 str r1, [r2]
15527 str r0, [r2, #4]
15528 can be transformed into
15529 mov r1, 0
15530 mov r0, 1
15531 strd r0, [r2]
15532 */
15533 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15534 false, false))
15535 {
15536 std::swap (operands[0], operands[1]);
15537 return true;
15538 }
15539
15540 /* Try to find a free DI register. */
15541 CLEAR_HARD_REG_SET (regset);
15542 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15543 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15544 while (true)
15545 {
15546 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15547 if (tmp == NULL_RTX)
15548 return false;
15549
15550 /* DREG must be an even-numbered register in DImode.
15551 Split it into SI registers. */
15552 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15553 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15554 gcc_assert (operands[0] != NULL_RTX);
15555 gcc_assert (operands[1] != NULL_RTX);
15556 gcc_assert (REGNO (operands[0]) % 2 == 0);
15557 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15558
15559 return (operands_ok_ldrd_strd (operands[0], operands[1],
15560 base, offset,
15561 false, load));
15562 }
15563 }
15564
15565 return false;
15566 }
15567
15568
15569
15570 \f
15571 /* Print a symbolic form of X to the debug file, F. */
15572 static void
15573 arm_print_value (FILE *f, rtx x)
15574 {
15575 switch (GET_CODE (x))
15576 {
15577 case CONST_INT:
15578 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15579 return;
15580
15581 case CONST_DOUBLE:
15582 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15583 return;
15584
15585 case CONST_VECTOR:
15586 {
15587 int i;
15588
15589 fprintf (f, "<");
15590 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15591 {
15592 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15593 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15594 fputc (',', f);
15595 }
15596 fprintf (f, ">");
15597 }
15598 return;
15599
15600 case CONST_STRING:
15601 fprintf (f, "\"%s\"", XSTR (x, 0));
15602 return;
15603
15604 case SYMBOL_REF:
15605 fprintf (f, "`%s'", XSTR (x, 0));
15606 return;
15607
15608 case LABEL_REF:
15609 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15610 return;
15611
15612 case CONST:
15613 arm_print_value (f, XEXP (x, 0));
15614 return;
15615
15616 case PLUS:
15617 arm_print_value (f, XEXP (x, 0));
15618 fprintf (f, "+");
15619 arm_print_value (f, XEXP (x, 1));
15620 return;
15621
15622 case PC:
15623 fprintf (f, "pc");
15624 return;
15625
15626 default:
15627 fprintf (f, "????");
15628 return;
15629 }
15630 }
15631 \f
15632 /* Routines for manipulation of the constant pool. */
15633
15634 /* Arm instructions cannot load a large constant directly into a
15635 register; they have to come from a pc relative load. The constant
15636 must therefore be placed in the addressable range of the pc
15637 relative load. Depending on the precise pc relative load
15638 instruction the range is somewhere between 256 bytes and 4k. This
15639 means that we often have to dump a constant inside a function, and
15640 generate code to branch around it.
15641
15642 It is important to minimize this, since the branches will slow
15643 things down and make the code larger.
15644
15645 Normally we can hide the table after an existing unconditional
15646 branch so that there is no interruption of the flow, but in the
15647 worst case the code looks like this:
15648
15649 ldr rn, L1
15650 ...
15651 b L2
15652 align
15653 L1: .long value
15654 L2:
15655 ...
15656
15657 ldr rn, L3
15658 ...
15659 b L4
15660 align
15661 L3: .long value
15662 L4:
15663 ...
15664
15665 We fix this by performing a scan after scheduling, which notices
15666 which instructions need to have their operands fetched from the
15667 constant table and builds the table.
15668
15669 The algorithm starts by building a table of all the constants that
15670 need fixing up and all the natural barriers in the function (places
15671 where a constant table can be dropped without breaking the flow).
15672 For each fixup we note how far the pc-relative replacement will be
15673 able to reach and the offset of the instruction into the function.
15674
15675 Having built the table we then group the fixes together to form
15676 tables that are as large as possible (subject to addressing
15677 constraints) and emit each table of constants after the last
15678 barrier that is within range of all the instructions in the group.
15679 If a group does not contain a barrier, then we forcibly create one
15680 by inserting a jump instruction into the flow. Once the table has
15681 been inserted, the insns are then modified to reference the
15682 relevant entry in the pool.
15683
15684 Possible enhancements to the algorithm (not implemented) are:
15685
15686 1) For some processors and object formats, there may be benefit in
15687 aligning the pools to the start of cache lines; this alignment
15688 would need to be taken into account when calculating addressability
15689 of a pool. */
15690
15691 /* These typedefs are located at the start of this file, so that
15692 they can be used in the prototypes there. This comment is to
15693 remind readers of that fact so that the following structures
15694 can be understood more easily.
15695
15696 typedef struct minipool_node Mnode;
15697 typedef struct minipool_fixup Mfix; */
15698
15699 struct minipool_node
15700 {
15701 /* Doubly linked chain of entries. */
15702 Mnode * next;
15703 Mnode * prev;
15704 /* The maximum offset into the code at which this entry can be placed. While
15705 pushing fixes for forward references, all entries are sorted in order
15706 of increasing max_address. */
15707 HOST_WIDE_INT max_address;
15708 /* Similarly for an entry inserted for a backwards ref. */
15709 HOST_WIDE_INT min_address;
15710 /* The number of fixes referencing this entry. This can become zero
15711 if we "unpush" an entry. In this case we ignore the entry when we
15712 come to emit the code. */
15713 int refcount;
15714 /* The offset from the start of the minipool. */
15715 HOST_WIDE_INT offset;
15716 /* The value in table. */
15717 rtx value;
15718 /* The mode of value. */
15719 machine_mode mode;
15720 /* The size of the value. With iWMMXt enabled
15721 sizes > 4 also imply an alignment of 8-bytes. */
15722 int fix_size;
15723 };
15724
15725 struct minipool_fixup
15726 {
15727 Mfix * next;
15728 rtx_insn * insn;
15729 HOST_WIDE_INT address;
15730 rtx * loc;
15731 machine_mode mode;
15732 int fix_size;
15733 rtx value;
15734 Mnode * minipool;
15735 HOST_WIDE_INT forwards;
15736 HOST_WIDE_INT backwards;
15737 };
15738
15739 /* Fixes less than a word need padding out to a word boundary. */
15740 #define MINIPOOL_FIX_SIZE(mode) \
15741 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
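/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DImode) is 8.  */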
15742
15743 static Mnode * minipool_vector_head;
15744 static Mnode * minipool_vector_tail;
15745 static rtx_code_label *minipool_vector_label;
15746 static int minipool_pad;
15747
15748 /* The linked list of all minipool fixes required for this function. */
15749 Mfix * minipool_fix_head;
15750 Mfix * minipool_fix_tail;
15751 /* The fix entry for the current minipool, once it has been placed. */
15752 Mfix * minipool_barrier;
15753
15754 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15755 #define JUMP_TABLES_IN_TEXT_SECTION 0
15756 #endif
15757
15758 static HOST_WIDE_INT
15759 get_jump_table_size (rtx_jump_table_data *insn)
15760 {
15761 /* ADDR_VECs only take room if read-only data goes into the text
15762 section. */
15763 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15764 {
15765 rtx body = PATTERN (insn);
15766 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15767 HOST_WIDE_INT size;
15768 HOST_WIDE_INT modesize;
15769
15770 modesize = GET_MODE_SIZE (GET_MODE (body));
15771 size = modesize * XVECLEN (body, elt);
15772 switch (modesize)
15773 {
15774 case 1:
15775 /* Round up size of TBB table to a halfword boundary. */
15776 size = (size + 1) & ~HOST_WIDE_INT_1;
15777 break;
15778 case 2:
15779 /* No padding necessary for TBH. */
15780 break;
15781 case 4:
15782 /* Add two bytes for alignment on Thumb. */
15783 if (TARGET_THUMB)
15784 size += 2;
15785 break;
15786 default:
15787 gcc_unreachable ();
15788 }
15789 return size;
15790 }
15791
15792 return 0;
15793 }
15794
15795 /* Return the maximum amount of padding that will be inserted before
15796 label LABEL. */
15797
15798 static HOST_WIDE_INT
15799 get_label_padding (rtx label)
15800 {
15801 HOST_WIDE_INT align, min_insn_size;
15802
15803 align = 1 << label_to_alignment (label);
15804 min_insn_size = TARGET_THUMB ? 2 : 4;
15805 return align > min_insn_size ? align - min_insn_size : 0;
15806 }
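/* For example, a label aligned to an 8-byte boundary in Thumb code can be
   preceded by at most 8 - 2 = 6 bytes of padding, since the smallest Thumb
   instruction is 2 bytes.  */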
15807
15808 /* Move a minipool fix MP from its current location to before MAX_MP.
15809 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15810 constraints may need updating. */
15811 static Mnode *
15812 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15813 HOST_WIDE_INT max_address)
15814 {
15815 /* The code below assumes these are different. */
15816 gcc_assert (mp != max_mp);
15817
15818 if (max_mp == NULL)
15819 {
15820 if (max_address < mp->max_address)
15821 mp->max_address = max_address;
15822 }
15823 else
15824 {
15825 if (max_address > max_mp->max_address - mp->fix_size)
15826 mp->max_address = max_mp->max_address - mp->fix_size;
15827 else
15828 mp->max_address = max_address;
15829
15830 /* Unlink MP from its current position. Since max_mp is non-null,
15831 mp->prev must be non-null. */
15832 mp->prev->next = mp->next;
15833 if (mp->next != NULL)
15834 mp->next->prev = mp->prev;
15835 else
15836 minipool_vector_tail = mp->prev;
15837
15838 /* Re-insert it before MAX_MP. */
15839 mp->next = max_mp;
15840 mp->prev = max_mp->prev;
15841 max_mp->prev = mp;
15842
15843 if (mp->prev != NULL)
15844 mp->prev->next = mp;
15845 else
15846 minipool_vector_head = mp;
15847 }
15848
15849 /* Save the new entry. */
15850 max_mp = mp;
15851
15852 /* Scan over the preceding entries and adjust their addresses as
15853 required. */
15854 while (mp->prev != NULL
15855 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15856 {
15857 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15858 mp = mp->prev;
15859 }
15860
15861 return max_mp;
15862 }
15863
15864 /* Add a constant to the minipool for a forward reference. Returns the
15865 node added or NULL if the constant will not fit in this pool. */
15866 static Mnode *
15867 add_minipool_forward_ref (Mfix *fix)
15868 {
15869 /* If set, max_mp is the first pool_entry that has a lower
15870 constraint than the one we are trying to add. */
15871 Mnode * max_mp = NULL;
15872 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15873 Mnode * mp;
15874
15875 /* If the minipool starts before the end of FIX->INSN then this FIX
15876 can not be placed into the current pool. Furthermore, adding the
15877 new constant pool entry may cause the pool to start FIX_SIZE bytes
15878 earlier. */
15879 if (minipool_vector_head &&
15880 (fix->address + get_attr_length (fix->insn)
15881 >= minipool_vector_head->max_address - fix->fix_size))
15882 return NULL;
15883
15884 /* Scan the pool to see if a constant with the same value has
15885 already been added. While we are doing this, also note the
15886 location where we must insert the constant if it doesn't already
15887 exist. */
15888 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15889 {
15890 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15891 && fix->mode == mp->mode
15892 && (!LABEL_P (fix->value)
15893 || (CODE_LABEL_NUMBER (fix->value)
15894 == CODE_LABEL_NUMBER (mp->value)))
15895 && rtx_equal_p (fix->value, mp->value))
15896 {
15897 /* More than one fix references this entry. */
15898 mp->refcount++;
15899 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15900 }
15901
15902 /* Note the insertion point if necessary. */
15903 if (max_mp == NULL
15904 && mp->max_address > max_address)
15905 max_mp = mp;
15906
15907 /* If we are inserting an 8-byte aligned quantity and
15908 we have not already found an insertion point, then
15909 make sure that all such 8-byte aligned quantities are
15910 placed at the start of the pool. */
15911 if (ARM_DOUBLEWORD_ALIGN
15912 && max_mp == NULL
15913 && fix->fix_size >= 8
15914 && mp->fix_size < 8)
15915 {
15916 max_mp = mp;
15917 max_address = mp->max_address;
15918 }
15919 }
15920
15921 /* The value is not currently in the minipool, so we need to create
15922 a new entry for it. If MAX_MP is NULL, the entry will be put on
15923 the end of the list since the placement is less constrained than
15924 any existing entry. Otherwise, we insert the new fix before
15925 MAX_MP and, if necessary, adjust the constraints on the other
15926 entries. */
15927 mp = XNEW (Mnode);
15928 mp->fix_size = fix->fix_size;
15929 mp->mode = fix->mode;
15930 mp->value = fix->value;
15931 mp->refcount = 1;
15932 /* Not yet required for a backwards ref. */
15933 mp->min_address = -65536;
15934
15935 if (max_mp == NULL)
15936 {
15937 mp->max_address = max_address;
15938 mp->next = NULL;
15939 mp->prev = minipool_vector_tail;
15940
15941 if (mp->prev == NULL)
15942 {
15943 minipool_vector_head = mp;
15944 minipool_vector_label = gen_label_rtx ();
15945 }
15946 else
15947 mp->prev->next = mp;
15948
15949 minipool_vector_tail = mp;
15950 }
15951 else
15952 {
15953 if (max_address > max_mp->max_address - mp->fix_size)
15954 mp->max_address = max_mp->max_address - mp->fix_size;
15955 else
15956 mp->max_address = max_address;
15957
15958 mp->next = max_mp;
15959 mp->prev = max_mp->prev;
15960 max_mp->prev = mp;
15961 if (mp->prev != NULL)
15962 mp->prev->next = mp;
15963 else
15964 minipool_vector_head = mp;
15965 }
15966
15967 /* Save the new entry. */
15968 max_mp = mp;
15969
15970 /* Scan over the preceding entries and adjust their addresses as
15971 required. */
15972 while (mp->prev != NULL
15973 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15974 {
15975 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15976 mp = mp->prev;
15977 }
15978
15979 return max_mp;
15980 }
15981
15982 static Mnode *
15983 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15984 HOST_WIDE_INT min_address)
15985 {
15986 HOST_WIDE_INT offset;
15987
15988 /* The code below assumes these are different. */
15989 gcc_assert (mp != min_mp);
15990
15991 if (min_mp == NULL)
15992 {
15993 if (min_address > mp->min_address)
15994 mp->min_address = min_address;
15995 }
15996 else
15997 {
15998 /* We will adjust this below if it is too loose. */
15999 mp->min_address = min_address;
16000
16001 /* Unlink MP from its current position. Since min_mp is non-null,
16002 mp->next must be non-null. */
16003 mp->next->prev = mp->prev;
16004 if (mp->prev != NULL)
16005 mp->prev->next = mp->next;
16006 else
16007 minipool_vector_head = mp->next;
16008
16009 /* Reinsert it after MIN_MP. */
16010 mp->prev = min_mp;
16011 mp->next = min_mp->next;
16012 min_mp->next = mp;
16013 if (mp->next != NULL)
16014 mp->next->prev = mp;
16015 else
16016 minipool_vector_tail = mp;
16017 }
16018
16019 min_mp = mp;
16020
16021 offset = 0;
16022 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16023 {
16024 mp->offset = offset;
16025 if (mp->refcount > 0)
16026 offset += mp->fix_size;
16027
16028 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16029 mp->next->min_address = mp->min_address + mp->fix_size;
16030 }
16031
16032 return min_mp;
16033 }
16034
16035 /* Add a constant to the minipool for a backward reference. Returns the
16036 node added or NULL if the constant will not fit in this pool.
16037
16038 Note that the insertion code for a backwards reference can be
16039 somewhat confusing because the calculated offsets for each fix do
16040 not take into account the size of the pool (which is still under
16041 construction). */
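/* A small worked example (addresses invented for illustration): a fix at
   address 0x2000 whose insn has a negative pool range of 0xff0 bytes gets
   min_address = 0x2000 - 0xff0 = 0x1010, i.e. the constant may not be
   placed below 0x1010 if that insn is to reach it backwards.  */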
16042 static Mnode *
16043 add_minipool_backward_ref (Mfix *fix)
16044 {
16045 /* If set, min_mp is the last pool_entry that has a lower constraint
16046 than the one we are trying to add. */
16047 Mnode *min_mp = NULL;
16048 /* This can be negative, since it is only a constraint. */
16049 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16050 Mnode *mp;
16051
16052 /* If we can't reach the current pool from this insn, or if we can't
16053 insert this entry at the end of the pool without pushing other
16054 fixes out of range, then we don't try. This ensures that we
16055 can't fail later on. */
16056 if (min_address >= minipool_barrier->address
16057 || (minipool_vector_tail->min_address + fix->fix_size
16058 >= minipool_barrier->address))
16059 return NULL;
16060
16061 /* Scan the pool to see if a constant with the same value has
16062 already been added. While we are doing this, also note the
16063 location where we must insert the constant if it doesn't already
16064 exist. */
16065 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16066 {
16067 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16068 && fix->mode == mp->mode
16069 && (!LABEL_P (fix->value)
16070 || (CODE_LABEL_NUMBER (fix->value)
16071 == CODE_LABEL_NUMBER (mp->value)))
16072 && rtx_equal_p (fix->value, mp->value)
16073 /* Check that there is enough slack to move this entry to the
16074 end of the table (this is conservative). */
16075 && (mp->max_address
16076 > (minipool_barrier->address
16077 + minipool_vector_tail->offset
16078 + minipool_vector_tail->fix_size)))
16079 {
16080 mp->refcount++;
16081 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16082 }
16083
16084 if (min_mp != NULL)
16085 mp->min_address += fix->fix_size;
16086 else
16087 {
16088 /* Note the insertion point if necessary. */
16089 if (mp->min_address < min_address)
16090 {
16091 /* For now, we do not allow the insertion of 8-byte alignment
16092 requiring nodes anywhere but at the start of the pool. */
16093 if (ARM_DOUBLEWORD_ALIGN
16094 && fix->fix_size >= 8 && mp->fix_size < 8)
16095 return NULL;
16096 else
16097 min_mp = mp;
16098 }
16099 else if (mp->max_address
16100 < minipool_barrier->address + mp->offset + fix->fix_size)
16101 {
16102 /* Inserting before this entry would push the fix beyond
16103 its maximum address (which can happen if we have
16104 re-located a forwards fix); force the new fix to come
16105 after it. */
16106 if (ARM_DOUBLEWORD_ALIGN
16107 && fix->fix_size >= 8 && mp->fix_size < 8)
16108 return NULL;
16109 else
16110 {
16111 min_mp = mp;
16112 min_address = mp->min_address + fix->fix_size;
16113 }
16114 }
16115 /* Do not insert a non-8-byte aligned quantity before 8-byte
16116 aligned quantities. */
16117 else if (ARM_DOUBLEWORD_ALIGN
16118 && fix->fix_size < 8
16119 && mp->fix_size >= 8)
16120 {
16121 min_mp = mp;
16122 min_address = mp->min_address + fix->fix_size;
16123 }
16124 }
16125 }
16126
16127 /* We need to create a new entry. */
16128 mp = XNEW (Mnode);
16129 mp->fix_size = fix->fix_size;
16130 mp->mode = fix->mode;
16131 mp->value = fix->value;
16132 mp->refcount = 1;
16133 mp->max_address = minipool_barrier->address + 65536;
16134
16135 mp->min_address = min_address;
16136
16137 if (min_mp == NULL)
16138 {
16139 mp->prev = NULL;
16140 mp->next = minipool_vector_head;
16141
16142 if (mp->next == NULL)
16143 {
16144 minipool_vector_tail = mp;
16145 minipool_vector_label = gen_label_rtx ();
16146 }
16147 else
16148 mp->next->prev = mp;
16149
16150 minipool_vector_head = mp;
16151 }
16152 else
16153 {
16154 mp->next = min_mp->next;
16155 mp->prev = min_mp;
16156 min_mp->next = mp;
16157
16158 if (mp->next != NULL)
16159 mp->next->prev = mp;
16160 else
16161 minipool_vector_tail = mp;
16162 }
16163
16164 /* Save the new entry. */
16165 min_mp = mp;
16166
16167 if (mp->prev)
16168 mp = mp->prev;
16169 else
16170 mp->offset = 0;
16171
16172 /* Scan over the following entries and adjust their offsets. */
16173 while (mp->next != NULL)
16174 {
16175 if (mp->next->min_address < mp->min_address + mp->fix_size)
16176 mp->next->min_address = mp->min_address + mp->fix_size;
16177
16178 if (mp->refcount)
16179 mp->next->offset = mp->offset + mp->fix_size;
16180 else
16181 mp->next->offset = mp->offset;
16182
16183 mp = mp->next;
16184 }
16185
16186 return min_mp;
16187 }
16188
16189 static void
16190 assign_minipool_offsets (Mfix *barrier)
16191 {
16192 HOST_WIDE_INT offset = 0;
16193 Mnode *mp;
16194
16195 minipool_barrier = barrier;
16196
16197 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16198 {
16199 mp->offset = offset;
16200
16201 if (mp->refcount > 0)
16202 offset += mp->fix_size;
16203 }
16204 }
16205
16206 /* Output the literal table. */
16207 static void
16208 dump_minipool (rtx_insn *scan)
16209 {
16210 Mnode * mp;
16211 Mnode * nmp;
16212 int align64 = 0;
16213
16214 if (ARM_DOUBLEWORD_ALIGN)
16215 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16216 if (mp->refcount > 0 && mp->fix_size >= 8)
16217 {
16218 align64 = 1;
16219 break;
16220 }
16221
16222 if (dump_file)
16223 fprintf (dump_file,
16224 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16225 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16226
16227 scan = emit_label_after (gen_label_rtx (), scan);
16228 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16229 scan = emit_label_after (minipool_vector_label, scan);
16230
16231 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16232 {
16233 if (mp->refcount > 0)
16234 {
16235 if (dump_file)
16236 {
16237 fprintf (dump_file,
16238 ";; Offset %u, min %ld, max %ld ",
16239 (unsigned) mp->offset, (unsigned long) mp->min_address,
16240 (unsigned long) mp->max_address);
16241 arm_print_value (dump_file, mp->value);
16242 fputc ('\n', dump_file);
16243 }
16244
16245 rtx val = copy_rtx (mp->value);
16246
16247 switch (GET_MODE_SIZE (mp->mode))
16248 {
16249 #ifdef HAVE_consttable_1
16250 case 1:
16251 scan = emit_insn_after (gen_consttable_1 (val), scan);
16252 break;
16253
16254 #endif
16255 #ifdef HAVE_consttable_2
16256 case 2:
16257 scan = emit_insn_after (gen_consttable_2 (val), scan);
16258 break;
16259
16260 #endif
16261 #ifdef HAVE_consttable_4
16262 case 4:
16263 scan = emit_insn_after (gen_consttable_4 (val), scan);
16264 break;
16265
16266 #endif
16267 #ifdef HAVE_consttable_8
16268 case 8:
16269 scan = emit_insn_after (gen_consttable_8 (val), scan);
16270 break;
16271
16272 #endif
16273 #ifdef HAVE_consttable_16
16274 case 16:
16275 scan = emit_insn_after (gen_consttable_16 (val), scan);
16276 break;
16277
16278 #endif
16279 default:
16280 gcc_unreachable ();
16281 }
16282 }
16283
16284 nmp = mp->next;
16285 free (mp);
16286 }
16287
16288 minipool_vector_head = minipool_vector_tail = NULL;
16289 scan = emit_insn_after (gen_consttable_end (), scan);
16290 scan = emit_barrier_after (scan);
16291 }
16292
16293 /* Return the cost of forcibly inserting a barrier after INSN. */
16294 static int
16295 arm_barrier_cost (rtx_insn *insn)
16296 {
16297 /* Basing the location of the pool on the loop depth is preferable,
16298 but at the moment, the basic block information seems to be
16299 corrupt by this stage of the compilation. */
16300 int base_cost = 50;
16301 rtx_insn *next = next_nonnote_insn (insn);
16302
16303 if (next != NULL && LABEL_P (next))
16304 base_cost -= 20;
16305
16306 switch (GET_CODE (insn))
16307 {
16308 case CODE_LABEL:
16309 /* It will always be better to place the table before the label, rather
16310 than after it. */
16311 return 50;
16312
16313 case INSN:
16314 case CALL_INSN:
16315 return base_cost;
16316
16317 case JUMP_INSN:
16318 return base_cost - 10;
16319
16320 default:
16321 return base_cost + 10;
16322 }
16323 }
16324
16325 /* Find the best place in the insn stream in the range
16326 (FIX->address, MAX_ADDRESS) to forcibly insert a minipool barrier.
16327 Create the barrier by inserting a jump, and add a new fix entry
16328 for it. */
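/* create_fix_barrier itself emits only the jump, the barrier and the
   label; dump_minipool later fills the pool in between, so the result
   looks roughly like this (assembly sketched for illustration, labels
   invented):

       b       .Lskip          @ branch around the pool
       .align  2
     .Lpool:
       .word   <constant 0>
       .word   <constant 1>
     .Lskip:
       ...                     @ normal code continues  */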
16329 static Mfix *
16330 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16331 {
16332 HOST_WIDE_INT count = 0;
16333 rtx_barrier *barrier;
16334 rtx_insn *from = fix->insn;
16335 /* The instruction after which we will insert the jump. */
16336 rtx_insn *selected = NULL;
16337 int selected_cost;
16338 /* The address at which the jump instruction will be placed. */
16339 HOST_WIDE_INT selected_address;
16340 Mfix * new_fix;
16341 HOST_WIDE_INT max_count = max_address - fix->address;
16342 rtx_code_label *label = gen_label_rtx ();
16343
16344 selected_cost = arm_barrier_cost (from);
16345 selected_address = fix->address;
16346
16347 while (from && count < max_count)
16348 {
16349 rtx_jump_table_data *tmp;
16350 int new_cost;
16351
16352 /* This code shouldn't have been called if there was a natural barrier
16353 within range. */
16354 gcc_assert (!BARRIER_P (from));
16355
16356 /* Count the length of this insn. This must stay in sync with the
16357 code that pushes minipool fixes. */
16358 if (LABEL_P (from))
16359 count += get_label_padding (from);
16360 else
16361 count += get_attr_length (from);
16362
16363 /* If there is a jump table, add its length. */
16364 if (tablejump_p (from, NULL, &tmp))
16365 {
16366 count += get_jump_table_size (tmp);
16367
16368 /* Jump tables aren't in a basic block, so base the cost on
16369 the dispatch insn. If we select this location, we will
16370 still put the pool after the table. */
16371 new_cost = arm_barrier_cost (from);
16372
16373 if (count < max_count
16374 && (!selected || new_cost <= selected_cost))
16375 {
16376 selected = tmp;
16377 selected_cost = new_cost;
16378 selected_address = fix->address + count;
16379 }
16380
16381 /* Continue after the dispatch table. */
16382 from = NEXT_INSN (tmp);
16383 continue;
16384 }
16385
16386 new_cost = arm_barrier_cost (from);
16387
16388 if (count < max_count
16389 && (!selected || new_cost <= selected_cost))
16390 {
16391 selected = from;
16392 selected_cost = new_cost;
16393 selected_address = fix->address + count;
16394 }
16395
16396 from = NEXT_INSN (from);
16397 }
16398
16399 /* Make sure that we found a place to insert the jump. */
16400 gcc_assert (selected);
16401
16402 /* Make sure we do not split a call and its corresponding
16403 CALL_ARG_LOCATION note. */
16404 if (CALL_P (selected))
16405 {
16406 rtx_insn *next = NEXT_INSN (selected);
16407 if (next && NOTE_P (next)
16408 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16409 selected = next;
16410 }
16411
16412 /* Create a new JUMP_INSN that branches around a barrier. */
16413 from = emit_jump_insn_after (gen_jump (label), selected);
16414 JUMP_LABEL (from) = label;
16415 barrier = emit_barrier_after (from);
16416 emit_label_after (label, barrier);
16417
16418 /* Create a minipool barrier entry for the new barrier. */
16419 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16420 new_fix->insn = barrier;
16421 new_fix->address = selected_address;
16422 new_fix->next = fix->next;
16423 fix->next = new_fix;
16424
16425 return new_fix;
16426 }
16427
16428 /* Record that there is a natural barrier in the insn stream at
16429 ADDRESS. */
16430 static void
16431 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16432 {
16433 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16434
16435 fix->insn = insn;
16436 fix->address = address;
16437
16438 fix->next = NULL;
16439 if (minipool_fix_head != NULL)
16440 minipool_fix_tail->next = fix;
16441 else
16442 minipool_fix_head = fix;
16443
16444 minipool_fix_tail = fix;
16445 }
16446
16447 /* Record INSN, which will need fixing up to load a value from the
16448 minipool. ADDRESS is the offset of the insn since the start of the
16449 function; LOC is a pointer to the part of the insn which requires
16450 fixing; VALUE is the constant that must be loaded, which is of type
16451 MODE. */
16452 static void
16453 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16454 machine_mode mode, rtx value)
16455 {
16456 gcc_assert (!arm_disable_literal_pool);
16457 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16458
16459 fix->insn = insn;
16460 fix->address = address;
16461 fix->loc = loc;
16462 fix->mode = mode;
16463 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16464 fix->value = value;
16465 fix->forwards = get_attr_pool_range (insn);
16466 fix->backwards = get_attr_neg_pool_range (insn);
16467 fix->minipool = NULL;
16468
16469 /* If an insn doesn't have a range defined for it, then it isn't
16470 expecting to be reworked by this code. Better to stop now than
16471 to generate duff assembly code. */
16472 gcc_assert (fix->forwards || fix->backwards);
16473
16474 /* If an entry requires 8-byte alignment then assume all constant pools
16475 require 4 bytes of padding. Trying to do this later on a per-pool
16476 basis is awkward because existing pool entries have to be modified. */
16477 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16478 minipool_pad = 4;
16479
16480 if (dump_file)
16481 {
16482 fprintf (dump_file,
16483 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16484 GET_MODE_NAME (mode),
16485 INSN_UID (insn), (unsigned long) address,
16486 -1 * (long)fix->backwards, (long)fix->forwards);
16487 arm_print_value (dump_file, fix->value);
16488 fprintf (dump_file, "\n");
16489 }
16490
16491 /* Add it to the chain of fixes. */
16492 fix->next = NULL;
16493
16494 if (minipool_fix_head != NULL)
16495 minipool_fix_tail->next = fix;
16496 else
16497 minipool_fix_head = fix;
16498
16499 minipool_fix_tail = fix;
16500 }
16501
16502 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16503 constant inline; a constant whose synthesis would cost more than this
16504 is not expanded inline. */
16505 int
16506 arm_max_const_double_inline_cost ()
16507 {
16508 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16509 }
16510
16511 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16512 Returns the number of insns needed, or 99 if we don't know how to
16513 do it. */
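/* For example (values chosen purely for illustration): the DImode constant
   0x000000ff00000001 splits into lowpart 0x00000001 and highpart
   0x000000ff; each part is a valid ARM immediate, so each half costs one
   insn and the total is 2.  A value such as 0x12345678deadbeef needs
   several MOV/ORR steps per half and therefore costs considerably more.  */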
16514 int
16515 arm_const_double_inline_cost (rtx val)
16516 {
16517 rtx lowpart, highpart;
16518 machine_mode mode;
16519
16520 mode = GET_MODE (val);
16521
16522 if (mode == VOIDmode)
16523 mode = DImode;
16524
16525 gcc_assert (GET_MODE_SIZE (mode) == 8);
16526
16527 lowpart = gen_lowpart (SImode, val);
16528 highpart = gen_highpart_mode (SImode, mode, val);
16529
16530 gcc_assert (CONST_INT_P (lowpart));
16531 gcc_assert (CONST_INT_P (highpart));
16532
16533 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16534 NULL_RTX, NULL_RTX, 0, 0)
16535 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16536 NULL_RTX, NULL_RTX, 0, 0));
16537 }
16538
16539 /* Cost of loading a SImode constant. */
16540 static inline int
16541 arm_const_inline_cost (enum rtx_code code, rtx val)
16542 {
16543 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16544 NULL_RTX, NULL_RTX, 1, 0);
16545 }
16546
16547 /* Return true if it is worthwhile to split a 64-bit constant into two
16548 32-bit operations. This is the case if optimizing for size, or
16549 if we have load delay slots, or if one 32-bit part can be done with
16550 a single data operation. */
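/* For instance (an illustrative value): 0x00000000ff000000 has a high part
   of 0 and a low part of 0xff000000, both single-instruction ARM
   immediates, so splitting the 64-bit move into two 32-bit moves is
   worthwhile even without load delay slots.  */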
16551 bool
16552 arm_const_double_by_parts (rtx val)
16553 {
16554 machine_mode mode = GET_MODE (val);
16555 rtx part;
16556
16557 if (optimize_size || arm_ld_sched)
16558 return true;
16559
16560 if (mode == VOIDmode)
16561 mode = DImode;
16562
16563 part = gen_highpart_mode (SImode, mode, val);
16564
16565 gcc_assert (CONST_INT_P (part));
16566
16567 if (const_ok_for_arm (INTVAL (part))
16568 || const_ok_for_arm (~INTVAL (part)))
16569 return true;
16570
16571 part = gen_lowpart (SImode, val);
16572
16573 gcc_assert (CONST_INT_P (part));
16574
16575 if (const_ok_for_arm (INTVAL (part))
16576 || const_ok_for_arm (~INTVAL (part)))
16577 return true;
16578
16579 return false;
16580 }
16581
16582 /* Return true if it is possible to inline both the high and low parts
16583 of a 64-bit constant into 32-bit data processing instructions. */
16584 bool
16585 arm_const_double_by_immediates (rtx val)
16586 {
16587 machine_mode mode = GET_MODE (val);
16588 rtx part;
16589
16590 if (mode == VOIDmode)
16591 mode = DImode;
16592
16593 part = gen_highpart_mode (SImode, mode, val);
16594
16595 gcc_assert (CONST_INT_P (part));
16596
16597 if (!const_ok_for_arm (INTVAL (part)))
16598 return false;
16599
16600 part = gen_lowpart (SImode, val);
16601
16602 gcc_assert (CONST_INT_P (part));
16603
16604 if (!const_ok_for_arm (INTVAL (part)))
16605 return false;
16606
16607 return true;
16608 }
16609
16610 /* Scan INSN and note any of its operands that need fixing.
16611 If DO_PUSHES is false we do not actually push any of the fixups
16612 needed. */
16613 static void
16614 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16615 {
16616 int opno;
16617
16618 extract_constrain_insn (insn);
16619
16620 if (recog_data.n_alternatives == 0)
16621 return;
16622
16623 /* Fill in recog_op_alt with information about the constraints of
16624 this insn. */
16625 preprocess_constraints (insn);
16626
16627 const operand_alternative *op_alt = which_op_alt ();
16628 for (opno = 0; opno < recog_data.n_operands; opno++)
16629 {
16630 /* Things we need to fix can only occur in inputs. */
16631 if (recog_data.operand_type[opno] != OP_IN)
16632 continue;
16633
16634 /* If this alternative is a memory reference, then any mention
16635 of constants in this alternative is really to fool reload
16636 into allowing us to accept one there. We need to fix them up
16637 now so that we output the right code. */
16638 if (op_alt[opno].memory_ok)
16639 {
16640 rtx op = recog_data.operand[opno];
16641
16642 if (CONSTANT_P (op))
16643 {
16644 if (do_pushes)
16645 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16646 recog_data.operand_mode[opno], op);
16647 }
16648 else if (MEM_P (op)
16649 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16650 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16651 {
16652 if (do_pushes)
16653 {
16654 rtx cop = avoid_constant_pool_reference (op);
16655
16656 /* Casting the address of something to a mode narrower
16657 than a word can cause avoid_constant_pool_reference()
16658 to return the pool reference itself. That's no good to
16659 us here. Let's just hope that we can use the
16660 constant pool value directly. */
16661 if (op == cop)
16662 cop = get_pool_constant (XEXP (op, 0));
16663
16664 push_minipool_fix (insn, address,
16665 recog_data.operand_loc[opno],
16666 recog_data.operand_mode[opno], cop);
16667 }
16668
16669 }
16670 }
16671 }
16672
16673 return;
16674 }
16675
16676 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16677 and unions in the context of ARMv8-M Security Extensions. It is used as a
16678 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16679 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16680 or four masks, depending on whether it is being computed for a
16681 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16682 respectively. The tree for the type of the argument or a field within an
16683 argument is passed in ARG_TYPE, the current register this argument or field
16684 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16685 argument or field starts at is passed in STARTING_BIT and the last used bit
16686 is kept in LAST_USED_BIT which is also updated accordingly. */
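/* A worked example (layout assumed for illustration, AAPCS-like): for

     struct { char c; short s; }

   passed in r0, 'c' occupies bits 0-7 and 's' bits 16-31, so bits 8-15
   are padding; the function records 0x0000ff00 in padding_bits_to_clear[0]
   and marks r0 in the returned register mask so that the register itself
   is not cleared wholesale.  */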
16687
16688 static unsigned HOST_WIDE_INT
16689 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16690 uint32_t * padding_bits_to_clear,
16691 unsigned starting_bit, int * last_used_bit)
16693 {
16694 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16695
16696 if (TREE_CODE (arg_type) == RECORD_TYPE)
16697 {
16698 unsigned current_bit = starting_bit;
16699 tree field;
16700 long int offset, size;
16701
16702
16703 field = TYPE_FIELDS (arg_type);
16704 while (field)
16705 {
16706 /* The offset within a structure is always an offset from
16707 the start of that structure. Make sure we take that into account
16708 in the calculation of the register-based offset used here. */
16709 offset = starting_bit;
16710 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16711 offset %= 32;
16712
16713 /* This is the actual size of the field, for bitfields this is the
16714 bitfield width and not the container size. */
16715 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16716
16717 if (*last_used_bit != offset)
16718 {
16719 if (offset < *last_used_bit)
16720 {
16721 /* This field's offset is before the 'last_used_bit', which
16722 means this field goes in the next register. So we need to
16723 pad the rest of the current register and increase the
16724 register number. */
16725 uint32_t mask;
16726 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16727 mask++;
16728
16729 padding_bits_to_clear[*regno] |= mask;
16730 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16731 (*regno)++;
16732 }
16733 else
16734 {
16735 /* Otherwise we pad the bits between the last field's end and
16736 the start of the new field. */
16737 uint32_t mask;
16738
16739 mask = ((uint32_t)-1) >> (32 - offset);
16740 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16741 padding_bits_to_clear[*regno] |= mask;
16742 }
16743 current_bit = offset;
16744 }
16745
16746 /* Calculate further padding bits for inner structs/unions too. */
16747 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16748 {
16749 *last_used_bit = current_bit;
16750 not_to_clear_reg_mask
16751 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16752 padding_bits_to_clear, offset,
16753 last_used_bit);
16754 }
16755 else
16756 {
16757 /* Update 'current_bit' with this field's size. If the
16758 'current_bit' lies in a subsequent register, update 'regno' and
16759 reset 'current_bit' to point to the current bit in that new
16760 register. */
16761 current_bit += size;
16762 while (current_bit >= 32)
16763 {
16764 current_bit -= 32;
16765 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16766 (*regno)++;
16767 }
16768 *last_used_bit = current_bit;
16769 }
16770
16771 field = TREE_CHAIN (field);
16772 }
16773 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16774 }
16775 else if (TREE_CODE (arg_type) == UNION_TYPE)
16776 {
16777 tree field, field_t;
16778 int i, regno_t, field_size;
16779 int max_reg = -1;
16780 int max_bit = -1;
16781 uint32_t mask;
16782 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16783 = {-1, -1, -1, -1};
16784
16785 /* To compute the padding bits in a union we only consider bits as
16786 padding bits if they are always either a padding bit or fall outside a
16787 field's size for all fields in the union. */
16788 field = TYPE_FIELDS (arg_type);
16789 while (field)
16790 {
16791 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16792 = {0U, 0U, 0U, 0U};
16793 int last_used_bit_t = *last_used_bit;
16794 regno_t = *regno;
16795 field_t = TREE_TYPE (field);
16796
16797 /* If the field's type is either a record or a union make sure to
16798 compute their padding bits too. */
16799 if (RECORD_OR_UNION_TYPE_P (field_t))
16800 not_to_clear_reg_mask
16801 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16802 &padding_bits_to_clear_t[0],
16803 starting_bit, &last_used_bit_t);
16804 else
16805 {
16806 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16807 regno_t = (field_size / 32) + *regno;
16808 last_used_bit_t = (starting_bit + field_size) % 32;
16809 }
16810
16811 for (i = *regno; i < regno_t; i++)
16812 {
16813 /* For all but the last register used by this field only keep the
16814 padding bits that were padding bits in this field. */
16815 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16816 }
16817
16818 /* For the last register, keep all padding bits that were padding
16819 bits in this field and any padding bits that are still valid
16820 as padding bits but fall outside of this field's size. */
16821 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16822 padding_bits_to_clear_res[regno_t]
16823 &= padding_bits_to_clear_t[regno_t] | mask;
16824
16825 /* Update the maximum size of the fields in terms of registers used
16826 ('max_reg') and the 'last_used_bit' in said register. */
16827 if (max_reg < regno_t)
16828 {
16829 max_reg = regno_t;
16830 max_bit = last_used_bit_t;
16831 }
16832 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16833 max_bit = last_used_bit_t;
16834
16835 field = TREE_CHAIN (field);
16836 }
16837
16838 /* Update the current padding_bits_to_clear using the intersection of the
16839 padding bits of all the fields. */
16840 for (i = *regno; i < max_reg; i++)
16841 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16842
16843 /* Do not keep trailing padding bits, we do not know yet whether this
16844 is the end of the argument. */
16845 mask = ((uint32_t) 1 << max_bit) - 1;
16846 padding_bits_to_clear[max_reg]
16847 |= padding_bits_to_clear_res[max_reg] & mask;
16848
16849 *regno = max_reg;
16850 *last_used_bit = max_bit;
16851 }
16852 else
16853 /* This function should only be used for structs and unions. */
16854 gcc_unreachable ();
16855
16856 return not_to_clear_reg_mask;
16857 }
16858
16859 /* In the context of ARMv8-M Security Extensions, this function is used for both
16860 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16861 registers are used when returning or passing arguments, which is then
16862 returned as a mask. It will also compute a mask to indicate padding/unused
16863 bits for each of these registers, and pass this back through the
16864 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16865 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16866 the starting register used to pass this argument or return value is passed
16867 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16868 for struct and union types. */
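/* For example (hypothetical call, hard-float ABI assumed): a 'double'
   argument starting in s0 occupies two VFP registers, so the bits for s0
   and s1 are set in the returned mask; a 'long long' starting in r0 sets
   the bits for r0 and r1.  Neither records any padding bits, since both
   registers are fully used.  */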
16869
16870 static unsigned HOST_WIDE_INT
16871 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16872 uint32_t * padding_bits_to_clear)
16874 {
16875 int last_used_bit = 0;
16876 unsigned HOST_WIDE_INT not_to_clear_mask;
16877
16878 if (RECORD_OR_UNION_TYPE_P (arg_type))
16879 {
16880 not_to_clear_mask
16881 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16882 padding_bits_to_clear, 0,
16883 &last_used_bit);
16884
16885
16886 /* If the 'last_used_bit' is not zero, that means we are still using a
16887 part of the last 'regno'. In such cases we must clear the trailing
16888 bits. Otherwise we are not using regno and we should mark it to be
16889 cleared. */
16890 if (last_used_bit != 0)
16891 padding_bits_to_clear[regno]
16892 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16893 else
16894 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16895 }
16896 else
16897 {
16898 not_to_clear_mask = 0;
16899 /* We are not dealing with structs nor unions. So these arguments may be
16900 passed in floating point registers too. In some cases a BLKmode is
16901 used when returning or passing arguments in multiple VFP registers. */
16902 if (GET_MODE (arg_rtx) == BLKmode)
16903 {
16904 int i, arg_regs;
16905 rtx reg;
16906
16907 /* This should really only occur when dealing with the hard-float
16908 ABI. */
16909 gcc_assert (TARGET_HARD_FLOAT_ABI);
16910
16911 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16912 {
16913 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16914 gcc_assert (REG_P (reg));
16915
16916 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16917
16918 /* If we are dealing with DF mode, make sure we don't
16919 clear either of the registers it addresses. */
16920 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16921 if (arg_regs > 1)
16922 {
16923 unsigned HOST_WIDE_INT mask;
16924 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16925 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16926 not_to_clear_mask |= mask;
16927 }
16928 }
16929 }
16930 else
16931 {
16932 /* Otherwise we can rely on the MODE to determine how many registers
16933 are being used by this argument. */
16934 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16935 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16936 if (arg_regs > 1)
16937 {
16938 unsigned HOST_WIDE_INT
16939 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16940 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16941 not_to_clear_mask |= mask;
16942 }
16943 }
16944 }
16945
16946 return not_to_clear_mask;
16947 }
16948
16949 /* Clear caller-saved registers not used to pass arguments before a
16950 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16951 registers is done in the __gnu_cmse_nonsecure_call libcall.
16952 See libgcc/config/arm/cmse_nonsecure_call.S. */
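/* The sequence inserted before each such call looks roughly like this
   (register numbers and ordering sketched for illustration only):

       movs    r1, r4                     @ clear unused argument registers by
       movs    r2, r4                     @ copying the branch-target address,
       movs    r3, r4                     @ which carries no secret data
       bl      __gnu_cmse_nonsecure_call  @ target address is in r4

   where the LSB of the address has already been cleared by the shift pair
   emitted below.  */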
16953
16954 static void
16955 cmse_nonsecure_call_clear_caller_saved (void)
16956 {
16957 basic_block bb;
16958
16959 FOR_EACH_BB_FN (bb, cfun)
16960 {
16961 rtx_insn *insn;
16962
16963 FOR_BB_INSNS (bb, insn)
16964 {
16965 uint64_t to_clear_mask, float_mask;
16966 rtx_insn *seq;
16967 rtx pat, call, unspec, reg, cleared_reg, tmp;
16968 unsigned int regno, maxregno;
16969 rtx address;
16970 CUMULATIVE_ARGS args_so_far_v;
16971 cumulative_args_t args_so_far;
16972 tree arg_type, fntype;
16973 bool using_r4, first_param = true;
16974 function_args_iterator args_iter;
16975 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16976 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16977
16978 if (!NONDEBUG_INSN_P (insn))
16979 continue;
16980
16981 if (!CALL_P (insn))
16982 continue;
16983
16984 pat = PATTERN (insn);
16985 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16986 call = XVECEXP (pat, 0, 0);
16987
16988 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16989 if (GET_CODE (call) == SET)
16990 call = SET_SRC (call);
16991
16992 /* Check if it is a cmse_nonsecure_call. */
16993 unspec = XEXP (call, 0);
16994 if (GET_CODE (unspec) != UNSPEC
16995 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16996 continue;
16997
16998 /* Determine the caller-saved registers we need to clear. */
16999 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17000 maxregno = NUM_ARG_REGS - 1;
17001 /* Only look at the caller-saved floating point registers in case of
17002 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17003 lazy stores and loads, which clear both caller- and callee-saved
17004 registers. */
17005 if (TARGET_HARD_FLOAT_ABI)
17006 {
17007 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17008 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17009 to_clear_mask |= float_mask;
17010 maxregno = D7_VFP_REGNUM;
17011 }
17012
17013 /* Make sure the register used to hold the function address is not
17014 cleared. */
17015 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17016 gcc_assert (MEM_P (address));
17017 gcc_assert (REG_P (XEXP (address, 0)));
17018 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17019
17020 /* Set basic block of call insn so that df rescan is performed on
17021 insns inserted here. */
17022 set_block_for_insn (insn, bb);
17023 df_set_flags (DF_DEFER_INSN_RESCAN);
17024 start_sequence ();
17025
17026 /* Make sure the scheduler doesn't schedule other insns beyond
17027 here. */
17028 emit_insn (gen_blockage ());
17029
17030 /* Walk through all arguments and clear registers appropriately.
17031 */
17032 fntype = TREE_TYPE (MEM_EXPR (address));
17033 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17034 NULL_TREE);
17035 args_so_far = pack_cumulative_args (&args_so_far_v);
17036 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17037 {
17038 rtx arg_rtx;
17039 machine_mode arg_mode = TYPE_MODE (arg_type);
17040
17041 if (VOID_TYPE_P (arg_type))
17042 continue;
17043
17044 if (!first_param)
17045 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17046 true);
17047
17048 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17049 true);
17050 gcc_assert (REG_P (arg_rtx));
17051 to_clear_mask
17052 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17053 REGNO (arg_rtx),
17054 padding_bits_to_clear_ptr);
17055
17056 first_param = false;
17057 }
17058
17059 /* Clear padding bits where needed. */
17060 cleared_reg = XEXP (address, 0);
17061 reg = gen_rtx_REG (SImode, IP_REGNUM);
17062 using_r4 = false;
17063 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17064 {
17065 if (padding_bits_to_clear[regno] == 0)
17066 continue;
17067
17068 /* If this is a Thumb-1 target, copy the address of the function
17069 we are calling from 'r4' into 'ip' so that we can use r4 to
17070 clear the unused bits in the arguments. */
17071 if (TARGET_THUMB1 && !using_r4)
17072 {
17073 using_r4 = true;
17074 reg = cleared_reg;
17075 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17076 reg);
17077 }
17078
17079 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17080 emit_move_insn (reg, tmp);
17081 /* Also fill the top half of the negated
17082 padding_bits_to_clear. */
17083 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17084 {
17085 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17086 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17087 GEN_INT (16),
17088 GEN_INT (16)),
17089 tmp));
17090 }
17091
17092 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17093 gen_rtx_REG (SImode, regno),
17094 reg));
17095
17096 }
17097 if (using_r4)
17098 emit_move_insn (cleared_reg,
17099 gen_rtx_REG (SImode, IP_REGNUM));
17100
17101 /* We use right shift and left shift to clear the LSB of the address
17102 we jump to instead of using bic, to avoid having to use an extra
17103 register on Thumb-1. */
17104 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17105 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17106 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17107 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17108
17109 /* Clear all registers that could leak before doing a non-secure
17110 call. */
17111 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17112 {
17113 if (!(to_clear_mask & (1LL << regno)))
17114 continue;
17115
17116 /* If regno is an even vfp register and its successor is also to
17117 be cleared, use vmov. */
17118 if (IS_VFP_REGNUM (regno))
17119 {
17120 if (TARGET_VFP_DOUBLE
17121 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17122 && to_clear_mask & (1LL << (regno + 1)))
17123 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17124 CONST0_RTX (DFmode));
17125 else
17126 emit_move_insn (gen_rtx_REG (SFmode, regno),
17127 CONST0_RTX (SFmode));
17128 }
17129 else
17130 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17131 }
17132
17133 seq = get_insns ();
17134 end_sequence ();
17135 emit_insn_before (seq, insn);
17136
17137 }
17138 }
17139 }
17140
17141 /* Rewrite a move insn as a subtract of 0 if the condition codes will
17142 be useful in the next conditional jump insn. */
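/* For example (illustrative Thumb-1 code): in a sequence such as

       mov   r2, r3
       ...
       cmp   r2, #0
       beq   .L1

   the move is rewritten as "subs r2, r3, #0", whose flags make the
   explicit compare against zero redundant, provided none of the
   intervening insns clobber the compared register or the condition
   codes.  */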
17143
17144 static void
17145 thumb1_reorg (void)
17146 {
17147 basic_block bb;
17148
17149 FOR_EACH_BB_FN (bb, cfun)
17150 {
17151 rtx dest, src;
17152 rtx cmp, op0, op1, set = NULL;
17153 rtx_insn *prev, *insn = BB_END (bb);
17154 bool insn_clobbered = false;
17155
17156 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17157 insn = PREV_INSN (insn);
17158
17159 /* Find the last cbranchsi4_insn in basic block BB. */
17160 if (insn == BB_HEAD (bb)
17161 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17162 continue;
17163
17164 /* Get the register with which we are comparing. */
17165 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17166 op0 = XEXP (cmp, 0);
17167 op1 = XEXP (cmp, 1);
17168
17169 /* Check that comparison is against ZERO. */
17170 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17171 continue;
17172
17173 /* Find the first flag setting insn before INSN in basic block BB. */
17174 gcc_assert (insn != BB_HEAD (bb));
17175 for (prev = PREV_INSN (insn);
17176 (!insn_clobbered
17177 && prev != BB_HEAD (bb)
17178 && (NOTE_P (prev)
17179 || DEBUG_INSN_P (prev)
17180 || ((set = single_set (prev)) != NULL
17181 && get_attr_conds (prev) == CONDS_NOCOND)));
17182 prev = PREV_INSN (prev))
17183 {
17184 if (reg_set_p (op0, prev))
17185 insn_clobbered = true;
17186 }
17187
17188 /* Skip if op0 is clobbered by an insn other than prev. */
17189 if (insn_clobbered)
17190 continue;
17191
17192 if (!set)
17193 continue;
17194
17195 dest = SET_DEST (set);
17196 src = SET_SRC (set);
17197 if (!low_register_operand (dest, SImode)
17198 || !low_register_operand (src, SImode))
17199 continue;
17200
17201 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17202 in INSN. Both src and dest of the move insn are checked. */
17203 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17204 {
17205 dest = copy_rtx (dest);
17206 src = copy_rtx (src);
17207 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17208 PATTERN (prev) = gen_rtx_SET (dest, src);
17209 INSN_CODE (prev) = -1;
17210 /* Set test register in INSN to dest. */
17211 XEXP (cmp, 0) = copy_rtx (dest);
17212 INSN_CODE (insn) = -1;
17213 }
17214 }
17215 }
17216
17217 /* Convert instructions to their cc-clobbering variant if possible, since
17218 that allows us to use smaller encodings. */
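/* For example (illustrative): when the condition codes are dead at this
   point,

       add   r0, r1, r2       @ needs a 32-bit encoding (add.w)

   can become

       adds  r0, r1, r2       @ 16-bit encoding, clobbers the flags

   which is achieved below by adding a clobber of CC_REGNUM to the insn
   pattern.  */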
17219
17220 static void
17221 thumb2_reorg (void)
17222 {
17223 basic_block bb;
17224 regset_head live;
17225
17226 INIT_REG_SET (&live);
17227
17228 /* We are freeing block_for_insn in the toplev to keep compatibility
17229 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17230 compute_bb_for_insn ();
17231 df_analyze ();
17232
17233 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17234
17235 FOR_EACH_BB_FN (bb, cfun)
17236 {
17237 if ((current_tune->disparage_flag_setting_t16_encodings
17238 == tune_params::DISPARAGE_FLAGS_ALL)
17239 && optimize_bb_for_speed_p (bb))
17240 continue;
17241
17242 rtx_insn *insn;
17243 Convert_Action action = SKIP;
17244 Convert_Action action_for_partial_flag_setting
17245 = ((current_tune->disparage_flag_setting_t16_encodings
17246 != tune_params::DISPARAGE_FLAGS_NEITHER)
17247 && optimize_bb_for_speed_p (bb))
17248 ? SKIP : CONV;
17249
17250 COPY_REG_SET (&live, DF_LR_OUT (bb));
17251 df_simulate_initialize_backwards (bb, &live);
17252 FOR_BB_INSNS_REVERSE (bb, insn)
17253 {
17254 if (NONJUMP_INSN_P (insn)
17255 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17256 && GET_CODE (PATTERN (insn)) == SET)
17257 {
17258 action = SKIP;
17259 rtx pat = PATTERN (insn);
17260 rtx dst = XEXP (pat, 0);
17261 rtx src = XEXP (pat, 1);
17262 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17263
17264 if (UNARY_P (src) || BINARY_P (src))
17265 op0 = XEXP (src, 0);
17266
17267 if (BINARY_P (src))
17268 op1 = XEXP (src, 1);
17269
17270 if (low_register_operand (dst, SImode))
17271 {
17272 switch (GET_CODE (src))
17273 {
17274 case PLUS:
17275 /* Adding two registers and storing the result
17276 in the first source is already a 16-bit
17277 operation. */
17278 if (rtx_equal_p (dst, op0)
17279 && register_operand (op1, SImode))
17280 break;
17281
17282 if (low_register_operand (op0, SImode))
17283 {
17284 /* ADDS <Rd>,<Rn>,<Rm> */
17285 if (low_register_operand (op1, SImode))
17286 action = CONV;
17287 /* ADDS <Rdn>,#<imm8> */
17288 /* SUBS <Rdn>,#<imm8> */
17289 else if (rtx_equal_p (dst, op0)
17290 && CONST_INT_P (op1)
17291 && IN_RANGE (INTVAL (op1), -255, 255))
17292 action = CONV;
17293 /* ADDS <Rd>,<Rn>,#<imm3> */
17294 /* SUBS <Rd>,<Rn>,#<imm3> */
17295 else if (CONST_INT_P (op1)
17296 && IN_RANGE (INTVAL (op1), -7, 7))
17297 action = CONV;
17298 }
17299 /* ADCS <Rd>, <Rn> */
17300 else if (GET_CODE (XEXP (src, 0)) == PLUS
17301 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17302 && low_register_operand (XEXP (XEXP (src, 0), 1),
17303 SImode)
17304 && COMPARISON_P (op1)
17305 && cc_register (XEXP (op1, 0), VOIDmode)
17306 && maybe_get_arm_condition_code (op1) == ARM_CS
17307 && XEXP (op1, 1) == const0_rtx)
17308 action = CONV;
17309 break;
17310
17311 case MINUS:
17312 /* RSBS <Rd>,<Rn>,#0
17313 Not handled here: see NEG below. */
17314 /* SUBS <Rd>,<Rn>,#<imm3>
17315 SUBS <Rdn>,#<imm8>
17316 Not handled here: see PLUS above. */
17317 /* SUBS <Rd>,<Rn>,<Rm> */
17318 if (low_register_operand (op0, SImode)
17319 && low_register_operand (op1, SImode))
17320 action = CONV;
17321 break;
17322
17323 case MULT:
17324 /* MULS <Rdm>,<Rn>,<Rdm>
17325 As an exception to the rule, this is only used
17326 when optimizing for size since MULS is slow on all
17327 known implementations. We do not even want to use
17328 MULS in cold code, if optimizing for speed, so we
17329 test the global flag here. */
17330 if (!optimize_size)
17331 break;
17332 /* Fall through. */
17333 case AND:
17334 case IOR:
17335 case XOR:
17336 /* ANDS <Rdn>,<Rm> */
17337 if (rtx_equal_p (dst, op0)
17338 && low_register_operand (op1, SImode))
17339 action = action_for_partial_flag_setting;
17340 else if (rtx_equal_p (dst, op1)
17341 && low_register_operand (op0, SImode))
17342 action = action_for_partial_flag_setting == SKIP
17343 ? SKIP : SWAP_CONV;
17344 break;
17345
17346 case ASHIFTRT:
17347 case ASHIFT:
17348 case LSHIFTRT:
17349 /* ASRS <Rdn>,<Rm> */
17350 /* LSRS <Rdn>,<Rm> */
17351 /* LSLS <Rdn>,<Rm> */
17352 if (rtx_equal_p (dst, op0)
17353 && low_register_operand (op1, SImode))
17354 action = action_for_partial_flag_setting;
17355 /* ASRS <Rd>,<Rm>,#<imm5> */
17356 /* LSRS <Rd>,<Rm>,#<imm5> */
17357 /* LSLS <Rd>,<Rm>,#<imm5> */
17358 else if (low_register_operand (op0, SImode)
17359 && CONST_INT_P (op1)
17360 && IN_RANGE (INTVAL (op1), 0, 31))
17361 action = action_for_partial_flag_setting;
17362 break;
17363
17364 case ROTATERT:
17365 /* RORS <Rdn>,<Rm> */
17366 if (rtx_equal_p (dst, op0)
17367 && low_register_operand (op1, SImode))
17368 action = action_for_partial_flag_setting;
17369 break;
17370
17371 case NOT:
17372 /* MVNS <Rd>,<Rm> */
17373 if (low_register_operand (op0, SImode))
17374 action = action_for_partial_flag_setting;
17375 break;
17376
17377 case NEG:
17378 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17379 if (low_register_operand (op0, SImode))
17380 action = CONV;
17381 break;
17382
17383 case CONST_INT:
17384 /* MOVS <Rd>,#<imm8> */
17385 if (CONST_INT_P (src)
17386 && IN_RANGE (INTVAL (src), 0, 255))
17387 action = action_for_partial_flag_setting;
17388 break;
17389
17390 case REG:
17391 /* MOVS and MOV<c> with registers have different
17392 encodings, so are not relevant here. */
17393 break;
17394
17395 default:
17396 break;
17397 }
17398 }
17399
17400 if (action != SKIP)
17401 {
17402 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17403 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17404 rtvec vec;
17405
17406 if (action == SWAP_CONV)
17407 {
17408 src = copy_rtx (src);
17409 XEXP (src, 0) = op1;
17410 XEXP (src, 1) = op0;
17411 pat = gen_rtx_SET (dst, src);
17412 vec = gen_rtvec (2, pat, clobber);
17413 }
17414 else /* action == CONV */
17415 vec = gen_rtvec (2, pat, clobber);
17416
17417 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17418 INSN_CODE (insn) = -1;
17419 }
17420 }
17421
17422 if (NONDEBUG_INSN_P (insn))
17423 df_simulate_one_insn_backwards (bb, insn, &live);
17424 }
17425 }
17426
17427 CLEAR_REG_SET (&live);
17428 }
17429
17430 /* GCC puts the pool in the wrong place for ARM, since we can only
17431 load addresses a limited distance around the pc. We do some
17432 special munging to move the constant pool values to the correct
17433 point in the code. */
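/* For example (ranges quoted approximately, from the pool_range insn
   attributes rather than recomputed here): an ARM-mode
   "ldr rN, [pc, #offset]" reaches only about +/-4K around the insn, and a
   Thumb-1 literal load only about 1K forwards, so every constant must land
   in a minipool that each insn referencing it can still address.  */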
17434 static void
17435 arm_reorg (void)
17436 {
17437 rtx_insn *insn;
17438 HOST_WIDE_INT address = 0;
17439 Mfix * fix;
17440
17441 if (use_cmse)
17442 cmse_nonsecure_call_clear_caller_saved ();
17443 if (TARGET_THUMB1)
17444 thumb1_reorg ();
17445 else if (TARGET_THUMB2)
17446 thumb2_reorg ();
17447
17448 /* Ensure all insns that must be split have been split at this point.
17449 Otherwise, the pool placement code below may compute incorrect
17450 insn lengths. Note that when optimizing, all insns have already
17451 been split at this point. */
17452 if (!optimize)
17453 split_all_insns_noflow ();
17454
17455 /* If literal pools are disabled, do not attempt to create one; none
17456 should be needed at this point. */
17457 if (arm_disable_literal_pool)
17458 return;
17459
17460 minipool_fix_head = minipool_fix_tail = NULL;
17461
17462 /* The first insn must always be a note, or the code below won't
17463 scan it properly. */
17464 insn = get_insns ();
17465 gcc_assert (NOTE_P (insn));
17466 minipool_pad = 0;
17467
17468 /* Scan all the insns and record the operands that will need fixing. */
17469 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17470 {
17471 if (BARRIER_P (insn))
17472 push_minipool_barrier (insn, address);
17473 else if (INSN_P (insn))
17474 {
17475 rtx_jump_table_data *table;
17476
17477 note_invalid_constants (insn, address, true);
17478 address += get_attr_length (insn);
17479
17480 /* If the insn is a vector jump, add the size of the table
17481 and skip the table. */
17482 if (tablejump_p (insn, NULL, &table))
17483 {
17484 address += get_jump_table_size (table);
17485 insn = table;
17486 }
17487 }
17488 else if (LABEL_P (insn))
17489 /* Add the worst-case padding due to alignment. We don't add
17490 the _current_ padding because the minipool insertions
17491 themselves might change it. */
17492 address += get_label_padding (insn);
17493 }
17494
17495 fix = minipool_fix_head;
17496
17497 /* Now scan the fixups and perform the required changes. */
17498 while (fix)
17499 {
17500 Mfix * ftmp;
17501 Mfix * fdel;
17502 Mfix * last_added_fix;
17503 Mfix * last_barrier = NULL;
17504 Mfix * this_fix;
17505
17506 /* Skip any further barriers before the next fix. */
17507 while (fix && BARRIER_P (fix->insn))
17508 fix = fix->next;
17509
17510 /* No more fixes. */
17511 if (fix == NULL)
17512 break;
17513
17514 last_added_fix = NULL;
17515
17516 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17517 {
17518 if (BARRIER_P (ftmp->insn))
17519 {
17520 if (ftmp->address >= minipool_vector_head->max_address)
17521 break;
17522
17523 last_barrier = ftmp;
17524 }
17525 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17526 break;
17527
17528 last_added_fix = ftmp; /* Keep track of the last fix added. */
17529 }
17530
17531 /* If we found a barrier, drop back to that; any fixes that we
17532 could have reached but come after the barrier will now go in
17533 the next mini-pool. */
17534 if (last_barrier != NULL)
17535 {
17536 /* Reduce the refcount for those fixes that won't go into this
17537 pool after all. */
17538 for (fdel = last_barrier->next;
17539 fdel && fdel != ftmp;
17540 fdel = fdel->next)
17541 {
17542 fdel->minipool->refcount--;
17543 fdel->minipool = NULL;
17544 }
17545
17546 ftmp = last_barrier;
17547 }
17548 else
17549 {
17550 /* ftmp is the first fix that we can't fit into this pool and
17551 there are no natural barriers that we could use. Insert a
17552 new barrier in the code somewhere between the previous
17553 fix and this one, and arrange to jump around it. */
17554 HOST_WIDE_INT max_address;
17555
17556 /* The last item on the list of fixes must be a barrier, so
17557 we can never run off the end of the list of fixes without
17558 last_barrier being set. */
17559 gcc_assert (ftmp);
17560
17561 max_address = minipool_vector_head->max_address;
17562 /* Check that there isn't another fix that is in range that
17563 we couldn't fit into this pool because the pool was
17564 already too large: we need to put the pool before such an
17565 instruction. The pool itself may come just after the
17566 fix because create_fix_barrier also allows space for a
17567 jump instruction. */
17568 if (ftmp->address < max_address)
17569 max_address = ftmp->address + 1;
17570
17571 last_barrier = create_fix_barrier (last_added_fix, max_address);
17572 }
17573
17574 assign_minipool_offsets (last_barrier);
17575
17576 while (ftmp)
17577 {
17578 if (!BARRIER_P (ftmp->insn)
17579 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17580 == NULL))
17581 break;
17582
17583 ftmp = ftmp->next;
17584 }
17585
17586 /* Scan over the fixes we have identified for this pool, fixing them
17587 up and adding the constants to the pool itself. */
17588 for (this_fix = fix; this_fix && ftmp != this_fix;
17589 this_fix = this_fix->next)
17590 if (!BARRIER_P (this_fix->insn))
17591 {
17592 rtx addr
17593 = plus_constant (Pmode,
17594 gen_rtx_LABEL_REF (VOIDmode,
17595 minipool_vector_label),
17596 this_fix->minipool->offset);
17597 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17598 }
17599
17600 dump_minipool (last_barrier->insn);
17601 fix = ftmp;
17602 }
17603
17604 /* From now on we must synthesize any constants that we can't handle
17605 directly. This can happen if the RTL gets split during final
17606 instruction generation. */
17607 cfun->machine->after_arm_reorg = 1;
17608
17609 /* Free the minipool memory. */
17610 obstack_free (&minipool_obstack, minipool_startobj);
17611 }
17612 \f
17613 /* Routines to output assembly language. */
17614
17615 /* Return the string representation of the passed-in real value. */
17616 static const char *
17617 fp_const_from_val (REAL_VALUE_TYPE *r)
17618 {
17619 if (!fp_consts_inited)
17620 init_fp_table ();
17621
17622 gcc_assert (real_equal (r, &value_fp0));
17623 return "0";
17624 }
17625
17626 /* OPERANDS[0] is the entire list of insns that constitute pop,
17627 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17628 insn is in the list, UPDATE is true iff the list contains an
17629 explicit update of the base register. */
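/* Typical outputs (illustrative): with SP as the base register and an
   update, "pop {r4, r5, pc}"; with another base register and no update,
   "ldm r3, {r4, r5}"; when returning from an interrupt, the "^" suffix is
   appended so that SPSR is copied back to CPSR as well.  */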
17630 void
17631 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17632 bool update)
17633 {
17634 int i;
17635 char pattern[100];
17636 int offset;
17637 const char *conditional;
17638 int num_saves = XVECLEN (operands[0], 0);
17639 unsigned int regno;
17640 unsigned int regno_base = REGNO (operands[1]);
17641 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17642
17643 offset = 0;
17644 offset += update ? 1 : 0;
17645 offset += return_pc ? 1 : 0;
17646
17647 /* Is the base register in the list? */
17648 for (i = offset; i < num_saves; i++)
17649 {
17650 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17651 /* If SP is in the list, then the base register must be SP. */
17652 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17653 /* If base register is in the list, there must be no explicit update. */
17654 if (regno == regno_base)
17655 gcc_assert (!update);
17656 }
17657
17658 conditional = reverse ? "%?%D0" : "%?%d0";
17659 /* Can't use POP if returning from an interrupt. */
17660 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17661 sprintf (pattern, "pop%s\t{", conditional);
17662 else
17663 {
17664 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17665 It's just a convention; their semantics are identical. */
17666 if (regno_base == SP_REGNUM)
17667 sprintf (pattern, "ldmfd%s\t", conditional);
17668 else if (update)
17669 sprintf (pattern, "ldmia%s\t", conditional);
17670 else
17671 sprintf (pattern, "ldm%s\t", conditional);
17672
17673 strcat (pattern, reg_names[regno_base]);
17674 if (update)
17675 strcat (pattern, "!, {");
17676 else
17677 strcat (pattern, ", {");
17678 }
17679
17680 /* Output the first destination register. */
17681 strcat (pattern,
17682 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17683
17684 /* Output the rest of the destination registers. */
17685 for (i = offset + 1; i < num_saves; i++)
17686 {
17687 strcat (pattern, ", ");
17688 strcat (pattern,
17689 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17690 }
17691
17692 strcat (pattern, "}");
17693
17694 if (interrupt_p && return_pc)
17695 strcat (pattern, "^");
17696
17697 output_asm_insn (pattern, &cond);
17698 }
17699
17700
17701 /* Output the assembly for a store multiple. */
17702
17703 const char *
17704 vfp_output_vstmd (rtx * operands)
17705 {
17706 char pattern[100];
17707 int p;
17708 int base;
17709 int i;
17710 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17711 ? XEXP (operands[0], 0)
17712 : XEXP (XEXP (operands[0], 0), 0);
17713 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17714
17715 if (push_p)
17716 strcpy (pattern, "vpush%?.64\t{%P1");
17717 else
17718 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17719
17720 p = strlen (pattern);
17721
17722 gcc_assert (REG_P (operands[1]));
17723
17724 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17725 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17726 {
17727 p += sprintf (&pattern[p], ", d%d", base + i);
17728 }
17729 strcpy (&pattern[p], "}");
17730
17731 output_asm_insn (pattern, operands);
17732 return "";
17733 }
17734
17735
17736 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17737 number of bytes pushed. */
17738
17739 static int
17740 vfp_emit_fstmd (int base_reg, int count)
17741 {
17742 rtx par;
17743 rtx dwarf;
17744 rtx tmp, reg;
17745 int i;
17746
17747 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17748 register pairs are stored by a store multiple insn. We avoid this
17749 by pushing an extra pair. */
17750 if (count == 2 && !arm_arch6)
17751 {
17752 if (base_reg == LAST_VFP_REGNUM - 3)
17753 base_reg -= 2;
17754 count++;
17755 }
17756
17757 /* FSTMD may not store more than 16 doubleword registers at once. Split
17758 larger stores into multiple parts (up to a maximum of two, in
17759 practice). */
17760 if (count > 16)
17761 {
17762 int saved;
17763 /* NOTE: base_reg is an internal register number, so each D register
17764 counts as 2. */
17765 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17766 saved += vfp_emit_fstmd (base_reg, 16);
17767 return saved;
17768 }
17769
17770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17771 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17772
17773 reg = gen_rtx_REG (DFmode, base_reg);
17774 base_reg += 2;
17775
17776 XVECEXP (par, 0, 0)
17777 = gen_rtx_SET (gen_frame_mem
17778 (BLKmode,
17779 gen_rtx_PRE_MODIFY (Pmode,
17780 stack_pointer_rtx,
17781 plus_constant
17782 (Pmode, stack_pointer_rtx,
17783 - (count * 8)))
17784 ),
17785 gen_rtx_UNSPEC (BLKmode,
17786 gen_rtvec (1, reg),
17787 UNSPEC_PUSH_MULT));
17788
17789 tmp = gen_rtx_SET (stack_pointer_rtx,
17790 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17791 RTX_FRAME_RELATED_P (tmp) = 1;
17792 XVECEXP (dwarf, 0, 0) = tmp;
17793
17794 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17795 RTX_FRAME_RELATED_P (tmp) = 1;
17796 XVECEXP (dwarf, 0, 1) = tmp;
17797
17798 for (i = 1; i < count; i++)
17799 {
17800 reg = gen_rtx_REG (DFmode, base_reg);
17801 base_reg += 2;
17802 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17803
17804 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17805 plus_constant (Pmode,
17806 stack_pointer_rtx,
17807 i * 8)),
17808 reg);
17809 RTX_FRAME_RELATED_P (tmp) = 1;
17810 XVECEXP (dwarf, 0, i + 1) = tmp;
17811 }
17812
17813 par = emit_insn (par);
17814 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17815 RTX_FRAME_RELATED_P (par) = 1;
17816
17817 return count * 8;
17818 }
17819
17820 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17821 has the cmse_nonsecure_call attribute; return false otherwise. */
17822
17823 bool
17824 detect_cmse_nonsecure_call (tree addr)
17825 {
17826 if (!addr)
17827 return false;
17828
17829 tree fntype = TREE_TYPE (addr);
17830 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17831 TYPE_ATTRIBUTES (fntype)))
17832 return true;
17833 return false;
17834 }
17835
17836
17837 /* Emit a call instruction with pattern PAT. ADDR is the address of
17838 the call target. */
17839
17840 void
17841 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17842 {
17843 rtx insn;
17844
17845 insn = emit_call_insn (pat);
17846
17847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17848 If the call might use such an entry, add a use of the PIC register
17849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17850 if (TARGET_VXWORKS_RTP
17851 && flag_pic
17852 && !sibcall
17853 && GET_CODE (addr) == SYMBOL_REF
17854 && (SYMBOL_REF_DECL (addr)
17855 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17856 : !SYMBOL_REF_LOCAL_P (addr)))
17857 {
17858 require_pic_register ();
17859 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17860 }
17861
17862 if (TARGET_AAPCS_BASED)
17863 {
17864 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17865 linker. We need to add an IP clobber to allow setting
17866 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17867 is not needed since it's a fixed register. */
17868 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17869 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17870 }
17871 }
17872
17873 /* Output a 'call' insn. */
17874 const char *
17875 output_call (rtx *operands)
17876 {
17877 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17878
17879 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17880 if (REGNO (operands[0]) == LR_REGNUM)
17881 {
17882 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17883 output_asm_insn ("mov%?\t%0, %|lr", operands);
17884 }
17885
17886 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17887
17888 if (TARGET_INTERWORK || arm_arch4t)
17889 output_asm_insn ("bx%?\t%0", operands);
17890 else
17891 output_asm_insn ("mov%?\t%|pc, %0", operands);
17892
17893 return "";
17894 }
17895
17896 /* Output a move from ARM registers to ARM registers of a long double.
17897 OPERANDS[0] is the destination.
17898 OPERANDS[1] is the source. */
17899 const char *
17900 output_mov_long_double_arm_from_arm (rtx *operands)
17901 {
17902 /* We have to be careful here because the two might overlap. */
17903 int dest_start = REGNO (operands[0]);
17904 int src_start = REGNO (operands[1]);
17905 rtx ops[2];
17906 int i;
17907
17908 if (dest_start < src_start)
17909 {
17910 for (i = 0; i < 3; i++)
17911 {
17912 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17913 ops[1] = gen_rtx_REG (SImode, src_start + i);
17914 output_asm_insn ("mov%?\t%0, %1", ops);
17915 }
17916 }
17917 else
17918 {
17919 for (i = 2; i >= 0; i--)
17920 {
17921 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17922 ops[1] = gen_rtx_REG (SImode, src_start + i);
17923 output_asm_insn ("mov%?\t%0, %1", ops);
17924 }
17925 }
17926
17927 return "";
17928 }
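/* For instance, moving {r1, r2, r3} into {r0, r1, r2} has the destination
   starting below the source, so the first loop copies upwards:

     mov     r0, r1
     mov     r1, r2
     mov     r2, r3

   A move in the other direction is emitted from the highest word down,
   so no source register is clobbered before it has been read.  */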
17929
17930 void
17931 arm_emit_movpair (rtx dest, rtx src)
17932 {
17933 /* If the src is an immediate, simplify it. */
17934 if (CONST_INT_P (src))
17935 {
17936 HOST_WIDE_INT val = INTVAL (src);
17937 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17938 if ((val >> 16) & 0x0000ffff)
17939 {
17940 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17941 GEN_INT (16)),
17942 GEN_INT ((val >> 16) & 0x0000ffff));
17943 rtx_insn *insn = get_last_insn ();
17944 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17945 }
17946 return;
17947 }
17948 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17949 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17950 rtx_insn *insn = get_last_insn ();
17951 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17952 }
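/* By way of example (a sketch, assuming a movw/movt-capable target):
   calling arm_emit_movpair with DEST = r0 and SRC = (const_int 0x12345678)
   first sets the low half and then fills in the high half through the
   ZERO_EXTRACT above, which is normally assembled as

     movw    r0, #0x5678
     movt    r0, #0x1234

   with a REG_EQUAL note recording the full constant on the last insn.  */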
17953
17954 /* Output a move between double words. It must be REG<-MEM
17955 or MEM<-REG. */
17956 const char *
17957 output_move_double (rtx *operands, bool emit, int *count)
17958 {
17959 enum rtx_code code0 = GET_CODE (operands[0]);
17960 enum rtx_code code1 = GET_CODE (operands[1]);
17961 rtx otherops[3];
17962 if (count)
17963 *count = 1;
17964
17965 /* The only case when this might happen is when
17966 you are looking at the length of a DImode instruction
17967 that has an invalid constant in it. */
17968 if (code0 == REG && code1 != MEM)
17969 {
17970 gcc_assert (!emit);
17971 *count = 2;
17972 return "";
17973 }
17974
17975 if (code0 == REG)
17976 {
17977 unsigned int reg0 = REGNO (operands[0]);
17978
17979 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17980
17981 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17982
17983 switch (GET_CODE (XEXP (operands[1], 0)))
17984 {
17985 case REG:
17986
17987 if (emit)
17988 {
17989 if (TARGET_LDRD
17990 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17991 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17992 else
17993 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17994 }
17995 break;
17996
17997 case PRE_INC:
17998 gcc_assert (TARGET_LDRD);
17999 if (emit)
18000 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18001 break;
18002
18003 case PRE_DEC:
18004 if (emit)
18005 {
18006 if (TARGET_LDRD)
18007 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18008 else
18009 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18010 }
18011 break;
18012
18013 case POST_INC:
18014 if (emit)
18015 {
18016 if (TARGET_LDRD)
18017 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18018 else
18019 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18020 }
18021 break;
18022
18023 case POST_DEC:
18024 gcc_assert (TARGET_LDRD);
18025 if (emit)
18026 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18027 break;
18028
18029 case PRE_MODIFY:
18030 case POST_MODIFY:
18031 /* Autoincrement addressing modes should never have overlapping
18032 base and destination registers, and overlapping index registers
18033 are already prohibited, so this doesn't need to worry about
18034 fix_cm3_ldrd. */
18035 otherops[0] = operands[0];
18036 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18037 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18038
18039 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18040 {
18041 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18042 {
18043 /* Registers overlap so split out the increment. */
18044 if (emit)
18045 {
18046 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18047 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18048 }
18049 if (count)
18050 *count = 2;
18051 }
18052 else
18053 {
18054 /* Use a single insn if we can.
18055 FIXME: IWMMXT allows offsets larger than ldrd can
18056 handle, fix these up with a pair of ldr. */
18057 if (TARGET_THUMB2
18058 || !CONST_INT_P (otherops[2])
18059 || (INTVAL (otherops[2]) > -256
18060 && INTVAL (otherops[2]) < 256))
18061 {
18062 if (emit)
18063 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18064 }
18065 else
18066 {
18067 if (emit)
18068 {
18069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18071 }
18072 if (count)
18073 *count = 2;
18074
18075 }
18076 }
18077 }
18078 else
18079 {
18080 /* Use a single insn if we can.
18081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18082 fix these up with a pair of ldr. */
18083 if (TARGET_THUMB2
18084 || !CONST_INT_P (otherops[2])
18085 || (INTVAL (otherops[2]) > -256
18086 && INTVAL (otherops[2]) < 256))
18087 {
18088 if (emit)
18089 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18090 }
18091 else
18092 {
18093 if (emit)
18094 {
18095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18097 }
18098 if (count)
18099 *count = 2;
18100 }
18101 }
18102 break;
18103
18104 case LABEL_REF:
18105 case CONST:
18106 /* We might be able to use ldrd %0, %1 here. However, the range is
18107 different from that of ldr/adr, and it is broken on some ARMv7-M
18108 implementations. */
18109 /* Use the second register of the pair to avoid problematic
18110 overlap. */
18111 otherops[1] = operands[1];
18112 if (emit)
18113 output_asm_insn ("adr%?\t%0, %1", otherops);
18114 operands[1] = otherops[0];
18115 if (emit)
18116 {
18117 if (TARGET_LDRD)
18118 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18119 else
18120 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18121 }
18122
18123 if (count)
18124 *count = 2;
18125 break;
18126
18127 /* ??? This needs checking for thumb2. */
18128 default:
18129 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18130 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18131 {
18132 otherops[0] = operands[0];
18133 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18134 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18135
18136 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18137 {
18138 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18139 {
18140 switch ((int) INTVAL (otherops[2]))
18141 {
18142 case -8:
18143 if (emit)
18144 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18145 return "";
18146 case -4:
18147 if (TARGET_THUMB2)
18148 break;
18149 if (emit)
18150 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18151 return "";
18152 case 4:
18153 if (TARGET_THUMB2)
18154 break;
18155 if (emit)
18156 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18157 return "";
18158 }
18159 }
18160 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18161 operands[1] = otherops[0];
18162 if (TARGET_LDRD
18163 && (REG_P (otherops[2])
18164 || TARGET_THUMB2
18165 || (CONST_INT_P (otherops[2])
18166 && INTVAL (otherops[2]) > -256
18167 && INTVAL (otherops[2]) < 256)))
18168 {
18169 if (reg_overlap_mentioned_p (operands[0],
18170 otherops[2]))
18171 {
18172 /* Swap base and index registers over to
18173 avoid a conflict. */
18174 std::swap (otherops[1], otherops[2]);
18175 }
18176 /* If both registers conflict, it will usually
18177 have been fixed by a splitter. */
18178 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18179 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18180 {
18181 if (emit)
18182 {
18183 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18184 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18185 }
18186 if (count)
18187 *count = 2;
18188 }
18189 else
18190 {
18191 otherops[0] = operands[0];
18192 if (emit)
18193 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18194 }
18195 return "";
18196 }
18197
18198 if (CONST_INT_P (otherops[2]))
18199 {
18200 if (emit)
18201 {
18202 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18203 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18204 else
18205 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18206 }
18207 }
18208 else
18209 {
18210 if (emit)
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18212 }
18213 }
18214 else
18215 {
18216 if (emit)
18217 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18218 }
18219
18220 if (count)
18221 *count = 2;
18222
18223 if (TARGET_LDRD)
18224 return "ldrd%?\t%0, [%1]";
18225
18226 return "ldmia%?\t%1, %M0";
18227 }
18228 else
18229 {
18230 otherops[1] = adjust_address (operands[1], SImode, 4);
18231 /* Take care of overlapping base/data reg. */
18232 if (reg_mentioned_p (operands[0], operands[1]))
18233 {
18234 if (emit)
18235 {
18236 output_asm_insn ("ldr%?\t%0, %1", otherops);
18237 output_asm_insn ("ldr%?\t%0, %1", operands);
18238 }
18239 if (count)
18240 *count = 2;
18241
18242 }
18243 else
18244 {
18245 if (emit)
18246 {
18247 output_asm_insn ("ldr%?\t%0, %1", operands);
18248 output_asm_insn ("ldr%?\t%0, %1", otherops);
18249 }
18250 if (count)
18251 *count = 2;
18252 }
18253 }
18254 }
18255 }
18256 else
18257 {
18258 /* Constraints should ensure this. */
18259 gcc_assert (code0 == MEM && code1 == REG);
18260 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18261 || (TARGET_ARM && TARGET_LDRD));
18262
18263 switch (GET_CODE (XEXP (operands[0], 0)))
18264 {
18265 case REG:
18266 if (emit)
18267 {
18268 if (TARGET_LDRD)
18269 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18270 else
18271 output_asm_insn ("stm%?\t%m0, %M1", operands);
18272 }
18273 break;
18274
18275 case PRE_INC:
18276 gcc_assert (TARGET_LDRD);
18277 if (emit)
18278 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18279 break;
18280
18281 case PRE_DEC:
18282 if (emit)
18283 {
18284 if (TARGET_LDRD)
18285 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18286 else
18287 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18288 }
18289 break;
18290
18291 case POST_INC:
18292 if (emit)
18293 {
18294 if (TARGET_LDRD)
18295 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18296 else
18297 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18298 }
18299 break;
18300
18301 case POST_DEC:
18302 gcc_assert (TARGET_LDRD);
18303 if (emit)
18304 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18305 break;
18306
18307 case PRE_MODIFY:
18308 case POST_MODIFY:
18309 otherops[0] = operands[1];
18310 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18311 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18312
18313 /* IWMMXT allows offsets larger than ldrd can handle,
18314 fix these up with a pair of ldr. */
18315 if (!TARGET_THUMB2
18316 && CONST_INT_P (otherops[2])
18317 && (INTVAL(otherops[2]) <= -256
18318 || INTVAL(otherops[2]) >= 256))
18319 {
18320 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18321 {
18322 if (emit)
18323 {
18324 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18326 }
18327 if (count)
18328 *count = 2;
18329 }
18330 else
18331 {
18332 if (emit)
18333 {
18334 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18335 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18336 }
18337 if (count)
18338 *count = 2;
18339 }
18340 }
18341 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18342 {
18343 if (emit)
18344 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18345 }
18346 else
18347 {
18348 if (emit)
18349 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18350 }
18351 break;
18352
18353 case PLUS:
18354 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18355 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18356 {
18357 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18358 {
18359 case -8:
18360 if (emit)
18361 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18362 return "";
18363
18364 case -4:
18365 if (TARGET_THUMB2)
18366 break;
18367 if (emit)
18368 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18369 return "";
18370
18371 case 4:
18372 if (TARGET_THUMB2)
18373 break;
18374 if (emit)
18375 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18376 return "";
18377 }
18378 }
18379 if (TARGET_LDRD
18380 && (REG_P (otherops[2])
18381 || TARGET_THUMB2
18382 || (CONST_INT_P (otherops[2])
18383 && INTVAL (otherops[2]) > -256
18384 && INTVAL (otherops[2]) < 256)))
18385 {
18386 otherops[0] = operands[1];
18387 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18388 if (emit)
18389 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18390 return "";
18391 }
18392 /* Fall through */
18393
18394 default:
18395 otherops[0] = adjust_address (operands[0], SImode, 4);
18396 otherops[1] = operands[1];
18397 if (emit)
18398 {
18399 output_asm_insn ("str%?\t%1, %0", operands);
18400 output_asm_insn ("str%?\t%H1, %0", otherops);
18401 }
18402 if (count)
18403 *count = 2;
18404 }
18405 }
18406
18407 return "";
18408 }
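/* Two concrete cases, for illustration (assuming an ARM-state target with
   LDRD/STRD available): a DImode load from a plain register address is
   printed as

     ldrd    r0, [r4]

   whereas a load whose base register overlaps the destination pair, or
   whose pre/post-modify offset lies outside the -255..255 ldrd range,
   falls back to one of the two-instruction sequences above and sets
   *COUNT to 2.  */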
18409
18410 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18411 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18412
18413 const char *
18414 output_move_quad (rtx *operands)
18415 {
18416 if (REG_P (operands[0]))
18417 {
18418 /* Load, or reg->reg move. */
18419
18420 if (MEM_P (operands[1]))
18421 {
18422 switch (GET_CODE (XEXP (operands[1], 0)))
18423 {
18424 case REG:
18425 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18426 break;
18427
18428 case LABEL_REF:
18429 case CONST:
18430 output_asm_insn ("adr%?\t%0, %1", operands);
18431 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18432 break;
18433
18434 default:
18435 gcc_unreachable ();
18436 }
18437 }
18438 else
18439 {
18440 rtx ops[2];
18441 int dest, src, i;
18442
18443 gcc_assert (REG_P (operands[1]));
18444
18445 dest = REGNO (operands[0]);
18446 src = REGNO (operands[1]);
18447
18448 /* This seems pretty dumb, but hopefully GCC won't try to do it
18449 very often. */
18450 if (dest < src)
18451 for (i = 0; i < 4; i++)
18452 {
18453 ops[0] = gen_rtx_REG (SImode, dest + i);
18454 ops[1] = gen_rtx_REG (SImode, src + i);
18455 output_asm_insn ("mov%?\t%0, %1", ops);
18456 }
18457 else
18458 for (i = 3; i >= 0; i--)
18459 {
18460 ops[0] = gen_rtx_REG (SImode, dest + i);
18461 ops[1] = gen_rtx_REG (SImode, src + i);
18462 output_asm_insn ("mov%?\t%0, %1", ops);
18463 }
18464 }
18465 }
18466 else
18467 {
18468 gcc_assert (MEM_P (operands[0]));
18469 gcc_assert (REG_P (operands[1]));
18470 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18471
18472 switch (GET_CODE (XEXP (operands[0], 0)))
18473 {
18474 case REG:
18475 output_asm_insn ("stm%?\t%m0, %M1", operands);
18476 break;
18477
18478 default:
18479 gcc_unreachable ();
18480 }
18481 }
18482
18483 return "";
18484 }
18485
18486 /* Output a VFP load or store instruction. */
18487
18488 const char *
18489 output_move_vfp (rtx *operands)
18490 {
18491 rtx reg, mem, addr, ops[2];
18492 int load = REG_P (operands[0]);
18493 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18494 int sp = (!TARGET_VFP_FP16INST
18495 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18496 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18497 const char *templ;
18498 char buff[50];
18499 machine_mode mode;
18500
18501 reg = operands[!load];
18502 mem = operands[load];
18503
18504 mode = GET_MODE (reg);
18505
18506 gcc_assert (REG_P (reg));
18507 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18508 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18509 || mode == SFmode
18510 || mode == DFmode
18511 || mode == HImode
18512 || mode == SImode
18513 || mode == DImode
18514 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18515 gcc_assert (MEM_P (mem));
18516
18517 addr = XEXP (mem, 0);
18518
18519 switch (GET_CODE (addr))
18520 {
18521 case PRE_DEC:
18522 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18523 ops[0] = XEXP (addr, 0);
18524 ops[1] = reg;
18525 break;
18526
18527 case POST_INC:
18528 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18529 ops[0] = XEXP (addr, 0);
18530 ops[1] = reg;
18531 break;
18532
18533 default:
18534 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18535 ops[0] = reg;
18536 ops[1] = mem;
18537 break;
18538 }
18539
18540 sprintf (buff, templ,
18541 load ? "ld" : "st",
18542 dp ? "64" : sp ? "32" : "16",
18543 dp ? "P" : "",
18544 integer_p ? "\t%@ int" : "");
18545 output_asm_insn (buff, ops);
18546
18547 return "";
18548 }
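/* For example (assuming a double-precision capable VFP unit), loading a
   DFmode value into d7 from a plain register address produces something
   like

     vldr.64 d7, [r0]

   while the PRE_DEC and POST_INC cases above instead use the vstmdb /
   vldmia writeback forms with a single-register list.  */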
18549
18550 /* Output a Neon double-word or quad-word load or store, or a load
18551 or store for larger structure modes.
18552
18553 WARNING: The ordering of elements is weird in big-endian mode,
18554 because the EABI requires that vectors stored in memory appear
18555 as though they were stored by a VSTM instruction.
18556 GCC RTL defines element ordering based on in-memory order.
18557 This can be different from the architectural ordering of elements
18558 within a NEON register. The intrinsics defined in arm_neon.h use the
18559 NEON register element ordering, not the GCC RTL element ordering.
18560
18561 For example, the in-memory ordering of a big-endian quadword
18562 vector with 16-bit elements when stored from register pair {d0,d1}
18563 will be (lowest address first, d0[N] is NEON register element N):
18564
18565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18566
18567 When necessary, quadword registers (dN, dN+1) are moved to ARM
18568 registers from rN in the order:
18569
18570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18571
18572 So that STM/LDM can be used on vectors in ARM registers, and the
18573 same memory layout will result as if VSTM/VLDM were used.
18574
18575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18576 possible, which allows use of appropriate alignment tags.
18577 Note that the choice of "64" is independent of the actual vector
18578 element size; this size simply ensures that the behavior is
18579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18580
18581 Due to limitations of those instructions, use of VST1.64/VLD1.64
18582 is not possible if:
18583 - the address contains PRE_DEC, or
18584 - the mode refers to more than 4 double-word registers
18585
18586 In those cases, it would be possible to replace VSTM/VLDM by a
18587 sequence of instructions; this is not currently implemented since
18588 this is not certain to actually improve performance. */
18589
18590 const char *
18591 output_move_neon (rtx *operands)
18592 {
18593 rtx reg, mem, addr, ops[2];
18594 int regno, nregs, load = REG_P (operands[0]);
18595 const char *templ;
18596 char buff[50];
18597 machine_mode mode;
18598
18599 reg = operands[!load];
18600 mem = operands[load];
18601
18602 mode = GET_MODE (reg);
18603
18604 gcc_assert (REG_P (reg));
18605 regno = REGNO (reg);
18606 nregs = REG_NREGS (reg) / 2;
18607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18608 || NEON_REGNO_OK_FOR_QUAD (regno));
18609 gcc_assert (VALID_NEON_DREG_MODE (mode)
18610 || VALID_NEON_QREG_MODE (mode)
18611 || VALID_NEON_STRUCT_MODE (mode));
18612 gcc_assert (MEM_P (mem));
18613
18614 addr = XEXP (mem, 0);
18615
18616 /* Strip off const from addresses like (const (plus (...))). */
18617 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18618 addr = XEXP (addr, 0);
18619
18620 switch (GET_CODE (addr))
18621 {
18622 case POST_INC:
18623 /* We have to use vldm / vstm for too-large modes. */
18624 if (nregs > 4)
18625 {
18626 templ = "v%smia%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18628 }
18629 else
18630 {
18631 templ = "v%s1.64\t%%h1, %%A0";
18632 ops[0] = mem;
18633 }
18634 ops[1] = reg;
18635 break;
18636
18637 case PRE_DEC:
18638 /* We have to use vldm / vstm in this case, since there is no
18639 pre-decrement form of the vld1 / vst1 instructions. */
18640 templ = "v%smdb%%?\t%%0!, %%h1";
18641 ops[0] = XEXP (addr, 0);
18642 ops[1] = reg;
18643 break;
18644
18645 case POST_MODIFY:
18646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18647 gcc_unreachable ();
18648
18649 case REG:
18650 /* We have to use vldm / vstm for too-large modes. */
18651 if (nregs > 1)
18652 {
18653 if (nregs > 4)
18654 templ = "v%smia%%?\t%%m0, %%h1";
18655 else
18656 templ = "v%s1.64\t%%h1, %%A0";
18657
18658 ops[0] = mem;
18659 ops[1] = reg;
18660 break;
18661 }
18662 /* Fall through. */
18663 case LABEL_REF:
18664 case PLUS:
18665 {
18666 int i;
18667 int overlap = -1;
18668 for (i = 0; i < nregs; i++)
18669 {
18670 /* We're only using DImode here because it's a convenient size. */
18671 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18672 ops[1] = adjust_address (mem, DImode, 8 * i);
18673 if (reg_overlap_mentioned_p (ops[0], mem))
18674 {
18675 gcc_assert (overlap == -1);
18676 overlap = i;
18677 }
18678 else
18679 {
18680 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18681 output_asm_insn (buff, ops);
18682 }
18683 }
18684 if (overlap != -1)
18685 {
18686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18687 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18688 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18689 output_asm_insn (buff, ops);
18690 }
18691
18692 return "";
18693 }
18694
18695 default:
18696 gcc_unreachable ();
18697 }
18698
18699 sprintf (buff, templ, load ? "ld" : "st");
18700 output_asm_insn (buff, ops);
18701
18702 return "";
18703 }
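/* As an illustration only: a quad-word load through POST_INC, say q0 from
   address register r0, takes the vld1.64 path above and comes out roughly
   as

     vld1.64 {d0, d1}, [r0]!

   whereas an XImode access covers eight D registers, exceeds the
   four-register limit, and is emitted as a vldmia / vstmia instead.  */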
18704
18705 /* Compute and return the length of neon_mov<mode>, where <mode> is
18706 one of VSTRUCT modes: EI, OI, CI or XI. */
18707 int
18708 arm_attr_length_move_neon (rtx_insn *insn)
18709 {
18710 rtx reg, mem, addr;
18711 int load;
18712 machine_mode mode;
18713
18714 extract_insn_cached (insn);
18715
18716 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18717 {
18718 mode = GET_MODE (recog_data.operand[0]);
18719 switch (mode)
18720 {
18721 case E_EImode:
18722 case E_OImode:
18723 return 8;
18724 case E_CImode:
18725 return 12;
18726 case E_XImode:
18727 return 16;
18728 default:
18729 gcc_unreachable ();
18730 }
18731 }
18732
18733 load = REG_P (recog_data.operand[0]);
18734 reg = recog_data.operand[!load];
18735 mem = recog_data.operand[load];
18736
18737 gcc_assert (MEM_P (mem));
18738
18739 addr = XEXP (mem, 0);
18740
18741 /* Strip off const from addresses like (const (plus (...))). */
18742 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18743 addr = XEXP (addr, 0);
18744
18745 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18746 {
18747 int insns = REG_NREGS (reg) / 2;
18748 return insns * 4;
18749 }
18750 else
18751 return 4;
18752 }
18753
18754 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18755 return zero. */
18756
18757 int
18758 arm_address_offset_is_imm (rtx_insn *insn)
18759 {
18760 rtx mem, addr;
18761
18762 extract_insn_cached (insn);
18763
18764 if (REG_P (recog_data.operand[0]))
18765 return 0;
18766
18767 mem = recog_data.operand[0];
18768
18769 gcc_assert (MEM_P (mem));
18770
18771 addr = XEXP (mem, 0);
18772
18773 if (REG_P (addr)
18774 || (GET_CODE (addr) == PLUS
18775 && REG_P (XEXP (addr, 0))
18776 && CONST_INT_P (XEXP (addr, 1))))
18777 return 1;
18778 else
18779 return 0;
18780 }
18781
18782 /* Output an ADD r, s, #n where n may be too big for one instruction.
18783 If adding zero and the source and destination are the same register, output nothing. */
18784 const char *
18785 output_add_immediate (rtx *operands)
18786 {
18787 HOST_WIDE_INT n = INTVAL (operands[2]);
18788
18789 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18790 {
18791 if (n < 0)
18792 output_multi_immediate (operands,
18793 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18794 -n);
18795 else
18796 output_multi_immediate (operands,
18797 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18798 n);
18799 }
18800
18801 return "";
18802 }
18803
18804 /* Output a multiple immediate operation.
18805 OPERANDS is the vector of operands referred to in the output patterns.
18806 INSTR1 is the output pattern to use for the first constant.
18807 INSTR2 is the output pattern to use for subsequent constants.
18808 IMMED_OP is the index of the constant slot in OPERANDS.
18809 N is the constant value. */
18810 static const char *
18811 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18812 int immed_op, HOST_WIDE_INT n)
18813 {
18814 #if HOST_BITS_PER_WIDE_INT > 32
18815 n &= 0xffffffff;
18816 #endif
18817
18818 if (n == 0)
18819 {
18820 /* Quick and easy output. */
18821 operands[immed_op] = const0_rtx;
18822 output_asm_insn (instr1, operands);
18823 }
18824 else
18825 {
18826 int i;
18827 const char * instr = instr1;
18828
18829 /* Note that n is never zero here (which would give no output). */
18830 for (i = 0; i < 32; i += 2)
18831 {
18832 if (n & (3 << i))
18833 {
18834 operands[immed_op] = GEN_INT (n & (255 << i));
18835 output_asm_insn (instr, operands);
18836 instr = instr2;
18837 i += 6;
18838 }
18839 }
18840 }
18841
18842 return "";
18843 }
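/* A worked example: adding 0x00ff00ff cannot be encoded as one immediate,
   so output_add_immediate splits it into 8-bit chunks at even rotations,
   giving roughly

     add     r0, r1, #255            @ 0x000000ff
     add     r0, r0, #16711680       @ 0x00ff0000

   Each pass of the loop above emits one such chunk and then skips past
   the byte it has just consumed.  */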
18844
18845 /* Return the name of a shifter operation. */
18846 static const char *
18847 arm_shift_nmem (enum rtx_code code)
18848 {
18849 switch (code)
18850 {
18851 case ASHIFT:
18852 return ARM_LSL_NAME;
18853
18854 case ASHIFTRT:
18855 return "asr";
18856
18857 case LSHIFTRT:
18858 return "lsr";
18859
18860 case ROTATERT:
18861 return "ror";
18862
18863 default:
18864 abort();
18865 }
18866 }
18867
18868 /* Return the appropriate ARM instruction for the operation code.
18869 The returned result should not be overwritten. OP is the rtx of the
18870 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18871 was shifted. */
18872 const char *
18873 arithmetic_instr (rtx op, int shift_first_arg)
18874 {
18875 switch (GET_CODE (op))
18876 {
18877 case PLUS:
18878 return "add";
18879
18880 case MINUS:
18881 return shift_first_arg ? "rsb" : "sub";
18882
18883 case IOR:
18884 return "orr";
18885
18886 case XOR:
18887 return "eor";
18888
18889 case AND:
18890 return "and";
18891
18892 case ASHIFT:
18893 case ASHIFTRT:
18894 case LSHIFTRT:
18895 case ROTATERT:
18896 return arm_shift_nmem (GET_CODE (op));
18897
18898 default:
18899 gcc_unreachable ();
18900 }
18901 }
18902
18903 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18904 for the operation code. The returned result should not be overwritten.
18905 OP is the rtx of the shift.
18906 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18907 constant shift amount otherwise. */
18908 static const char *
18909 shift_op (rtx op, HOST_WIDE_INT *amountp)
18910 {
18911 const char * mnem;
18912 enum rtx_code code = GET_CODE (op);
18913
18914 switch (code)
18915 {
18916 case ROTATE:
18917 if (!CONST_INT_P (XEXP (op, 1)))
18918 {
18919 output_operand_lossage ("invalid shift operand");
18920 return NULL;
18921 }
18922
18923 code = ROTATERT;
18924 *amountp = 32 - INTVAL (XEXP (op, 1));
18925 mnem = "ror";
18926 break;
18927
18928 case ASHIFT:
18929 case ASHIFTRT:
18930 case LSHIFTRT:
18931 case ROTATERT:
18932 mnem = arm_shift_nmem (code);
18933 if (CONST_INT_P (XEXP (op, 1)))
18934 {
18935 *amountp = INTVAL (XEXP (op, 1));
18936 }
18937 else if (REG_P (XEXP (op, 1)))
18938 {
18939 *amountp = -1;
18940 return mnem;
18941 }
18942 else
18943 {
18944 output_operand_lossage ("invalid shift operand");
18945 return NULL;
18946 }
18947 break;
18948
18949 case MULT:
18950 /* We never have to worry about the amount being other than a
18951 power of 2, since this case can never be reloaded from a reg. */
18952 if (!CONST_INT_P (XEXP (op, 1)))
18953 {
18954 output_operand_lossage ("invalid shift operand");
18955 return NULL;
18956 }
18957
18958 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18959
18960 /* Amount must be a power of two. */
18961 if (*amountp & (*amountp - 1))
18962 {
18963 output_operand_lossage ("invalid shift operand");
18964 return NULL;
18965 }
18966
18967 *amountp = exact_log2 (*amountp);
18968 gcc_assert (IN_RANGE (*amountp, 0, 31));
18969 return ARM_LSL_NAME;
18970
18971 default:
18972 output_operand_lossage ("invalid shift operand");
18973 return NULL;
18974 }
18975
18976 /* This is not 100% correct, but follows from the desire to merge
18977 multiplication by a power of 2 with the recognizer for a
18978 shift. >=32 is not a valid shift for "lsl", so we must try and
18979 output a shift that produces the correct arithmetical result.
18980 Using lsr #32 is identical except for the fact that the carry bit
18981 is not set correctly if we set the flags; but we never use the
18982 carry bit from such an operation, so we can ignore that. */
18983 if (code == ROTATERT)
18984 /* Rotate is just modulo 32. */
18985 *amountp &= 31;
18986 else if (*amountp != (*amountp & 31))
18987 {
18988 if (code == ASHIFT)
18989 mnem = "lsr";
18990 *amountp = 32;
18991 }
18992
18993 /* Shifts of 0 are no-ops. */
18994 if (*amountp == 0)
18995 return NULL;
18996
18997 return mnem;
18998 }
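/* Two examples of the mapping done here (illustrative only): a MULT by 8
   is rewritten as the shift "lsl #3", while an ASHIFT by a constant of 32
   or more is printed as "lsr #32", which produces the same all-zero
   result as the out-of-range left shift while staying within the range
   the assembler accepts.  */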
18999
19000 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19001 because /bin/as is horribly restrictive. The judgement about
19002 whether or not each character is 'printable' (and can be output as
19003 is) or not (and must be printed with an octal escape) must be made
19004 with reference to the *host* character set -- the situation is
19005 similar to that discussed in the comments above pp_c_char in
19006 c-pretty-print.c. */
19007
19008 #define MAX_ASCII_LEN 51
19009
19010 void
19011 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19012 {
19013 int i;
19014 int len_so_far = 0;
19015
19016 fputs ("\t.ascii\t\"", stream);
19017
19018 for (i = 0; i < len; i++)
19019 {
19020 int c = p[i];
19021
19022 if (len_so_far >= MAX_ASCII_LEN)
19023 {
19024 fputs ("\"\n\t.ascii\t\"", stream);
19025 len_so_far = 0;
19026 }
19027
19028 if (ISPRINT (c))
19029 {
19030 if (c == '\\' || c == '\"')
19031 {
19032 putc ('\\', stream);
19033 len_so_far++;
19034 }
19035 putc (c, stream);
19036 len_so_far++;
19037 }
19038 else
19039 {
19040 fprintf (stream, "\\%03o", c);
19041 len_so_far += 4;
19042 }
19043 }
19044
19045 fputs ("\"\n", stream);
19046 }
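/* For example, fed the four bytes 'a', '"', '\n' and 0 this emits

     .ascii  "a\"\012\000"

   starting a fresh .ascii directive whenever the running length passes
   MAX_ASCII_LEN.  */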
19047 \f
19048 /* Whether a register is callee saved or not. This is necessary because high
19049 registers are marked as caller saved when optimizing for size on Thumb-1
19050 targets despite being callee saved in order to avoid using them. */
19051 #define callee_saved_reg_p(reg) \
19052 (!call_used_regs[reg] \
19053 || (TARGET_THUMB1 && optimize_size \
19054 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19055
19056 /* Compute the register save mask for registers 0 through 12
19057 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19058
19059 static unsigned long
19060 arm_compute_save_reg0_reg12_mask (void)
19061 {
19062 unsigned long func_type = arm_current_func_type ();
19063 unsigned long save_reg_mask = 0;
19064 unsigned int reg;
19065
19066 if (IS_INTERRUPT (func_type))
19067 {
19068 unsigned int max_reg;
19069 /* Interrupt functions must not corrupt any registers,
19070 even call clobbered ones. If this is a leaf function
19071 we can just examine the registers used by the RTL, but
19072 otherwise we have to assume that whatever function is
19073 called might clobber anything, and so we have to save
19074 all the call-clobbered registers as well. */
19075 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19076 /* FIQ handlers have registers r8 - r12 banked, so
19077 we only need to check r0 - r7. Normal ISRs only
19078 bank r14 and r15, so we must check up to r12.
19079 r13 is the stack pointer which is always preserved,
19080 so we do not need to consider it here. */
19081 max_reg = 7;
19082 else
19083 max_reg = 12;
19084
19085 for (reg = 0; reg <= max_reg; reg++)
19086 if (df_regs_ever_live_p (reg)
19087 || (! crtl->is_leaf && call_used_regs[reg]))
19088 save_reg_mask |= (1 << reg);
19089
19090 /* Also save the pic base register if necessary. */
19091 if (flag_pic
19092 && !TARGET_SINGLE_PIC_BASE
19093 && arm_pic_register != INVALID_REGNUM
19094 && crtl->uses_pic_offset_table)
19095 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19096 }
19097 else if (IS_VOLATILE(func_type))
19098 {
19099 /* For noreturn functions we historically omitted register saves
19100 altogether. However, this really messes up debugging. As a
19101 compromise, save just the frame pointers. Combined with the link
19102 register saved elsewhere this should be sufficient to get
19103 a backtrace. */
19104 if (frame_pointer_needed)
19105 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19106 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19107 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19108 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19109 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19110 }
19111 else
19112 {
19113 /* In the normal case we only need to save those registers
19114 which are call saved and which are used by this function. */
19115 for (reg = 0; reg <= 11; reg++)
19116 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19117 save_reg_mask |= (1 << reg);
19118
19119 /* Handle the frame pointer as a special case. */
19120 if (frame_pointer_needed)
19121 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19122
19123 /* If we aren't loading the PIC register,
19124 don't stack it even though it may be live. */
19125 if (flag_pic
19126 && !TARGET_SINGLE_PIC_BASE
19127 && arm_pic_register != INVALID_REGNUM
19128 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19129 || crtl->uses_pic_offset_table))
19130 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19131
19132 /* The prologue will copy SP into R0, so save it. */
19133 if (IS_STACKALIGN (func_type))
19134 save_reg_mask |= 1;
19135 }
19136
19137 /* Save registers so the exception handler can modify them. */
19138 if (crtl->calls_eh_return)
19139 {
19140 unsigned int i;
19141
19142 for (i = 0; ; i++)
19143 {
19144 reg = EH_RETURN_DATA_REGNO (i);
19145 if (reg == INVALID_REGNUM)
19146 break;
19147 save_reg_mask |= 1 << reg;
19148 }
19149 }
19150
19151 return save_reg_mask;
19152 }
19153
19154 /* Return true if r3 is live at the start of the function. */
19155
19156 static bool
19157 arm_r3_live_at_start_p (void)
19158 {
19159 /* Just look at cfg info, which is still close enough to correct at this
19160 point. This gives false positives for broken functions that might use
19161 uninitialized data that happens to be allocated in r3, but who cares? */
19162 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19163 }
19164
19165 /* Compute the number of bytes used to store the static chain register on the
19166 stack, above the stack frame. We need to know this accurately to get the
19167 alignment of the rest of the stack frame correct. */
19168
19169 static int
19170 arm_compute_static_chain_stack_bytes (void)
19171 {
19172 /* See the defining assertion in arm_expand_prologue. */
19173 if (IS_NESTED (arm_current_func_type ())
19174 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19175 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19176 || flag_stack_clash_protection)
19177 && !df_regs_ever_live_p (LR_REGNUM)))
19178 && arm_r3_live_at_start_p ()
19179 && crtl->args.pretend_args_size == 0)
19180 return 4;
19181
19182 return 0;
19183 }
19184
19185 /* Compute a bit mask of which core registers need to be
19186 saved on the stack for the current function.
19187 This is used by arm_compute_frame_layout, which may add extra registers. */
19188
19189 static unsigned long
19190 arm_compute_save_core_reg_mask (void)
19191 {
19192 unsigned int save_reg_mask = 0;
19193 unsigned long func_type = arm_current_func_type ();
19194 unsigned int reg;
19195
19196 if (IS_NAKED (func_type))
19197 /* This should never really happen. */
19198 return 0;
19199
19200 /* If we are creating a stack frame, then we must save the frame pointer,
19201 IP (which will hold the old stack pointer), LR and the PC. */
19202 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19203 save_reg_mask |=
19204 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19205 | (1 << IP_REGNUM)
19206 | (1 << LR_REGNUM)
19207 | (1 << PC_REGNUM);
19208
19209 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19210
19211 /* Decide if we need to save the link register.
19212 Interrupt routines have their own banked link register,
19213 so they never need to save it.
19214 Otherwise if we do not use the link register we do not need to save
19215 it. If we are pushing other registers onto the stack however, we
19216 can save an instruction in the epilogue by pushing the link register
19217 now and then popping it back into the PC. This incurs extra memory
19218 accesses though, so we only do it when optimizing for size, and only
19219 if we know that we will not need a fancy return sequence. */
19220 if (df_regs_ever_live_p (LR_REGNUM)
19221 || (save_reg_mask
19222 && optimize_size
19223 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19224 && !crtl->tail_call_emit
19225 && !crtl->calls_eh_return))
19226 save_reg_mask |= 1 << LR_REGNUM;
19227
19228 if (cfun->machine->lr_save_eliminated)
19229 save_reg_mask &= ~ (1 << LR_REGNUM);
19230
19231 if (TARGET_REALLY_IWMMXT
19232 && ((bit_count (save_reg_mask)
19233 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19234 arm_compute_static_chain_stack_bytes())
19235 ) % 2) != 0)
19236 {
19237 /* The total number of registers that are going to be pushed
19238 onto the stack is odd. We need to ensure that the stack
19239 is 64-bit aligned before we start to save iWMMXt registers,
19240 and also before we start to create locals. (A local variable
19241 might be a double or long long which we will load/store using
19242 an iWMMXt instruction). Therefore we need to push another
19243 ARM register, so that the stack will be 64-bit aligned. We
19244 try to avoid using the arg registers (r0 -r3) as they might be
19245 used to pass values in a tail call. */
19246 for (reg = 4; reg <= 12; reg++)
19247 if ((save_reg_mask & (1 << reg)) == 0)
19248 break;
19249
19250 if (reg <= 12)
19251 save_reg_mask |= (1 << reg);
19252 else
19253 {
19254 cfun->machine->sibcall_blocked = 1;
19255 save_reg_mask |= (1 << 3);
19256 }
19257 }
19258
19259 /* We may need to push an additional register for use initializing the
19260 PIC base register. */
19261 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19262 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19263 {
19264 reg = thumb_find_work_register (1 << 4);
19265 if (!call_used_regs[reg])
19266 save_reg_mask |= (1 << reg);
19267 }
19268
19269 return save_reg_mask;
19270 }
19271
19272 /* Compute a bit mask of which core registers need to be
19273 saved on the stack for the current function. */
19274 static unsigned long
19275 thumb1_compute_save_core_reg_mask (void)
19276 {
19277 unsigned long mask;
19278 unsigned reg;
19279
19280 mask = 0;
19281 for (reg = 0; reg < 12; reg ++)
19282 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19283 mask |= 1 << reg;
19284
19285 /* Handle the frame pointer as a special case. */
19286 if (frame_pointer_needed)
19287 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19288
19289 if (flag_pic
19290 && !TARGET_SINGLE_PIC_BASE
19291 && arm_pic_register != INVALID_REGNUM
19292 && crtl->uses_pic_offset_table)
19293 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19294
19295 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19296 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19297 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19298
19299 /* LR will also be pushed if any lo regs are pushed. */
19300 if (mask & 0xff || thumb_force_lr_save ())
19301 mask |= (1 << LR_REGNUM);
19302
19303 /* Make sure we have a low work register if we need one.
19304 We will need one if we are going to push a high register,
19305 but we are not currently intending to push a low register. */
19306 if ((mask & 0xff) == 0
19307 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19308 {
19309 /* Use thumb_find_work_register to choose which register
19310 we will use. If the register is live then we will
19311 have to push it. Use LAST_LO_REGNUM as our fallback
19312 choice for the register to select. */
19313 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19314 /* Make sure the register returned by thumb_find_work_register is
19315 not part of the return value. */
19316 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19317 reg = LAST_LO_REGNUM;
19318
19319 if (callee_saved_reg_p (reg))
19320 mask |= 1 << reg;
19321 }
19322
19323 /* The 504 below is 8 bytes less than 512 because there are two possible
19324 alignment words. We can't tell here if they will be present or not so we
19325 have to play it safe and assume that they are. */
19326 if ((CALLER_INTERWORKING_SLOT_SIZE +
19327 ROUND_UP_WORD (get_frame_size ()) +
19328 crtl->outgoing_args_size) >= 504)
19329 {
19330 /* This is the same as the code in thumb1_expand_prologue() which
19331 determines which register to use for stack decrement. */
19332 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19333 if (mask & (1 << reg))
19334 break;
19335
19336 if (reg > LAST_LO_REGNUM)
19337 {
19338 /* Make sure we have a register available for stack decrement. */
19339 mask |= 1 << LAST_LO_REGNUM;
19340 }
19341 }
19342
19343 return mask;
19344 }
19345
19346
19347 /* Return the number of bytes required to save VFP registers. */
19348 static int
19349 arm_get_vfp_saved_size (void)
19350 {
19351 unsigned int regno;
19352 int count;
19353 int saved;
19354
19355 saved = 0;
19356 /* Space for saved VFP registers. */
19357 if (TARGET_HARD_FLOAT)
19358 {
19359 count = 0;
19360 for (regno = FIRST_VFP_REGNUM;
19361 regno < LAST_VFP_REGNUM;
19362 regno += 2)
19363 {
19364 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19365 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19366 {
19367 if (count > 0)
19368 {
19369 /* Workaround ARM10 VFPr1 bug. */
19370 if (count == 2 && !arm_arch6)
19371 count++;
19372 saved += count * 8;
19373 }
19374 count = 0;
19375 }
19376 else
19377 count++;
19378 }
19379 if (count > 0)
19380 {
19381 if (count == 2 && !arm_arch6)
19382 count++;
19383 saved += count * 8;
19384 }
19385 }
19386 return saved;
19387 }
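/* For instance, if only d8 and d9 need saving the result is normally 16
   bytes, but on a pre-ARMv6 core the ARM10 VFPr1 workaround above rounds
   the pair up to three registers and 24 bytes are reserved instead.  */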
19388
19389
19390 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19391 everything bar the final return instruction. If simple_return is true,
19392 then do not output epilogue, because it has already been emitted in RTL. */
19393 const char *
19394 output_return_instruction (rtx operand, bool really_return, bool reverse,
19395 bool simple_return)
19396 {
19397 char conditional[10];
19398 char instr[100];
19399 unsigned reg;
19400 unsigned long live_regs_mask;
19401 unsigned long func_type;
19402 arm_stack_offsets *offsets;
19403
19404 func_type = arm_current_func_type ();
19405
19406 if (IS_NAKED (func_type))
19407 return "";
19408
19409 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19410 {
19411 /* If this function was declared non-returning, and we have
19412 found a tail call, then we have to trust that the called
19413 function won't return. */
19414 if (really_return)
19415 {
19416 rtx ops[2];
19417
19418 /* Otherwise, trap an attempted return by aborting. */
19419 ops[0] = operand;
19420 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19421 : "abort");
19422 assemble_external_libcall (ops[1]);
19423 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19424 }
19425
19426 return "";
19427 }
19428
19429 gcc_assert (!cfun->calls_alloca || really_return);
19430
19431 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19432
19433 cfun->machine->return_used_this_function = 1;
19434
19435 offsets = arm_get_frame_offsets ();
19436 live_regs_mask = offsets->saved_regs_mask;
19437
19438 if (!simple_return && live_regs_mask)
19439 {
19440 const char * return_reg;
19441
19442 /* If we do not have any special requirements for function exit
19443 (e.g. interworking) then we can load the return address
19444 directly into the PC. Otherwise we must load it into LR. */
19445 if (really_return
19446 && !IS_CMSE_ENTRY (func_type)
19447 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19448 return_reg = reg_names[PC_REGNUM];
19449 else
19450 return_reg = reg_names[LR_REGNUM];
19451
19452 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19453 {
19454 /* There are three possible reasons for the IP register
19455 being saved. 1) a stack frame was created, in which case
19456 IP contains the old stack pointer, or 2) an ISR routine
19457 corrupted it, or 3) it was saved to align the stack on
19458 iWMMXt. In case 1, restore IP into SP, otherwise just
19459 restore IP. */
19460 if (frame_pointer_needed)
19461 {
19462 live_regs_mask &= ~ (1 << IP_REGNUM);
19463 live_regs_mask |= (1 << SP_REGNUM);
19464 }
19465 else
19466 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19467 }
19468
19469 /* On some ARM architectures it is faster to use LDR rather than
19470 LDM to load a single register. On other architectures, the
19471 cost is the same. In 26 bit mode, or for exception handlers,
19472 we have to use LDM to load the PC so that the CPSR is also
19473 restored. */
19474 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19475 if (live_regs_mask == (1U << reg))
19476 break;
19477
19478 if (reg <= LAST_ARM_REGNUM
19479 && (reg != LR_REGNUM
19480 || ! really_return
19481 || ! IS_INTERRUPT (func_type)))
19482 {
19483 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19484 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19485 }
19486 else
19487 {
19488 char *p;
19489 int first = 1;
19490
19491 /* Generate the load multiple instruction to restore the
19492 registers. Note we can get here, even if
19493 frame_pointer_needed is true, but only if sp already
19494 points to the base of the saved core registers. */
19495 if (live_regs_mask & (1 << SP_REGNUM))
19496 {
19497 unsigned HOST_WIDE_INT stack_adjust;
19498
19499 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19500 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19501
19502 if (stack_adjust && arm_arch5 && TARGET_ARM)
19503 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19504 else
19505 {
19506 /* If we can't use ldmib (SA110 bug),
19507 then try to pop r3 instead. */
19508 if (stack_adjust)
19509 live_regs_mask |= 1 << 3;
19510
19511 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19512 }
19513 }
19514 /* For interrupt returns we have to use an LDM rather than
19515 a POP so that we can use the exception return variant. */
19516 else if (IS_INTERRUPT (func_type))
19517 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19518 else
19519 sprintf (instr, "pop%s\t{", conditional);
19520
19521 p = instr + strlen (instr);
19522
19523 for (reg = 0; reg <= SP_REGNUM; reg++)
19524 if (live_regs_mask & (1 << reg))
19525 {
19526 int l = strlen (reg_names[reg]);
19527
19528 if (first)
19529 first = 0;
19530 else
19531 {
19532 memcpy (p, ", ", 2);
19533 p += 2;
19534 }
19535
19536 memcpy (p, "%|", 2);
19537 memcpy (p + 2, reg_names[reg], l);
19538 p += l + 2;
19539 }
19540
19541 if (live_regs_mask & (1 << LR_REGNUM))
19542 {
19543 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19544 /* If returning from an interrupt, restore the CPSR. */
19545 if (IS_INTERRUPT (func_type))
19546 strcat (p, "^");
19547 }
19548 else
19549 strcpy (p, "}");
19550 }
19551
19552 output_asm_insn (instr, & operand);
19553
19554 /* See if we need to generate an extra instruction to
19555 perform the actual function return. */
19556 if (really_return
19557 && func_type != ARM_FT_INTERWORKED
19558 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19559 {
19560 /* The return has already been handled
19561 by loading the LR into the PC. */
19562 return "";
19563 }
19564 }
19565
19566 if (really_return)
19567 {
19568 switch ((int) ARM_FUNC_TYPE (func_type))
19569 {
19570 case ARM_FT_ISR:
19571 case ARM_FT_FIQ:
19572 /* ??? This is wrong for unified assembly syntax. */
19573 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19574 break;
19575
19576 case ARM_FT_INTERWORKED:
19577 gcc_assert (arm_arch5 || arm_arch4t);
19578 sprintf (instr, "bx%s\t%%|lr", conditional);
19579 break;
19580
19581 case ARM_FT_EXCEPTION:
19582 /* ??? This is wrong for unified assembly syntax. */
19583 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19584 break;
19585
19586 default:
19587 if (IS_CMSE_ENTRY (func_type))
19588 {
19589 /* Check if we have to clear the 'GE bits' which is only used if
19590 parallel add and subtraction instructions are available. */
19591 if (TARGET_INT_SIMD)
19592 snprintf (instr, sizeof (instr),
19593 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19594 else
19595 snprintf (instr, sizeof (instr),
19596 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19597
19598 output_asm_insn (instr, & operand);
19599 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19600 {
19601 /* Clear the cumulative exception-status bits (0-4,7) and the
19602 condition code bits (28-31) of the FPSCR. We need to
19603 remember to clear the first scratch register used (IP) and
19604 save and restore the second (r4). */
19605 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19606 output_asm_insn (instr, & operand);
19607 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19608 output_asm_insn (instr, & operand);
19609 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19610 output_asm_insn (instr, & operand);
19611 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19612 output_asm_insn (instr, & operand);
19613 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19614 output_asm_insn (instr, & operand);
19615 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19616 output_asm_insn (instr, & operand);
19617 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19618 output_asm_insn (instr, & operand);
19619 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19620 output_asm_insn (instr, & operand);
19621 }
19622 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19623 }
19624 /* Use bx if it's available. */
19625 else if (arm_arch5 || arm_arch4t)
19626 sprintf (instr, "bx%s\t%%|lr", conditional);
19627 else
19628 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19629 break;
19630 }
19631
19632 output_asm_insn (instr, & operand);
19633 }
19634
19635 return "";
19636 }
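/* As a sketch of the common case (an ordinary ARM-state function that
   saved r4, r5 and lr, with no interworking requirements), the code above
   folds the restore and the return into a single

     pop     {r4, r5, pc}

   An interrupt handler instead uses an ldmfd ending in "^" so that the
   SPSR is copied back into the CPSR as the handler returns.  */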
19637
19638 /* Output in FILE asm statements needed to declare the NAME of the function
19639 defined by its DECL node. */
19640
19641 void
19642 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19643 {
19644 size_t cmse_name_len;
19645 char *cmse_name = 0;
19646 char cmse_prefix[] = "__acle_se_";
19647
19648 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19649 extra function label for each function with the 'cmse_nonsecure_entry'
19650 attribute. This extra function label should be prepended with
19651 '__acle_se_', telling the linker that it needs to create secure gateway
19652 veneers for this function. */
19653 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19654 DECL_ATTRIBUTES (decl)))
19655 {
19656 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19657 cmse_name = XALLOCAVEC (char, cmse_name_len);
19658 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19659 targetm.asm_out.globalize_label (file, cmse_name);
19660
19661 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19662 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19663 }
19664
19665 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19666 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19667 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19668 ASM_OUTPUT_LABEL (file, name);
19669
19670 if (cmse_name)
19671 ASM_OUTPUT_LABEL (file, cmse_name);
19672
19673 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19674 }
19675
19676 /* Write the function name into the code section, directly preceding
19677 the function prologue.
19678
19679 Code will be output similar to this:
19680 t0
19681 .ascii "arm_poke_function_name", 0
19682 .align
19683 t1
19684 .word 0xff000000 + (t1 - t0)
19685 arm_poke_function_name
19686 mov ip, sp
19687 stmfd sp!, {fp, ip, lr, pc}
19688 sub fp, ip, #4
19689
19690 When performing a stack backtrace, code can inspect the value
19691 of 'pc' stored at 'fp' + 0. If the trace function then looks
19692 at location pc - 12 and the top 8 bits are set, then we know
19693 that there is a function name embedded immediately preceding this
19694 location, and that its length is ((pc[-3]) & ~0xff000000).
19695
19696 We assume that pc is declared as a pointer to an unsigned long.
19697
19698 It is of no benefit to output the function name if we are assembling
19699 a leaf function. These function types will not contain a stack
19700 backtrace structure, therefore it is not possible to determine the
19701 function name. */
19702 void
19703 arm_poke_function_name (FILE *stream, const char *name)
19704 {
19705 unsigned long alignlength;
19706 unsigned long length;
19707 rtx x;
19708
19709 length = strlen (name) + 1;
19710 alignlength = ROUND_UP_WORD (length);
19711
19712 ASM_OUTPUT_ASCII (stream, name, length);
19713 ASM_OUTPUT_ALIGN (stream, 2);
19714 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19715 assemble_aligned_integer (UNITS_PER_WORD, x);
19716 }
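/* A minimal sketch of a consumer of this marker (hypothetical code, not
   part of GCC, and assuming a 32-bit target where 'unsigned long' is four
   bytes):

     static const char *
     frame_function_name (const unsigned long *pc)
     {
       unsigned long marker = pc[-3];
       // The marker word holds 0xff000000 plus the padded name length;
       // the name itself ends immediately before the marker word.
       if ((marker & 0xff000000) != 0xff000000)
         return NULL;
       return (const char *) pc - 12 - (marker & ~0xff000000UL);
     }
*/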
19717
19718 /* Place some comments into the assembler stream
19719 describing the current function. */
19720 static void
19721 arm_output_function_prologue (FILE *f)
19722 {
19723 unsigned long func_type;
19724
19725 /* Sanity check. */
19726 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19727
19728 func_type = arm_current_func_type ();
19729
19730 switch ((int) ARM_FUNC_TYPE (func_type))
19731 {
19732 default:
19733 case ARM_FT_NORMAL:
19734 break;
19735 case ARM_FT_INTERWORKED:
19736 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19737 break;
19738 case ARM_FT_ISR:
19739 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19740 break;
19741 case ARM_FT_FIQ:
19742 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19743 break;
19744 case ARM_FT_EXCEPTION:
19745 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19746 break;
19747 }
19748
19749 if (IS_NAKED (func_type))
19750 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19751
19752 if (IS_VOLATILE (func_type))
19753 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19754
19755 if (IS_NESTED (func_type))
19756 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19757 if (IS_STACKALIGN (func_type))
19758 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19759 if (IS_CMSE_ENTRY (func_type))
19760 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19761
19762 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19763 crtl->args.size,
19764 crtl->args.pretend_args_size,
19765 (HOST_WIDE_INT) get_frame_size ());
19766
19767 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19768 frame_pointer_needed,
19769 cfun->machine->uses_anonymous_args);
19770
19771 if (cfun->machine->lr_save_eliminated)
19772 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19773
19774 if (crtl->calls_eh_return)
19775 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19776
19777 }
19778
19779 static void
19780 arm_output_function_epilogue (FILE *)
19781 {
19782 arm_stack_offsets *offsets;
19783
19784 if (TARGET_THUMB1)
19785 {
19786 int regno;
19787
19788 /* Emit any call-via-reg trampolines that are needed for v4t support
19789 of call_reg and call_value_reg type insns. */
19790 for (regno = 0; regno < LR_REGNUM; regno++)
19791 {
19792 rtx label = cfun->machine->call_via[regno];
19793
19794 if (label != NULL)
19795 {
19796 switch_to_section (function_section (current_function_decl));
19797 targetm.asm_out.internal_label (asm_out_file, "L",
19798 CODE_LABEL_NUMBER (label));
19799 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19800 }
19801 }
19802
19803 /* ??? Probably not safe to set this here, since it assumes that a
19804 function will be emitted as assembly immediately after we generate
19805 RTL for it. This does not happen for inline functions. */
19806 cfun->machine->return_used_this_function = 0;
19807 }
19808 else /* TARGET_32BIT */
19809 {
19810 /* We need to take into account any stack-frame rounding. */
19811 offsets = arm_get_frame_offsets ();
19812
19813 gcc_assert (!use_return_insn (FALSE, NULL)
19814 || (cfun->machine->return_used_this_function != 0)
19815 || offsets->saved_regs == offsets->outgoing_args
19816 || frame_pointer_needed);
19817 }
19818 }
19819
19820 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19821 STR and STRD. If an even number of registers is being pushed, an
19822 STRD pattern is created for each register pair. If an odd
19823 number of registers is pushed, emit an initial STR followed by
19824 as many STRD instructions as are needed. This works best when the
19825 stack is initially 64-bit aligned (the normal case), since it
19826 ensures that each STRD is also 64-bit aligned. */
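/* For instance, with SAVED_REGS_MASK covering {r4, r5, r6} (an odd count)
   the emitted sequence would look roughly like:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   This is only an illustrative sketch; the exact registers and offsets
   depend on the mask passed in.  */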
19827 static void
19828 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19829 {
19830 int num_regs = 0;
19831 int i;
19832 int regno;
19833 rtx par = NULL_RTX;
19834 rtx dwarf = NULL_RTX;
19835 rtx tmp;
19836 bool first = true;
19837
19838 num_regs = bit_count (saved_regs_mask);
19839
19840 /* Must be at least one register to save, and can't save SP or PC. */
19841 gcc_assert (num_regs > 0 && num_regs <= 14);
19842 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19843 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19844
19845 /* Create sequence for DWARF info. All the frame-related data for
19846 debugging is held in this wrapper. */
19847 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19848
19849 /* Describe the stack adjustment. */
19850 tmp = gen_rtx_SET (stack_pointer_rtx,
19851 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19852 RTX_FRAME_RELATED_P (tmp) = 1;
19853 XVECEXP (dwarf, 0, 0) = tmp;
19854
19855 /* Find the first register. */
19856 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19857 ;
19858
19859 i = 0;
19860
19861 /* If there's an odd number of registers to push, start off by
19862 pushing a single register. This ensures that subsequent STRD
19863 operations are dword aligned (assuming that SP was originally
19864 64-bit aligned). */
19865 if ((num_regs & 1) != 0)
19866 {
19867 rtx reg, mem, insn;
19868
19869 reg = gen_rtx_REG (SImode, regno);
19870 if (num_regs == 1)
19871 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19872 stack_pointer_rtx));
19873 else
19874 mem = gen_frame_mem (Pmode,
19875 gen_rtx_PRE_MODIFY
19876 (Pmode, stack_pointer_rtx,
19877 plus_constant (Pmode, stack_pointer_rtx,
19878 -4 * num_regs)));
19879
19880 tmp = gen_rtx_SET (mem, reg);
19881 RTX_FRAME_RELATED_P (tmp) = 1;
19882 insn = emit_insn (tmp);
19883 RTX_FRAME_RELATED_P (insn) = 1;
19884 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19885 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19886 RTX_FRAME_RELATED_P (tmp) = 1;
19887 i++;
19888 regno++;
19889 XVECEXP (dwarf, 0, i) = tmp;
19890 first = false;
19891 }
19892
19893 while (i < num_regs)
19894 if (saved_regs_mask & (1 << regno))
19895 {
19896 rtx reg1, reg2, mem1, mem2;
19897 rtx tmp0, tmp1, tmp2;
19898 int regno2;
19899
19900 /* Find the register to pair with this one. */
19901 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19902 regno2++)
19903 ;
19904
19905 reg1 = gen_rtx_REG (SImode, regno);
19906 reg2 = gen_rtx_REG (SImode, regno2);
19907
19908 if (first)
19909 {
19910 rtx insn;
19911
19912 first = false;
19913 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19914 stack_pointer_rtx,
19915 -4 * num_regs));
19916 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 -4 * (num_regs - 1)));
19919 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19920 plus_constant (Pmode, stack_pointer_rtx,
19921 -4 * (num_regs)));
19922 tmp1 = gen_rtx_SET (mem1, reg1);
19923 tmp2 = gen_rtx_SET (mem2, reg2);
19924 RTX_FRAME_RELATED_P (tmp0) = 1;
19925 RTX_FRAME_RELATED_P (tmp1) = 1;
19926 RTX_FRAME_RELATED_P (tmp2) = 1;
19927 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19928 XVECEXP (par, 0, 0) = tmp0;
19929 XVECEXP (par, 0, 1) = tmp1;
19930 XVECEXP (par, 0, 2) = tmp2;
19931 insn = emit_insn (par);
19932 RTX_FRAME_RELATED_P (insn) = 1;
19933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19934 }
19935 else
19936 {
19937 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19938 stack_pointer_rtx,
19939 4 * i));
19940 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * (i + 1)));
19943 tmp1 = gen_rtx_SET (mem1, reg1);
19944 tmp2 = gen_rtx_SET (mem2, reg2);
19945 RTX_FRAME_RELATED_P (tmp1) = 1;
19946 RTX_FRAME_RELATED_P (tmp2) = 1;
19947 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19948 XVECEXP (par, 0, 0) = tmp1;
19949 XVECEXP (par, 0, 1) = tmp2;
19950 emit_insn (par);
19951 }
19952
19953 /* Create unwind information. This is an approximation. */
19954 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19955 plus_constant (Pmode,
19956 stack_pointer_rtx,
19957 4 * i)),
19958 reg1);
19959 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19960 plus_constant (Pmode,
19961 stack_pointer_rtx,
19962 4 * (i + 1))),
19963 reg2);
19964
19965 RTX_FRAME_RELATED_P (tmp1) = 1;
19966 RTX_FRAME_RELATED_P (tmp2) = 1;
19967 XVECEXP (dwarf, 0, i + 1) = tmp1;
19968 XVECEXP (dwarf, 0, i + 2) = tmp2;
19969 i += 2;
19970 regno = regno2 + 1;
19971 }
19972 else
19973 regno++;
19974
19975 return;
19976 }
19977
19978 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19979 whenever possible, otherwise it emits single-word stores. The first store
19980 also allocates stack space for all saved registers, using pre-indexed
19981 addressing with writeback. All other stores use offset addressing. If no
19982 STRD can be emitted, this function emits a sequence of single-word stores,
19983 and not an STM as before, because single-word stores provide more
19984 scheduling freedom and can be turned into an STM by peephole optimizations. */
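/* As an illustrative sketch only: with SAVED_REGS_MASK covering {r4, r5, r7}
   this would emit something like

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   where the first store allocates the whole 12 bytes with writeback and
   the remaining store uses plain offset addressing.  */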
19985 static void
19986 arm_emit_strd_push (unsigned long saved_regs_mask)
19987 {
19988 int num_regs = 0;
19989 int i, j, dwarf_index = 0;
19990 int offset = 0;
19991 rtx dwarf = NULL_RTX;
19992 rtx insn = NULL_RTX;
19993 rtx tmp, mem;
19994
19995 /* TODO: More efficient code could be emitted by changing the
19996 layout, e.g., first push all pairs that can use STRD to keep the
19997 stack aligned, and then push all other registers. */
19998 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19999 if (saved_regs_mask & (1 << i))
20000 num_regs++;
20001
20002 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20003 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20004 gcc_assert (num_regs > 0);
20005
20006 /* Create sequence for DWARF info. */
20007 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20008
20009 /* For dwarf info, we generate explicit stack update. */
20010 tmp = gen_rtx_SET (stack_pointer_rtx,
20011 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20012 RTX_FRAME_RELATED_P (tmp) = 1;
20013 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20014
20015 /* Save registers. */
20016 offset = - 4 * num_regs;
20017 j = 0;
20018 while (j <= LAST_ARM_REGNUM)
20019 if (saved_regs_mask & (1 << j))
20020 {
20021 if ((j % 2 == 0)
20022 && (saved_regs_mask & (1 << (j + 1))))
20023 {
20024 /* The current register and the next register form a register pair
20025 for which STRD can be generated. */
20026 if (offset < 0)
20027 {
20028 /* Allocate stack space for all saved registers. */
20029 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20030 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20031 mem = gen_frame_mem (DImode, tmp);
20032 offset = 0;
20033 }
20034 else if (offset > 0)
20035 mem = gen_frame_mem (DImode,
20036 plus_constant (Pmode,
20037 stack_pointer_rtx,
20038 offset));
20039 else
20040 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20041
20042 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20043 RTX_FRAME_RELATED_P (tmp) = 1;
20044 tmp = emit_insn (tmp);
20045
20046 /* Record the first store insn. */
20047 if (dwarf_index == 1)
20048 insn = tmp;
20049
20050 /* Generate dwarf info. */
20051 mem = gen_frame_mem (SImode,
20052 plus_constant (Pmode,
20053 stack_pointer_rtx,
20054 offset));
20055 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20056 RTX_FRAME_RELATED_P (tmp) = 1;
20057 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20058
20059 mem = gen_frame_mem (SImode,
20060 plus_constant (Pmode,
20061 stack_pointer_rtx,
20062 offset + 4));
20063 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20064 RTX_FRAME_RELATED_P (tmp) = 1;
20065 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20066
20067 offset += 8;
20068 j += 2;
20069 }
20070 else
20071 {
20072 /* Emit a single word store. */
20073 if (offset < 0)
20074 {
20075 /* Allocate stack space for all saved registers. */
20076 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20077 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20078 mem = gen_frame_mem (SImode, tmp);
20079 offset = 0;
20080 }
20081 else if (offset > 0)
20082 mem = gen_frame_mem (SImode,
20083 plus_constant (Pmode,
20084 stack_pointer_rtx,
20085 offset));
20086 else
20087 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20088
20089 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20090 RTX_FRAME_RELATED_P (tmp) = 1;
20091 tmp = emit_insn (tmp);
20092
20093 /* Record the first store insn. */
20094 if (dwarf_index == 1)
20095 insn = tmp;
20096
20097 /* Generate dwarf info. */
20098 mem = gen_frame_mem (SImode,
20099 plus_constant(Pmode,
20100 stack_pointer_rtx,
20101 offset));
20102 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20103 RTX_FRAME_RELATED_P (tmp) = 1;
20104 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20105
20106 offset += 4;
20107 j += 1;
20108 }
20109 }
20110 else
20111 j++;
20112
20113 /* Attach dwarf info to the first insn we generate. */
20114 gcc_assert (insn != NULL_RTX);
20115 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20116 RTX_FRAME_RELATED_P (insn) = 1;
20117 }
20118
20119 /* Generate and emit an insn that we will recognize as a push_multi.
20120 Unfortunately, since this insn does not reflect very well the actual
20121 semantics of the operation, we need to annotate the insn for the benefit
20122 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20123 MASK for registers that should be annotated for DWARF2 frame unwind
20124 information. */
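/* For example, a MASK covering {r4, r5, lr} ultimately assembles to a
   single "push {r4, r5, lr}" (a store-multiple with writeback), while the
   attached REG_FRAME_RELATED_EXPR note describes the three stores and the
   single 12-byte SP decrement individually, as sketched in the comment
   inside the function below.  This is only an illustration of the intent,
   not a guarantee of the exact assembly produced.  */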
20125 static rtx
20126 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20127 {
20128 int num_regs = 0;
20129 int num_dwarf_regs = 0;
20130 int i, j;
20131 rtx par;
20132 rtx dwarf;
20133 int dwarf_par_index;
20134 rtx tmp, reg;
20135
20136 /* We don't record the PC in the dwarf frame information. */
20137 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20138
20139 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20140 {
20141 if (mask & (1 << i))
20142 num_regs++;
20143 if (dwarf_regs_mask & (1 << i))
20144 num_dwarf_regs++;
20145 }
20146
20147 gcc_assert (num_regs && num_regs <= 16);
20148 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20149
20150 /* For the body of the insn we are going to generate an UNSPEC in
20151 parallel with several USEs. This allows the insn to be recognized
20152 by the push_multi pattern in the arm.md file.
20153
20154 The body of the insn looks something like this:
20155
20156 (parallel [
20157 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20158 (const_int:SI <num>)))
20159 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20160 (use (reg:SI XX))
20161 (use (reg:SI YY))
20162 ...
20163 ])
20164
20165 For the frame note however, we try to be more explicit and actually
20166 show each register being stored into the stack frame, plus a (single)
20167 decrement of the stack pointer. We do it this way in order to be
20168 friendly to the stack unwinding code, which only wants to see a single
20169 stack decrement per instruction. The RTL we generate for the note looks
20170 something like this:
20171
20172 (sequence [
20173 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20174 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20175 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20176 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20177 ...
20178 ])
20179
20180 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20181 instead we'd have a parallel expression detailing all
20182 the stores to the various memory addresses so that debug
20183 information is more up-to-date. Remember however while writing
20184 this to take care of the constraints with the push instruction.
20185
20186 Note also that this has to be taken care of for the VFP registers.
20187
20188 For more see PR43399. */
20189
20190 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20191 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20192 dwarf_par_index = 1;
20193
20194 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20195 {
20196 if (mask & (1 << i))
20197 {
20198 reg = gen_rtx_REG (SImode, i);
20199
20200 XVECEXP (par, 0, 0)
20201 = gen_rtx_SET (gen_frame_mem
20202 (BLKmode,
20203 gen_rtx_PRE_MODIFY (Pmode,
20204 stack_pointer_rtx,
20205 plus_constant
20206 (Pmode, stack_pointer_rtx,
20207 -4 * num_regs))
20208 ),
20209 gen_rtx_UNSPEC (BLKmode,
20210 gen_rtvec (1, reg),
20211 UNSPEC_PUSH_MULT));
20212
20213 if (dwarf_regs_mask & (1 << i))
20214 {
20215 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20216 reg);
20217 RTX_FRAME_RELATED_P (tmp) = 1;
20218 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20219 }
20220
20221 break;
20222 }
20223 }
20224
20225 for (j = 1, i++; j < num_regs; i++)
20226 {
20227 if (mask & (1 << i))
20228 {
20229 reg = gen_rtx_REG (SImode, i);
20230
20231 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20232
20233 if (dwarf_regs_mask & (1 << i))
20234 {
20235 tmp
20236 = gen_rtx_SET (gen_frame_mem
20237 (SImode,
20238 plus_constant (Pmode, stack_pointer_rtx,
20239 4 * j)),
20240 reg);
20241 RTX_FRAME_RELATED_P (tmp) = 1;
20242 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20243 }
20244
20245 j++;
20246 }
20247 }
20248
20249 par = emit_insn (par);
20250
20251 tmp = gen_rtx_SET (stack_pointer_rtx,
20252 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20253 RTX_FRAME_RELATED_P (tmp) = 1;
20254 XVECEXP (dwarf, 0, 0) = tmp;
20255
20256 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20257
20258 return par;
20259 }
20260
20261 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20262 SIZE is the offset to be adjusted.
20263 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20264 static void
20265 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20266 {
20267 rtx dwarf;
20268
20269 RTX_FRAME_RELATED_P (insn) = 1;
20270 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20271 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20272 }
20273
20274 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20275 SAVED_REGS_MASK shows which registers need to be restored.
20276
20277 Unfortunately, since this insn does not reflect very well the actual
20278 semantics of the operation, we need to annotate the insn for the benefit
20279 of DWARF2 frame unwind information. */
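/* As a rough illustration: with SAVED_REGS_MASK covering {r4, r5, pc} the
   emitted insn corresponds to "pop {r4, r5, pc}", annotated with
   REG_CFA_RESTORE notes for r4 and r5; for a mask that does not include
   PC a REG_CFA_ADJUST_CFA note describing the stack adjustment is added
   as well.  The exact output depends on the mask and target state.  */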
20280 static void
20281 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20282 {
20283 int num_regs = 0;
20284 int i, j;
20285 rtx par;
20286 rtx dwarf = NULL_RTX;
20287 rtx tmp, reg;
20288 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20289 int offset_adj;
20290 int emit_update;
20291
20292 offset_adj = return_in_pc ? 1 : 0;
20293 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20294 if (saved_regs_mask & (1 << i))
20295 num_regs++;
20296
20297 gcc_assert (num_regs && num_regs <= 16);
20298
20299 /* If SP is in the reglist, then we don't emit the SP update insn. */
20300 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20301
20302 /* The parallel needs to hold num_regs SETs
20303 and one SET for the stack update. */
20304 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20305
20306 if (return_in_pc)
20307 XVECEXP (par, 0, 0) = ret_rtx;
20308
20309 if (emit_update)
20310 {
20311 /* Increment the stack pointer, based on there being
20312 num_regs 4-byte registers to restore. */
20313 tmp = gen_rtx_SET (stack_pointer_rtx,
20314 plus_constant (Pmode,
20315 stack_pointer_rtx,
20316 4 * num_regs));
20317 RTX_FRAME_RELATED_P (tmp) = 1;
20318 XVECEXP (par, 0, offset_adj) = tmp;
20319 }
20320
20321 /* Now restore every reg, which may include PC. */
20322 for (j = 0, i = 0; j < num_regs; i++)
20323 if (saved_regs_mask & (1 << i))
20324 {
20325 reg = gen_rtx_REG (SImode, i);
20326 if ((num_regs == 1) && emit_update && !return_in_pc)
20327 {
20328 /* Emit single load with writeback. */
20329 tmp = gen_frame_mem (SImode,
20330 gen_rtx_POST_INC (Pmode,
20331 stack_pointer_rtx));
20332 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20333 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20334 return;
20335 }
20336
20337 tmp = gen_rtx_SET (reg,
20338 gen_frame_mem
20339 (SImode,
20340 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20341 RTX_FRAME_RELATED_P (tmp) = 1;
20342 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20343
20344 /* We need to maintain a sequence for DWARF info too. As dwarf info
20345 should not have PC, skip PC. */
20346 if (i != PC_REGNUM)
20347 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20348
20349 j++;
20350 }
20351
20352 if (return_in_pc)
20353 par = emit_jump_insn (par);
20354 else
20355 par = emit_insn (par);
20356
20357 REG_NOTES (par) = dwarf;
20358 if (!return_in_pc)
20359 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20360 stack_pointer_rtx, stack_pointer_rtx);
20361 }
20362
20363 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20364 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20365
20366 Unfortunately, since this insn does not reflect very well the actual
20367 semantics of the operation, we need to annotate the insn for the benefit
20368 of DWARF2 frame unwind information. */
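/* Illustrative sketch only: restoring the three D-registers d8-d10 with
   BASE_REG == SP corresponds to something like "vldm sp!, {d8-d10}", with
   a REG_CFA_RESTORE note for each D-register and a 24-byte CFA adjustment
   note when the base register is the stack pointer.  */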
20369 static void
20370 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20371 {
20372 int i, j;
20373 rtx par;
20374 rtx dwarf = NULL_RTX;
20375 rtx tmp, reg;
20376
20377 gcc_assert (num_regs && num_regs <= 32);
20378
20379 /* Workaround ARM10 VFPr1 bug. */
20380 if (num_regs == 2 && !arm_arch6)
20381 {
20382 if (first_reg == 15)
20383 first_reg--;
20384
20385 num_regs++;
20386 }
20387
20388 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20389 there could be up to 32 D-registers to restore.
20390 If there are more than 16 D-registers, make two recursive calls,
20391 each of which emits one pop_multi instruction. */
20392 if (num_regs > 16)
20393 {
20394 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20395 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20396 return;
20397 }
20398
20399 /* The parallel needs to hold num_regs SETs
20400 and one SET for the stack update. */
20401 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20402
20403 /* Increment the stack pointer, based on there being
20404 num_regs 8-byte registers to restore. */
20405 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20406 RTX_FRAME_RELATED_P (tmp) = 1;
20407 XVECEXP (par, 0, 0) = tmp;
20408
20409 /* Now show every reg that will be restored, using a SET for each. */
20410 for (j = 0, i=first_reg; j < num_regs; i += 2)
20411 {
20412 reg = gen_rtx_REG (DFmode, i);
20413
20414 tmp = gen_rtx_SET (reg,
20415 gen_frame_mem
20416 (DFmode,
20417 plus_constant (Pmode, base_reg, 8 * j)));
20418 RTX_FRAME_RELATED_P (tmp) = 1;
20419 XVECEXP (par, 0, j + 1) = tmp;
20420
20421 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20422
20423 j++;
20424 }
20425
20426 par = emit_insn (par);
20427 REG_NOTES (par) = dwarf;
20428
20429 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
20430 if (REGNO (base_reg) == IP_REGNUM)
20431 {
20432 RTX_FRAME_RELATED_P (par) = 1;
20433 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20434 }
20435 else
20436 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20437 base_reg, base_reg);
20438 }
20439
20440 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20441 an even number of registers is being popped, multiple LDRD patterns are
20442 created, one for each register pair. If an odd number of registers is popped,
20443 the last register is loaded using an LDR pattern. */
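/* A rough illustration: popping {r4, r5, r6} (an odd count, no PC) would
   emit something along the lines of

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the final single register loaded with post-increment after the
   explicit stack adjustment.  */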
20444 static void
20445 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20446 {
20447 int num_regs = 0;
20448 int i, j;
20449 rtx par = NULL_RTX;
20450 rtx dwarf = NULL_RTX;
20451 rtx tmp, reg, tmp1;
20452 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20453
20454 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20455 if (saved_regs_mask & (1 << i))
20456 num_regs++;
20457
20458 gcc_assert (num_regs && num_regs <= 16);
20459
20460 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20461 popped. Removing PC flips the parity of num_regs: if it was even it
20462 is now odd, and we can generate a pop with PC; if it was odd it is now
20463 even, and an LDR with return can be generated for PC. */
20464 if (return_in_pc)
20465 num_regs--;
20466
20467 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20468
20469 /* Var j iterates over all the registers in saved_regs_mask; var i gives
20470 the index of each saved register in the stack frame. A PARALLEL RTX
20471 for each register pair is created here, so that the pattern for LDRD
20472 can be matched. As PC is always the last register to be popped, and
20473 we have already decremented num_regs if PC is present, we don't have
20474 to worry about PC in this loop. */
20475 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20476 if (saved_regs_mask & (1 << j))
20477 {
20478 /* Create RTX for memory load. */
20479 reg = gen_rtx_REG (SImode, j);
20480 tmp = gen_rtx_SET (reg,
20481 gen_frame_mem (SImode,
20482 plus_constant (Pmode,
20483 stack_pointer_rtx, 4 * i)));
20484 RTX_FRAME_RELATED_P (tmp) = 1;
20485
20486 if (i % 2 == 0)
20487 {
20488 /* When saved-register index (i) is even, the RTX to be emitted is
20489 yet to be created. Hence create it first. The LDRD pattern we
20490 are generating is :
20491 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20492 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20493 where target registers need not be consecutive. */
20494 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20495 dwarf = NULL_RTX;
20496 }
20497
20498 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20499 added as the 0th element; if i is odd, reg_i is added as the 1st element
20500 of the LDRD pattern shown above. */
20501 XVECEXP (par, 0, (i % 2)) = tmp;
20502 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20503
20504 if ((i % 2) == 1)
20505 {
20506 /* When saved-register index (i) is odd, RTXs for both the registers
20507 to be loaded are generated in above given LDRD pattern, and the
20508 pattern can be emitted now. */
20509 par = emit_insn (par);
20510 REG_NOTES (par) = dwarf;
20511 RTX_FRAME_RELATED_P (par) = 1;
20512 }
20513
20514 i++;
20515 }
20516
20517 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20518 the number of registers is even AND return_in_pc is true, the last
20519 register is popped using LDR. It can be PC as well. Hence, adjust the
20520 stack first and then use LDR with post-increment. */
20521
20522 /* Increment the stack pointer, based on there being
20523 num_regs 4-byte registers to restore. */
20524 tmp = gen_rtx_SET (stack_pointer_rtx,
20525 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20526 RTX_FRAME_RELATED_P (tmp) = 1;
20527 tmp = emit_insn (tmp);
20528 if (!return_in_pc)
20529 {
20530 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20531 stack_pointer_rtx, stack_pointer_rtx);
20532 }
20533
20534 dwarf = NULL_RTX;
20535
20536 if (((num_regs % 2) == 1 && !return_in_pc)
20537 || ((num_regs % 2) == 0 && return_in_pc))
20538 {
20539 /* Scan for the single register to be popped. Skip until the saved
20540 register is found. */
20541 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20542
20543 /* Gen LDR with post increment here. */
20544 tmp1 = gen_rtx_MEM (SImode,
20545 gen_rtx_POST_INC (SImode,
20546 stack_pointer_rtx));
20547 set_mem_alias_set (tmp1, get_frame_alias_set ());
20548
20549 reg = gen_rtx_REG (SImode, j);
20550 tmp = gen_rtx_SET (reg, tmp1);
20551 RTX_FRAME_RELATED_P (tmp) = 1;
20552 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20553
20554 if (return_in_pc)
20555 {
20556 /* If return_in_pc, j must be PC_REGNUM. */
20557 gcc_assert (j == PC_REGNUM);
20558 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20559 XVECEXP (par, 0, 0) = ret_rtx;
20560 XVECEXP (par, 0, 1) = tmp;
20561 par = emit_jump_insn (par);
20562 }
20563 else
20564 {
20565 par = emit_insn (tmp);
20566 REG_NOTES (par) = dwarf;
20567 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20568 stack_pointer_rtx, stack_pointer_rtx);
20569 }
20570
20571 }
20572 else if ((num_regs % 2) == 1 && return_in_pc)
20573 {
20574 /* There are 2 registers to be popped. So, generate the pattern
20575 pop_multiple_with_stack_update_and_return to pop in PC. */
20576 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20577 }
20578
20579 return;
20580 }
20581
20582 /* LDRD in ARM mode needs consecutive registers as operands. This function
20583 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20584 offset addressing and then generates one separate stack update. This provides
20585 more scheduling freedom, compared to writeback on every load. However,
20586 if the function returns using load into PC directly
20587 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20588 before the last load. TODO: Add a peephole optimization to recognize
20589 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20590 peephole optimization to merge the load at stack-offset zero
20591 with the stack update instruction using load with writeback
20592 in post-index addressing mode. */
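/* As an illustrative sketch: with SAVED_REGS_MASK covering {r4, r5, r6, pc}
   this would emit roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   where the last load both returns and performs the final 4-byte stack
   update.  */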
20593 static void
20594 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20595 {
20596 int j = 0;
20597 int offset = 0;
20598 rtx par = NULL_RTX;
20599 rtx dwarf = NULL_RTX;
20600 rtx tmp, mem;
20601
20602 /* Restore saved registers. */
20603 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20604 j = 0;
20605 while (j <= LAST_ARM_REGNUM)
20606 if (saved_regs_mask & (1 << j))
20607 {
20608 if ((j % 2) == 0
20609 && (saved_regs_mask & (1 << (j + 1)))
20610 && (j + 1) != PC_REGNUM)
20611 {
20612 /* Current register and next register form register pair for which
20613 LDRD can be generated. PC is always the last register popped, and
20614 we handle it separately. */
20615 if (offset > 0)
20616 mem = gen_frame_mem (DImode,
20617 plus_constant (Pmode,
20618 stack_pointer_rtx,
20619 offset));
20620 else
20621 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20622
20623 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20624 tmp = emit_insn (tmp);
20625 RTX_FRAME_RELATED_P (tmp) = 1;
20626
20627 /* Generate dwarf info. */
20628
20629 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20630 gen_rtx_REG (SImode, j),
20631 NULL_RTX);
20632 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20633 gen_rtx_REG (SImode, j + 1),
20634 dwarf);
20635
20636 REG_NOTES (tmp) = dwarf;
20637
20638 offset += 8;
20639 j += 2;
20640 }
20641 else if (j != PC_REGNUM)
20642 {
20643 /* Emit a single word load. */
20644 if (offset > 0)
20645 mem = gen_frame_mem (SImode,
20646 plus_constant (Pmode,
20647 stack_pointer_rtx,
20648 offset));
20649 else
20650 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20651
20652 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20653 tmp = emit_insn (tmp);
20654 RTX_FRAME_RELATED_P (tmp) = 1;
20655
20656 /* Generate dwarf info. */
20657 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20658 gen_rtx_REG (SImode, j),
20659 NULL_RTX);
20660
20661 offset += 4;
20662 j += 1;
20663 }
20664 else /* j == PC_REGNUM */
20665 j++;
20666 }
20667 else
20668 j++;
20669
20670 /* Update the stack. */
20671 if (offset > 0)
20672 {
20673 tmp = gen_rtx_SET (stack_pointer_rtx,
20674 plus_constant (Pmode,
20675 stack_pointer_rtx,
20676 offset));
20677 tmp = emit_insn (tmp);
20678 arm_add_cfa_adjust_cfa_note (tmp, offset,
20679 stack_pointer_rtx, stack_pointer_rtx);
20680 offset = 0;
20681 }
20682
20683 if (saved_regs_mask & (1 << PC_REGNUM))
20684 {
20685 /* Only PC is to be popped. */
20686 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20687 XVECEXP (par, 0, 0) = ret_rtx;
20688 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20689 gen_frame_mem (SImode,
20690 gen_rtx_POST_INC (SImode,
20691 stack_pointer_rtx)));
20692 RTX_FRAME_RELATED_P (tmp) = 1;
20693 XVECEXP (par, 0, 1) = tmp;
20694 par = emit_jump_insn (par);
20695
20696 /* Generate dwarf info. */
20697 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20698 gen_rtx_REG (SImode, PC_REGNUM),
20699 NULL_RTX);
20700 REG_NOTES (par) = dwarf;
20701 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20702 stack_pointer_rtx, stack_pointer_rtx);
20703 }
20704 }
20705
20706 /* Calculate the size of the return value that is passed in registers. */
20707 static unsigned
20708 arm_size_return_regs (void)
20709 {
20710 machine_mode mode;
20711
20712 if (crtl->return_rtx != 0)
20713 mode = GET_MODE (crtl->return_rtx);
20714 else
20715 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20716
20717 return GET_MODE_SIZE (mode);
20718 }
20719
20720 /* Return true if the current function needs to save/restore LR. */
20721 static bool
20722 thumb_force_lr_save (void)
20723 {
20724 return !cfun->machine->lr_save_eliminated
20725 && (!crtl->is_leaf
20726 || thumb_far_jump_used_p ()
20727 || df_regs_ever_live_p (LR_REGNUM));
20728 }
20729
20730 /* Return true if CALL is an indirect tail call, in which case we
20731 cannot be sure that r3 will be available on entry to the called
20732 function. */
20733 static bool
20734 is_indirect_tailcall_p (rtx call)
20735 {
20736 rtx pat = PATTERN (call);
20737
20738 /* Indirect tail call. */
20739 pat = XVECEXP (pat, 0, 0);
20740 if (GET_CODE (pat) == SET)
20741 pat = SET_SRC (pat);
20742
20743 pat = XEXP (XEXP (pat, 0), 0);
20744 return REG_P (pat);
20745 }
20746
20747 /* Return true if r3 might be used by any of the tail call insns in the
20748 current function. */
20749 static bool
20750 any_sibcall_could_use_r3 (void)
20751 {
20752 edge_iterator ei;
20753 edge e;
20754
20755 if (!crtl->tail_call_emit)
20756 return false;
20757 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20758 if (e->flags & EDGE_SIBCALL)
20759 {
20760 rtx_insn *call = BB_END (e->src);
20761 if (!CALL_P (call))
20762 call = prev_nonnote_nondebug_insn (call);
20763 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20764 if (find_regno_fusage (call, USE, 3)
20765 || is_indirect_tailcall_p (call))
20766 return true;
20767 }
20768 return false;
20769 }
20770
20771
20772 /* Compute the distance from register FROM to register TO.
20773 These can be the arg pointer (26), the soft frame pointer (25),
20774 the stack pointer (13) or the hard frame pointer (11).
20775 In thumb mode r7 is used as the hard frame pointer, if needed.
20776 Typical stack layout looks like this:
20777
20778 old stack pointer -> | |
20779 ----
20780 | | \
20781 | | saved arguments for
20782 | | vararg functions
20783 | | /
20784 --
20785 hard FP & arg pointer -> | | \
20786 | | stack
20787 | | frame
20788 | | /
20789 --
20790 | | \
20791 | | call saved
20792 | | registers
20793 soft frame pointer -> | | /
20794 --
20795 | | \
20796 | | local
20797 | | variables
20798 locals base pointer -> | | /
20799 --
20800 | | \
20801 | | outgoing
20802 | | arguments
20803 current stack pointer -> | | /
20804 --
20805
20806 For a given function some or all of these stack components
20807 may not be needed, giving rise to the possibility of
20808 eliminating some of the registers.
20809
20810 The values returned by this function must reflect the behavior
20811 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20812
20813 The sign of the number returned reflects the direction of stack
20814 growth, so the values are positive for all eliminations except
20815 from the soft frame pointer to the hard frame pointer.
20816
20817 SFP may point just inside the local variables block to ensure correct
20818 alignment. */
20819
20820
20821 /* Return cached stack offsets. */
20822
20823 static arm_stack_offsets *
20824 arm_get_frame_offsets (void)
20825 {
20826 struct arm_stack_offsets *offsets;
20827
20828 offsets = &cfun->machine->stack_offsets;
20829
20830 return offsets;
20831 }
20832
20833
20834 /* Calculate stack offsets. These are used to calculate register elimination
20835 offsets and in prologue/epilogue code. Also calculates which registers
20836 should be saved. */
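/* As a very rough worked example (the real values depend on the ABI,
   alignment padding, the static chain, and any iWMMXt/VFP saves, and
   CALLER_INTERWORKING_SLOT_SIZE is assumed to be 0 here): an ARM-state
   function with no pretend args, two call-saved core registers, 8 bytes
   of locals and 16 bytes of outgoing arguments, and no frame pointer,
   ends up with approximately

	saved_args    = 0
	saved_regs    = 8
	soft_frame    = 8
	locals_base   = 16
	outgoing_args = 32

   with outgoing_args kept doubleword aligned.  */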
20837
20838 static void
20839 arm_compute_frame_layout (void)
20840 {
20841 struct arm_stack_offsets *offsets;
20842 unsigned long func_type;
20843 int saved;
20844 int core_saved;
20845 HOST_WIDE_INT frame_size;
20846 int i;
20847
20848 offsets = &cfun->machine->stack_offsets;
20849
20850 /* Initially this is the size of the local variables. It will be translated
20851 into an offset once we have determined the size of the preceding data. */
20852 frame_size = ROUND_UP_WORD (get_frame_size ());
20853
20854 /* Space for variadic functions. */
20855 offsets->saved_args = crtl->args.pretend_args_size;
20856
20857 /* In Thumb mode this is incorrect, but never used. */
20858 offsets->frame
20859 = (offsets->saved_args
20860 + arm_compute_static_chain_stack_bytes ()
20861 + (frame_pointer_needed ? 4 : 0));
20862
20863 if (TARGET_32BIT)
20864 {
20865 unsigned int regno;
20866
20867 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20868 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20869 saved = core_saved;
20870
20871 /* We know that SP will be doubleword aligned on entry, and we must
20872 preserve that condition at any subroutine call. We also require the
20873 soft frame pointer to be doubleword aligned. */
20874
20875 if (TARGET_REALLY_IWMMXT)
20876 {
20877 /* Check for the call-saved iWMMXt registers. */
20878 for (regno = FIRST_IWMMXT_REGNUM;
20879 regno <= LAST_IWMMXT_REGNUM;
20880 regno++)
20881 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20882 saved += 8;
20883 }
20884
20885 func_type = arm_current_func_type ();
20886 /* Space for saved VFP registers. */
20887 if (! IS_VOLATILE (func_type)
20888 && TARGET_HARD_FLOAT)
20889 saved += arm_get_vfp_saved_size ();
20890 }
20891 else /* TARGET_THUMB1 */
20892 {
20893 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20894 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20895 saved = core_saved;
20896 if (TARGET_BACKTRACE)
20897 saved += 16;
20898 }
20899
20900 /* Saved registers include the stack frame. */
20901 offsets->saved_regs
20902 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20903 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20904
20905 /* A leaf function does not need any stack alignment if it has nothing
20906 on the stack. */
20907 if (crtl->is_leaf && frame_size == 0
20908 /* However if it calls alloca(), we have a dynamically allocated
20909 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20910 && ! cfun->calls_alloca)
20911 {
20912 offsets->outgoing_args = offsets->soft_frame;
20913 offsets->locals_base = offsets->soft_frame;
20914 return;
20915 }
20916
20917 /* Ensure SFP has the correct alignment. */
20918 if (ARM_DOUBLEWORD_ALIGN
20919 && (offsets->soft_frame & 7))
20920 {
20921 offsets->soft_frame += 4;
20922 /* Try to align stack by pushing an extra reg. Don't bother doing this
20923 when there is a stack frame as the alignment will be rolled into
20924 the normal stack adjustment. */
20925 if (frame_size + crtl->outgoing_args_size == 0)
20926 {
20927 int reg = -1;
20928
20929 /* Register r3 is caller-saved. Normally it does not need to be
20930 saved on entry by the prologue. However if we choose to save
20931 it for padding then we may confuse the compiler into thinking
20932 a prologue sequence is required when in fact it is not. This
20933 will occur when shrink-wrapping if r3 is used as a scratch
20934 register and there are no other callee-saved writes.
20935
20936 This situation can be avoided when other callee-saved registers
20937 are available and r3 is not mandatory if we choose a callee-saved
20938 register for padding. */
20939 bool prefer_callee_reg_p = false;
20940
20941 /* If it is safe to use r3, then do so. This sometimes
20942 generates better code on Thumb-2 by avoiding the need to
20943 use 32-bit push/pop instructions. */
20944 if (! any_sibcall_could_use_r3 ()
20945 && arm_size_return_regs () <= 12
20946 && (offsets->saved_regs_mask & (1 << 3)) == 0
20947 && (TARGET_THUMB2
20948 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20949 {
20950 reg = 3;
20951 if (!TARGET_THUMB2)
20952 prefer_callee_reg_p = true;
20953 }
20954 if (reg == -1
20955 || prefer_callee_reg_p)
20956 {
20957 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20958 {
20959 /* Avoid fixed registers; they may be changed at
20960 arbitrary times so it's unsafe to restore them
20961 during the epilogue. */
20962 if (!fixed_regs[i]
20963 && (offsets->saved_regs_mask & (1 << i)) == 0)
20964 {
20965 reg = i;
20966 break;
20967 }
20968 }
20969 }
20970
20971 if (reg != -1)
20972 {
20973 offsets->saved_regs += 4;
20974 offsets->saved_regs_mask |= (1 << reg);
20975 }
20976 }
20977 }
20978
20979 offsets->locals_base = offsets->soft_frame + frame_size;
20980 offsets->outgoing_args = (offsets->locals_base
20981 + crtl->outgoing_args_size);
20982
20983 if (ARM_DOUBLEWORD_ALIGN)
20984 {
20985 /* Ensure SP remains doubleword aligned. */
20986 if (offsets->outgoing_args & 7)
20987 offsets->outgoing_args += 4;
20988 gcc_assert (!(offsets->outgoing_args & 7));
20989 }
20990 }
20991
20992
20993 /* Calculate the relative offsets for the different stack pointers. Positive
20994 offsets are in the direction of stack growth. */
20995
20996 HOST_WIDE_INT
20997 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20998 {
20999 arm_stack_offsets *offsets;
21000
21001 offsets = arm_get_frame_offsets ();
21002
21003 /* OK, now we have enough information to compute the distances.
21004 There must be an entry in these switch tables for each pair
21005 of registers in ELIMINABLE_REGS, even if some of the entries
21006 seem to be redundant or useless. */
21007 switch (from)
21008 {
21009 case ARG_POINTER_REGNUM:
21010 switch (to)
21011 {
21012 case THUMB_HARD_FRAME_POINTER_REGNUM:
21013 return 0;
21014
21015 case FRAME_POINTER_REGNUM:
21016 /* This is the reverse of the soft frame pointer
21017 to hard frame pointer elimination below. */
21018 return offsets->soft_frame - offsets->saved_args;
21019
21020 case ARM_HARD_FRAME_POINTER_REGNUM:
21021 /* This is only non-zero in the case where the static chain register
21022 is stored above the frame. */
21023 return offsets->frame - offsets->saved_args - 4;
21024
21025 case STACK_POINTER_REGNUM:
21026 /* If nothing has been pushed on the stack at all
21027 then this will return -4. This *is* correct! */
21028 return offsets->outgoing_args - (offsets->saved_args + 4);
21029
21030 default:
21031 gcc_unreachable ();
21032 }
21033 gcc_unreachable ();
21034
21035 case FRAME_POINTER_REGNUM:
21036 switch (to)
21037 {
21038 case THUMB_HARD_FRAME_POINTER_REGNUM:
21039 return 0;
21040
21041 case ARM_HARD_FRAME_POINTER_REGNUM:
21042 /* The hard frame pointer points to the top entry in the
21043 stack frame. The soft frame pointer points to the bottom entry
21044 in the stack frame. If there is no stack frame at all,
21045 then they are identical. */
21046
21047 return offsets->frame - offsets->soft_frame;
21048
21049 case STACK_POINTER_REGNUM:
21050 return offsets->outgoing_args - offsets->soft_frame;
21051
21052 default:
21053 gcc_unreachable ();
21054 }
21055 gcc_unreachable ();
21056
21057 default:
21058 /* You cannot eliminate from the stack pointer.
21059 In theory you could eliminate from the hard frame
21060 pointer to the stack pointer, but this will never
21061 happen, since if a stack frame is not needed the
21062 hard frame pointer will never be used. */
21063 gcc_unreachable ();
21064 }
21065 }
21066
21067 /* Given FROM and TO register numbers, say whether this elimination is
21068 allowed. Frame pointer elimination is automatically handled.
21069
21070 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21071 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21072 pointer, we must eliminate FRAME_POINTER_REGNUM into
21073 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21074 ARG_POINTER_REGNUM. */
21075
21076 bool
21077 arm_can_eliminate (const int from, const int to)
21078 {
21079 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21080 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21081 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21082 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21083 true);
21084 }
21085
21086 /* Emit RTL to save coprocessor registers on function entry. Returns the
21087 number of bytes pushed. */
21088
21089 static int
21090 arm_save_coproc_regs(void)
21091 {
21092 int saved_size = 0;
21093 unsigned reg;
21094 unsigned start_reg;
21095 rtx insn;
21096
21097 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21098 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21099 {
21100 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21101 insn = gen_rtx_MEM (V2SImode, insn);
21102 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21103 RTX_FRAME_RELATED_P (insn) = 1;
21104 saved_size += 8;
21105 }
21106
21107 if (TARGET_HARD_FLOAT)
21108 {
21109 start_reg = FIRST_VFP_REGNUM;
21110
21111 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21112 {
21113 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21114 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21115 {
21116 if (start_reg != reg)
21117 saved_size += vfp_emit_fstmd (start_reg,
21118 (reg - start_reg) / 2);
21119 start_reg = reg + 2;
21120 }
21121 }
21122 if (start_reg != reg)
21123 saved_size += vfp_emit_fstmd (start_reg,
21124 (reg - start_reg) / 2);
21125 }
21126 return saved_size;
21127 }
21128
21129
21130 /* Set the Thumb frame pointer from the stack pointer. */
21131
21132 static void
21133 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21134 {
21135 HOST_WIDE_INT amount;
21136 rtx insn, dwarf;
21137
21138 amount = offsets->outgoing_args - offsets->locals_base;
21139 if (amount < 1024)
21140 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21141 stack_pointer_rtx, GEN_INT (amount)));
21142 else
21143 {
21144 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21145 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21146 expects the first two operands to be the same. */
21147 if (TARGET_THUMB2)
21148 {
21149 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21150 stack_pointer_rtx,
21151 hard_frame_pointer_rtx));
21152 }
21153 else
21154 {
21155 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21156 hard_frame_pointer_rtx,
21157 stack_pointer_rtx));
21158 }
21159 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21160 plus_constant (Pmode, stack_pointer_rtx, amount));
21161 RTX_FRAME_RELATED_P (dwarf) = 1;
21162 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21163 }
21164
21165 RTX_FRAME_RELATED_P (insn) = 1;
21166 }
21167
21168 struct scratch_reg {
21169 rtx reg;
21170 bool saved;
21171 };
21172
21173 /* Return a short-lived scratch register for use as a 2nd scratch register on
21174 function entry after the registers are saved in the prologue. This register
21175 must be released by means of release_scratch_register_on_entry. IP is not
21176 considered since it is always used as the 1st scratch register if available.
21177
21178 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21179 mask of live registers. */
21180
21181 static void
21182 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21183 unsigned long live_regs)
21184 {
21185 int regno = -1;
21186
21187 sr->saved = false;
21188
21189 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21190 regno = LR_REGNUM;
21191 else
21192 {
21193 unsigned int i;
21194
21195 for (i = 4; i < 11; i++)
21196 if (regno1 != i && (live_regs & (1 << i)) != 0)
21197 {
21198 regno = i;
21199 break;
21200 }
21201
21202 if (regno < 0)
21203 {
21204 /* If IP is used as the 1st scratch register for a nested function,
21205 then either r3 wasn't available or is used to preserve IP. */
21206 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21207 regno1 = 3;
21208 regno = (regno1 == 3 ? 2 : 3);
21209 sr->saved
21210 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21211 regno);
21212 }
21213 }
21214
21215 sr->reg = gen_rtx_REG (SImode, regno);
21216 if (sr->saved)
21217 {
21218 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21219 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21220 rtx x = gen_rtx_SET (stack_pointer_rtx,
21221 plus_constant (Pmode, stack_pointer_rtx, -4));
21222 RTX_FRAME_RELATED_P (insn) = 1;
21223 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21224 }
21225 }
21226
21227 /* Release a scratch register obtained from the preceding function. */
21228
21229 static void
21230 release_scratch_register_on_entry (struct scratch_reg *sr)
21231 {
21232 if (sr->saved)
21233 {
21234 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21235 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21236 rtx x = gen_rtx_SET (stack_pointer_rtx,
21237 plus_constant (Pmode, stack_pointer_rtx, 4));
21238 RTX_FRAME_RELATED_P (insn) = 1;
21239 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21240 }
21241 }
21242
21243 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21244
21245 #if PROBE_INTERVAL > 4096
21246 #error Cannot use indexed addressing mode for stack probing
21247 #endif
21248
21249 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21250 inclusive. These are offsets from the current stack pointer. REGNO1
21251 is the index number of the 1st scratch register and LIVE_REGS is the
21252 mask of live registers. */
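/* As an illustration only (assuming the default 4K probe interval and
   REGNO1 == 4): probing FIRST == 0, SIZE == 1024 falls into the first,
   single-probe case below and emits roughly

	mov	r4, #4096
	sub	r4, sp, r4
	str	r0, [r4, #3072]	@ probes the word at sp - 1024

   Larger sizes use either the unrolled sequence or the probe loop.  */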
21253
21254 static void
21255 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21256 unsigned int regno1, unsigned long live_regs)
21257 {
21258 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21259
21260 /* See if we have a constant small number of probes to generate. If so,
21261 that's the easy case. */
21262 if (size <= PROBE_INTERVAL)
21263 {
21264 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21265 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21266 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21267 }
21268
21269 /* The run-time loop is made up of 10 insns in the generic case while the
21270 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21271 else if (size <= 5 * PROBE_INTERVAL)
21272 {
21273 HOST_WIDE_INT i, rem;
21274
21275 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21276 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21277 emit_stack_probe (reg1);
21278
21279 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21280 it exceeds SIZE. If only two probes are needed, this will not
21281 generate any code. Then probe at FIRST + SIZE. */
21282 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21283 {
21284 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21285 emit_stack_probe (reg1);
21286 }
21287
21288 rem = size - (i - PROBE_INTERVAL);
21289 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21290 {
21291 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21292 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21293 }
21294 else
21295 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21296 }
21297
21298 /* Otherwise, do the same as above, but in a loop. Note that we must be
21299 extra careful with variables wrapping around because we might be at
21300 the very top (or the very bottom) of the address space and we have
21301 to be able to handle this case properly; in particular, we use an
21302 equality test for the loop condition. */
21303 else
21304 {
21305 HOST_WIDE_INT rounded_size;
21306 struct scratch_reg sr;
21307
21308 get_scratch_register_on_entry (&sr, regno1, live_regs);
21309
21310 emit_move_insn (reg1, GEN_INT (first));
21311
21312
21313 /* Step 1: round SIZE to the previous multiple of the interval. */
21314
21315 rounded_size = size & -PROBE_INTERVAL;
21316 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21317
21318
21319 /* Step 2: compute initial and final value of the loop counter. */
21320
21321 /* TEST_ADDR = SP + FIRST. */
21322 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21323
21324 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21325 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21326
21327
21328 /* Step 3: the loop
21329
21330 do
21331 {
21332 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21333 probe at TEST_ADDR
21334 }
21335 while (TEST_ADDR != LAST_ADDR)
21336
21337 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21338 until it is equal to ROUNDED_SIZE. */
21339
21340 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21341
21342
21343 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21344 that SIZE is equal to ROUNDED_SIZE. */
21345
21346 if (size != rounded_size)
21347 {
21348 HOST_WIDE_INT rem = size - rounded_size;
21349
21350 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21351 {
21352 emit_set_insn (sr.reg,
21353 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21354 emit_stack_probe (plus_constant (Pmode, sr.reg,
21355 PROBE_INTERVAL - rem));
21356 }
21357 else
21358 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21359 }
21360
21361 release_scratch_register_on_entry (&sr);
21362 }
21363
21364 /* Make sure nothing is scheduled before we are done. */
21365 emit_insn (gen_blockage ());
21366 }
21367
21368 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21369 absolute addresses. */
21370
21371 const char *
21372 output_probe_stack_range (rtx reg1, rtx reg2)
21373 {
21374 static int labelno = 0;
21375 char loop_lab[32];
21376 rtx xops[2];
21377
21378 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21379
21380 /* Loop. */
21381 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21382
21383 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21384 xops[0] = reg1;
21385 xops[1] = GEN_INT (PROBE_INTERVAL);
21386 output_asm_insn ("sub\t%0, %0, %1", xops);
21387
21388 /* Probe at TEST_ADDR. */
21389 output_asm_insn ("str\tr0, [%0, #0]", xops);
21390
21391 /* Test if TEST_ADDR == LAST_ADDR. */
21392 xops[1] = reg2;
21393 output_asm_insn ("cmp\t%0, %1", xops);
21394
21395 /* Branch. */
21396 fputs ("\tbne\t", asm_out_file);
21397 assemble_name_raw (asm_out_file, loop_lab);
21398 fputc ('\n', asm_out_file);
21399
21400 return "";
21401 }
21402
21403 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21404 function. */
21405 void
21406 arm_expand_prologue (void)
21407 {
21408 rtx amount;
21409 rtx insn;
21410 rtx ip_rtx;
21411 unsigned long live_regs_mask;
21412 unsigned long func_type;
21413 int fp_offset = 0;
21414 int saved_pretend_args = 0;
21415 int saved_regs = 0;
21416 unsigned HOST_WIDE_INT args_to_push;
21417 HOST_WIDE_INT size;
21418 arm_stack_offsets *offsets;
21419 bool clobber_ip;
21420
21421 func_type = arm_current_func_type ();
21422
21423 /* Naked functions don't have prologues. */
21424 if (IS_NAKED (func_type))
21425 {
21426 if (flag_stack_usage_info)
21427 current_function_static_stack_size = 0;
21428 return;
21429 }
21430
21431 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21432 args_to_push = crtl->args.pretend_args_size;
21433
21434 /* Compute which registers we will have to save onto the stack. */
21435 offsets = arm_get_frame_offsets ();
21436 live_regs_mask = offsets->saved_regs_mask;
21437
21438 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21439
21440 if (IS_STACKALIGN (func_type))
21441 {
21442 rtx r0, r1;
21443
21444 /* Handle a word-aligned stack pointer. We generate the following:
21445
21446 mov r0, sp
21447 bic r1, r0, #7
21448 mov sp, r1
21449 <save and restore r0 in normal prologue/epilogue>
21450 mov sp, r0
21451 bx lr
21452
21453 The unwinder doesn't need to know about the stack realignment.
21454 Just tell it we saved SP in r0. */
21455 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21456
21457 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21458 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21459
21460 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21461 RTX_FRAME_RELATED_P (insn) = 1;
21462 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21463
21464 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21465
21466 /* ??? The CFA changes here, which may cause GDB to conclude that it
21467 has entered a different function. That said, the unwind info is
21468 correct, individually, before and after this instruction because
21469 we've described the save of SP, which will override the default
21470 handling of SP as restoring from the CFA. */
21471 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21472 }
21473
21474 /* The static chain register is the same as the IP register. If it is
21475 clobbered when creating the frame, we need to save and restore it. */
21476 clobber_ip = IS_NESTED (func_type)
21477 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21478 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21479 || flag_stack_clash_protection)
21480 && !df_regs_ever_live_p (LR_REGNUM)
21481 && arm_r3_live_at_start_p ()));
21482
21483 /* Find somewhere to store IP whilst the frame is being created.
21484 We try the following places in order:
21485
21486 1. The last argument register r3 if it is available.
21487 2. A slot on the stack above the frame if there are no
21488 arguments to push onto the stack.
21489 3. Register r3 again, after pushing the argument registers
21490 onto the stack, if this is a varargs function.
21491 4. The last slot on the stack created for the arguments to
21492 push, if this isn't a varargs function.
21493
21494 Note - we only need to tell the dwarf2 backend about the SP
21495 adjustment in the second variant; the static chain register
21496 doesn't need to be unwound, as it doesn't contain a value
21497 inherited from the caller. */
21498 if (clobber_ip)
21499 {
21500 if (!arm_r3_live_at_start_p ())
21501 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21502 else if (args_to_push == 0)
21503 {
21504 rtx addr, dwarf;
21505
21506 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21507 saved_regs += 4;
21508
21509 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21510 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21511 fp_offset = 4;
21512
21513 /* Just tell the dwarf backend that we adjusted SP. */
21514 dwarf = gen_rtx_SET (stack_pointer_rtx,
21515 plus_constant (Pmode, stack_pointer_rtx,
21516 -fp_offset));
21517 RTX_FRAME_RELATED_P (insn) = 1;
21518 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21519 }
21520 else
21521 {
21522 /* Store the args on the stack. */
21523 if (cfun->machine->uses_anonymous_args)
21524 {
21525 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21526 (0xf0 >> (args_to_push / 4)) & 0xf);
21527 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21528 saved_pretend_args = 1;
21529 }
21530 else
21531 {
21532 rtx addr, dwarf;
21533
21534 if (args_to_push == 4)
21535 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21536 else
21537 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21538 plus_constant (Pmode,
21539 stack_pointer_rtx,
21540 -args_to_push));
21541
21542 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21543
21544 /* Just tell the dwarf backend that we adjusted SP. */
21545 dwarf = gen_rtx_SET (stack_pointer_rtx,
21546 plus_constant (Pmode, stack_pointer_rtx,
21547 -args_to_push));
21548 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21549 }
21550
21551 RTX_FRAME_RELATED_P (insn) = 1;
21552 fp_offset = args_to_push;
21553 args_to_push = 0;
21554 }
21555 }
21556
21557 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21558 {
21559 if (IS_INTERRUPT (func_type))
21560 {
21561 /* Interrupt functions must not corrupt any registers.
21562 Creating a frame pointer, however, corrupts the IP
21563 register, so we must push it first. */
21564 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21565
21566 /* Do not set RTX_FRAME_RELATED_P on this insn.
21567 The dwarf stack unwinding code only wants to see one
21568 stack decrement per function, and this is not it. If
21569 this instruction is labeled as being part of the frame
21570 creation sequence then dwarf2out_frame_debug_expr will
21571 die when it encounters the assignment of IP to FP
21572 later on, since the use of SP here establishes SP as
21573 the CFA register and not IP.
21574
21575 Anyway this instruction is not really part of the stack
21576 frame creation although it is part of the prologue. */
21577 }
21578
21579 insn = emit_set_insn (ip_rtx,
21580 plus_constant (Pmode, stack_pointer_rtx,
21581 fp_offset));
21582 RTX_FRAME_RELATED_P (insn) = 1;
21583 }
21584
21585 if (args_to_push)
21586 {
21587 /* Push the argument registers, or reserve space for them. */
21588 if (cfun->machine->uses_anonymous_args)
21589 insn = emit_multi_reg_push
21590 ((0xf0 >> (args_to_push / 4)) & 0xf,
21591 (0xf0 >> (args_to_push / 4)) & 0xf);
21592 else
21593 insn = emit_insn
21594 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21595 GEN_INT (- args_to_push)));
21596 RTX_FRAME_RELATED_P (insn) = 1;
21597 }
21598
21599 /* If this is an interrupt service routine, and the link register
21600 is going to be pushed, and we're not generating the extra
21601 push of IP (needed when a frame pointer is required and the frame
21602 layout is APCS), then subtracting four from LR now means that the
21603 function return can be done with a single instruction. */
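/* For example, the epilogue can then pop the adjusted value directly into
   PC (something like "ldmfd sp!, {..., pc}^") instead of adjusting LR
   before returning; the exact form depends on the registers saved.  */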
21604 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21605 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21606 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21607 && TARGET_ARM)
21608 {
21609 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21610
21611 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21612 }
21613
21614 if (live_regs_mask)
21615 {
21616 unsigned long dwarf_regs_mask = live_regs_mask;
21617
21618 saved_regs += bit_count (live_regs_mask) * 4;
21619 if (optimize_size && !frame_pointer_needed
21620 && saved_regs == offsets->saved_regs - offsets->saved_args)
21621 {
21622 /* If no coprocessor registers are being pushed and we don't have
21623 to worry about a frame pointer then push extra registers to
21624 create the stack frame. This is done in a way that does not
21625 alter the frame layout, so is independent of the epilogue. */
21626 int n;
21627 int frame;
21628 n = 0;
21629 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21630 n++;
21631 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21632 if (frame && n * 4 >= frame)
21633 {
21634 n = frame / 4;
21635 live_regs_mask |= (1 << n) - 1;
21636 saved_regs += frame;
21637 }
21638 }
21639
21640 if (TARGET_LDRD
21641 && current_tune->prefer_ldrd_strd
21642 && !optimize_function_for_size_p (cfun))
21643 {
21644 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21645 if (TARGET_THUMB2)
21646 thumb2_emit_strd_push (live_regs_mask);
21647 else if (TARGET_ARM
21648 && !TARGET_APCS_FRAME
21649 && !IS_INTERRUPT (func_type))
21650 arm_emit_strd_push (live_regs_mask);
21651 else
21652 {
21653 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21654 RTX_FRAME_RELATED_P (insn) = 1;
21655 }
21656 }
21657 else
21658 {
21659 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21660 RTX_FRAME_RELATED_P (insn) = 1;
21661 }
21662 }
21663
21664 if (! IS_VOLATILE (func_type))
21665 saved_regs += arm_save_coproc_regs ();
21666
21667 if (frame_pointer_needed && TARGET_ARM)
21668 {
21669 /* Create the new frame pointer. */
21670 if (TARGET_APCS_FRAME)
21671 {
21672 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21673 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21674 RTX_FRAME_RELATED_P (insn) = 1;
21675 }
21676 else
21677 {
21678 insn = GEN_INT (saved_regs - (4 + fp_offset));
21679 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21680 stack_pointer_rtx, insn));
21681 RTX_FRAME_RELATED_P (insn) = 1;
21682 }
21683 }
21684
21685 size = offsets->outgoing_args - offsets->saved_args;
21686 if (flag_stack_usage_info)
21687 current_function_static_stack_size = size;
21688
21689 /* If this isn't an interrupt service routine and we have a frame, then do
21690 stack checking. We use IP as the first scratch register, except for the
21691 non-APCS nested functions if LR or r3 is available (see clobber_ip). */
21692 if (!IS_INTERRUPT (func_type)
21693 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21694 || flag_stack_clash_protection))
21695 {
21696 unsigned int regno;
21697
21698 if (!IS_NESTED (func_type) || clobber_ip)
21699 regno = IP_REGNUM;
21700 else if (df_regs_ever_live_p (LR_REGNUM))
21701 regno = LR_REGNUM;
21702 else
21703 regno = 3;
21704
21705 if (crtl->is_leaf && !cfun->calls_alloca)
21706 {
21707 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21708 arm_emit_probe_stack_range (get_stack_check_protect (),
21709 size - get_stack_check_protect (),
21710 regno, live_regs_mask);
21711 }
21712 else if (size > 0)
21713 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21714 regno, live_regs_mask);
21715 }
21716
21717 /* Recover the static chain register. */
21718 if (clobber_ip)
21719 {
21720 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21721 insn = gen_rtx_REG (SImode, 3);
21722 else
21723 {
21724 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21725 insn = gen_frame_mem (SImode, insn);
21726 }
21727 emit_set_insn (ip_rtx, insn);
21728 emit_insn (gen_force_register_use (ip_rtx));
21729 }
21730
21731 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21732 {
21733 /* This add can produce multiple insns for a large constant, so we
21734 need to get tricky. */
21735 rtx_insn *last = get_last_insn ();
21736
21737 amount = GEN_INT (offsets->saved_args + saved_regs
21738 - offsets->outgoing_args);
21739
21740 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21741 amount));
21742 do
21743 {
21744 last = last ? NEXT_INSN (last) : get_insns ();
21745 RTX_FRAME_RELATED_P (last) = 1;
21746 }
21747 while (last != insn);
21748
21749 /* If the frame pointer is needed, emit a special barrier that
21750 will prevent the scheduler from moving stores to the frame
21751 before the stack adjustment. */
21752 if (frame_pointer_needed)
21753 emit_insn (gen_stack_tie (stack_pointer_rtx,
21754 hard_frame_pointer_rtx));
21755 }
21756
21757
21758 if (frame_pointer_needed && TARGET_THUMB2)
21759 thumb_set_frame_pointer (offsets);
21760
21761 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21762 {
21763 unsigned long mask;
21764
21765 mask = live_regs_mask;
21766 mask &= THUMB2_WORK_REGS;
21767 if (!IS_NESTED (func_type))
21768 mask |= (1 << IP_REGNUM);
21769 arm_load_pic_register (mask);
21770 }
21771
21772 /* If we are profiling, make sure no instructions are scheduled before
21773 the call to mcount. Similarly if the user has requested no
21774 scheduling in the prolog. Similarly if we want non-call exceptions
21775 using the EABI unwinder, to prevent faulting instructions from being
21776 swapped with a stack adjustment. */
21777 if (crtl->profile || !TARGET_SCHED_PROLOG
21778 || (arm_except_unwind_info (&global_options) == UI_TARGET
21779 && cfun->can_throw_non_call_exceptions))
21780 emit_insn (gen_blockage ());
21781
21782 /* If the link register is being kept alive, with the return address in it,
21783 then make sure that it does not get reused by the ce2 pass. */
21784 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21785 cfun->machine->lr_save_eliminated = 1;
21786 }
21787 \f
21788 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21789 static void
21790 arm_print_condition (FILE *stream)
21791 {
21792 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21793 {
21794 /* Branch conversion is not implemented for Thumb-2. */
21795 if (TARGET_THUMB)
21796 {
21797 output_operand_lossage ("predicated Thumb instruction");
21798 return;
21799 }
21800 if (current_insn_predicate != NULL)
21801 {
21802 output_operand_lossage
21803 ("predicated instruction in conditional sequence");
21804 return;
21805 }
21806
21807 fputs (arm_condition_codes[arm_current_cc], stream);
21808 }
21809 else if (current_insn_predicate)
21810 {
21811 enum arm_cond_code code;
21812
21813 if (TARGET_THUMB1)
21814 {
21815 output_operand_lossage ("predicated Thumb instruction");
21816 return;
21817 }
21818
21819 code = get_arm_condition_code (current_insn_predicate);
21820 fputs (arm_condition_codes[code], stream);
21821 }
21822 }
21823
21824
21825 /* Globally reserved letters: acln
21826 Punctuation letters currently used: @_|?().!#
21827 Lower case letters currently used: bcdefhimpqtvwxyz
21828 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21829 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21830
21831 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21832
21833 If CODE is 'd', then X is a condition operand and the instruction
21834 should only be executed if the condition is true.
21835 If CODE is 'D', then X is a condition operand and the instruction
21836 should only be executed if the condition is false: however, if the mode
21837 of the comparison is CCFPEmode, then always execute the instruction -- we
21838 do this because in these circumstances !GE does not necessarily imply LT;
21839 in these cases the instruction pattern will take care to make sure that
21840 an instruction containing %d will follow, thereby undoing the effects of
21841 doing this instruction unconditionally.
21842 If CODE is 'N' then X is a floating point operand that must be negated
21843 before output.
21844 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21845 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
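/* A couple of illustrative examples: with X = (const_int 5), '%B' prints -6
   (the bitwise inverse, sign-extended); with X a DImode value held in r4,
   '%M' prints "{r4-r5}".  */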
21846 static void
21847 arm_print_operand (FILE *stream, rtx x, int code)
21848 {
21849 switch (code)
21850 {
21851 case '@':
21852 fputs (ASM_COMMENT_START, stream);
21853 return;
21854
21855 case '_':
21856 fputs (user_label_prefix, stream);
21857 return;
21858
21859 case '|':
21860 fputs (REGISTER_PREFIX, stream);
21861 return;
21862
21863 case '?':
21864 arm_print_condition (stream);
21865 return;
21866
21867 case '.':
21868 /* The current condition code for a condition code setting instruction.
21869 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21870 fputc ('s', stream);
21871 arm_print_condition (stream);
21872 return;
21873
21874 case '!':
21875 /* If the instruction is conditionally executed then print
21876 the current condition code, otherwise print 's'. */
21877 gcc_assert (TARGET_THUMB2);
21878 if (current_insn_predicate)
21879 arm_print_condition (stream);
21880 else
21881 fputc ('s', stream);
21882 break;
21883
21884 /* %# is a "break" sequence. It doesn't output anything, but is used to
21885 separate e.g. operand numbers from following text, if that text consists
21886 of further digits which we don't want to be part of the operand
21887 number. */
21888 case '#':
21889 return;
21890
21891 case 'N':
21892 {
21893 REAL_VALUE_TYPE r;
21894 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21895 fprintf (stream, "%s", fp_const_from_val (&r));
21896 }
21897 return;
21898
21899 /* An integer or symbol address without a preceding # sign. */
21900 case 'c':
21901 switch (GET_CODE (x))
21902 {
21903 case CONST_INT:
21904 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21905 break;
21906
21907 case SYMBOL_REF:
21908 output_addr_const (stream, x);
21909 break;
21910
21911 case CONST:
21912 if (GET_CODE (XEXP (x, 0)) == PLUS
21913 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21914 {
21915 output_addr_const (stream, x);
21916 break;
21917 }
21918 /* Fall through. */
21919
21920 default:
21921 output_operand_lossage ("Unsupported operand for code '%c'", code);
21922 }
21923 return;
21924
21925 /* An integer that we want to print in HEX. */
21926 case 'x':
21927 switch (GET_CODE (x))
21928 {
21929 case CONST_INT:
21930 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21931 break;
21932
21933 default:
21934 output_operand_lossage ("Unsupported operand for code '%c'", code);
21935 }
21936 return;
21937
21938 case 'B':
21939 if (CONST_INT_P (x))
21940 {
21941 HOST_WIDE_INT val;
21942 val = ARM_SIGN_EXTEND (~INTVAL (x));
21943 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21944 }
21945 else
21946 {
21947 putc ('~', stream);
21948 output_addr_const (stream, x);
21949 }
21950 return;
21951
21952 case 'b':
21953 /* Print the log2 of a CONST_INT. */
21954 {
21955 HOST_WIDE_INT val;
21956
21957 if (!CONST_INT_P (x)
21958 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21959 output_operand_lossage ("Unsupported operand for code '%c'", code);
21960 else
21961 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21962 }
21963 return;
21964
21965 case 'L':
21966 /* The low 16 bits of an immediate constant. */
21967 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21968 return;
21969
21970 case 'i':
21971 fprintf (stream, "%s", arithmetic_instr (x, 1));
21972 return;
21973
21974 case 'I':
21975 fprintf (stream, "%s", arithmetic_instr (x, 0));
21976 return;
21977
21978 case 'S':
21979 {
21980 HOST_WIDE_INT val;
21981 const char *shift;
21982
21983 shift = shift_op (x, &val);
21984
21985 if (shift)
21986 {
21987 fprintf (stream, ", %s ", shift);
21988 if (val == -1)
21989 arm_print_operand (stream, XEXP (x, 1), 0);
21990 else
21991 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21992 }
21993 }
21994 return;
21995
21996 /* An explanation of the 'Q', 'R' and 'H' register operands:
21997
21998 In a pair of registers containing a DI or DF value the 'Q'
21999 operand returns the register number of the register containing
22000 the least significant part of the value. The 'R' operand returns
22001 the register number of the register containing the most
22002 significant part of the value.
22003
22004 The 'H' operand returns the higher of the two register numbers.
22005 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22006 same as the 'Q' operand, since the most significant part of the
22007 value is held in the lower number register. The reverse is true
22008 on systems where WORDS_BIG_ENDIAN is false.
22009
22010 The purpose of these operands is to distinguish between cases
22011 where the endian-ness of the values is important (for example
22012 when they are added together), and cases where the endian-ness
22013 is irrelevant, but the order of register operations is important.
22014 For example when loading a value from memory into a register
22015 pair, the endian-ness does not matter. Provided that the value
22016 from the lower memory address is put into the lower numbered
22017 register, and the value from the higher address is put into the
22018 higher numbered register, the load will work regardless of whether
22019 the value being loaded is big-wordian or little-wordian. The
22020 order of the two register loads can matter however, if the address
22021 of the memory location is actually held in one of the registers
22022 being overwritten by the load.
22023
22024 The 'Q' and 'R' constraints are also available for 64-bit
22025 constants. */
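/* For example, for a DImode value held in {r0, r1} with WORDS_BIG_ENDIAN
   false, '%Q' prints r0, '%R' prints r1 and '%H' prints r1; with
   WORDS_BIG_ENDIAN true, '%Q' and '%H' both print r1 while '%R' prints
   r0.  */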
22026 case 'Q':
22027 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22028 {
22029 rtx part = gen_lowpart (SImode, x);
22030 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22031 return;
22032 }
22033
22034 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22035 {
22036 output_operand_lossage ("invalid operand for code '%c'", code);
22037 return;
22038 }
22039
22040 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22041 return;
22042
22043 case 'R':
22044 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22045 {
22046 machine_mode mode = GET_MODE (x);
22047 rtx part;
22048
22049 if (mode == VOIDmode)
22050 mode = DImode;
22051 part = gen_highpart_mode (SImode, mode, x);
22052 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22053 return;
22054 }
22055
22056 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22057 {
22058 output_operand_lossage ("invalid operand for code '%c'", code);
22059 return;
22060 }
22061
22062 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22063 return;
22064
22065 case 'H':
22066 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22067 {
22068 output_operand_lossage ("invalid operand for code '%c'", code);
22069 return;
22070 }
22071
22072 asm_fprintf (stream, "%r", REGNO (x) + 1);
22073 return;
22074
22075 case 'J':
22076 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22077 {
22078 output_operand_lossage ("invalid operand for code '%c'", code);
22079 return;
22080 }
22081
22082 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22083 return;
22084
22085 case 'K':
22086 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22087 {
22088 output_operand_lossage ("invalid operand for code '%c'", code);
22089 return;
22090 }
22091
22092 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22093 return;
22094
22095 case 'm':
22096 asm_fprintf (stream, "%r",
22097 REG_P (XEXP (x, 0))
22098 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22099 return;
22100
22101 case 'M':
22102 asm_fprintf (stream, "{%r-%r}",
22103 REGNO (x),
22104 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22105 return;
22106
22107 /* Like 'M', but writing doubleword vector registers, for use by Neon
22108 insns. */
22109 case 'h':
22110 {
22111 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22112 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22113 if (numregs == 1)
22114 asm_fprintf (stream, "{d%d}", regno);
22115 else
22116 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22117 }
22118 return;
22119
22120 case 'd':
22121 /* CONST_TRUE_RTX means always -- that's the default. */
22122 if (x == const_true_rtx)
22123 return;
22124
22125 if (!COMPARISON_P (x))
22126 {
22127 output_operand_lossage ("invalid operand for code '%c'", code);
22128 return;
22129 }
22130
22131 fputs (arm_condition_codes[get_arm_condition_code (x)],
22132 stream);
22133 return;
22134
22135 case 'D':
22136 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22137 want to do that. */
22138 if (x == const_true_rtx)
22139 {
22140 output_operand_lossage ("instruction never executed");
22141 return;
22142 }
22143 if (!COMPARISON_P (x))
22144 {
22145 output_operand_lossage ("invalid operand for code '%c'", code);
22146 return;
22147 }
22148
22149 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22150 (get_arm_condition_code (x))],
22151 stream);
22152 return;
22153
22154 case 's':
22155 case 'V':
22156 case 'W':
22157 case 'X':
22158 case 'Y':
22159 case 'Z':
22160 /* Former Maverick support, removed after GCC-4.7. */
22161 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22162 return;
22163
22164 case 'U':
22165 if (!REG_P (x)
22166 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22167 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22168 /* Bad value for wCG register number. */
22169 {
22170 output_operand_lossage ("invalid operand for code '%c'", code);
22171 return;
22172 }
22173
22174 else
22175 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22176 return;
22177
22178 /* Print an iWMMXt control register name. */
22179 case 'w':
22180 if (!CONST_INT_P (x)
22181 || INTVAL (x) < 0
22182 || INTVAL (x) >= 16)
22183 /* Bad value for wC register number. */
22184 {
22185 output_operand_lossage ("invalid operand for code '%c'", code);
22186 return;
22187 }
22188
22189 else
22190 {
22191 static const char * wc_reg_names [16] =
22192 {
22193 "wCID", "wCon", "wCSSF", "wCASF",
22194 "wC4", "wC5", "wC6", "wC7",
22195 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22196 "wC12", "wC13", "wC14", "wC15"
22197 };
22198
22199 fputs (wc_reg_names [INTVAL (x)], stream);
22200 }
22201 return;
22202
22203 /* Print the high single-precision register of a VFP double-precision
22204 register. */
22205 case 'p':
22206 {
22207 machine_mode mode = GET_MODE (x);
22208 int regno;
22209
22210 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22211 {
22212 output_operand_lossage ("invalid operand for code '%c'", code);
22213 return;
22214 }
22215
22216 regno = REGNO (x);
22217 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22218 {
22219 output_operand_lossage ("invalid operand for code '%c'", code);
22220 return;
22221 }
22222
22223 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22224 }
22225 return;
22226
22227 /* Print a VFP/Neon double precision or quad precision register name. */
22228 case 'P':
22229 case 'q':
22230 {
22231 machine_mode mode = GET_MODE (x);
22232 int is_quad = (code == 'q');
22233 int regno;
22234
22235 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22236 {
22237 output_operand_lossage ("invalid operand for code '%c'", code);
22238 return;
22239 }
22240
22241 if (!REG_P (x)
22242 || !IS_VFP_REGNUM (REGNO (x)))
22243 {
22244 output_operand_lossage ("invalid operand for code '%c'", code);
22245 return;
22246 }
22247
22248 regno = REGNO (x);
22249 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22250 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22251 {
22252 output_operand_lossage ("invalid operand for code '%c'", code);
22253 return;
22254 }
22255
22256 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22257 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22258 }
22259 return;
22260
22261 /* These two codes print the low/high doubleword register of a Neon quad
22262 register, respectively. For pair-structure types, can also print
22263 low/high quadword registers. */
22264 case 'e':
22265 case 'f':
22266 {
22267 machine_mode mode = GET_MODE (x);
22268 int regno;
22269
22270 if ((GET_MODE_SIZE (mode) != 16
22271 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22272 {
22273 output_operand_lossage ("invalid operand for code '%c'", code);
22274 return;
22275 }
22276
22277 regno = REGNO (x);
22278 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22279 {
22280 output_operand_lossage ("invalid operand for code '%c'", code);
22281 return;
22282 }
22283
22284 if (GET_MODE_SIZE (mode) == 16)
22285 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22286 + (code == 'f' ? 1 : 0));
22287 else
22288 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22289 + (code == 'f' ? 1 : 0));
22290 }
22291 return;
22292
22293 /* Print a VFPv3 floating-point constant, represented as an integer
22294 index. */
22295 case 'G':
22296 {
22297 int index = vfp3_const_double_index (x);
22298 gcc_assert (index != -1);
22299 fprintf (stream, "%d", index);
22300 }
22301 return;
22302
22303 /* Print bits representing opcode features for Neon.
22304
22305 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22306 and polynomials as unsigned.
22307
22308 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22309
22310 Bit 2 is 1 for rounding functions, 0 otherwise. */
22311
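/* For instance, X = (const_int 5) encodes a signed, rounding operation:
   the codes below print 's' for '%T', 'i' for '%F', 's' for '%t' and 'r'
   for '%O'.  */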
22312 /* Identify the type as 's', 'u', 'p' or 'f'. */
22313 case 'T':
22314 {
22315 HOST_WIDE_INT bits = INTVAL (x);
22316 fputc ("uspf"[bits & 3], stream);
22317 }
22318 return;
22319
22320 /* Likewise, but signed and unsigned integers are both 'i'. */
22321 case 'F':
22322 {
22323 HOST_WIDE_INT bits = INTVAL (x);
22324 fputc ("iipf"[bits & 3], stream);
22325 }
22326 return;
22327
22328 /* As for 'T', but emit 'u' instead of 'p'. */
22329 case 't':
22330 {
22331 HOST_WIDE_INT bits = INTVAL (x);
22332 fputc ("usuf"[bits & 3], stream);
22333 }
22334 return;
22335
22336 /* Bit 2: rounding (vs none). */
22337 case 'O':
22338 {
22339 HOST_WIDE_INT bits = INTVAL (x);
22340 fputs ((bits & 4) != 0 ? "r" : "", stream);
22341 }
22342 return;
22343
22344 /* Memory operand for vld1/vst1 instruction. */
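/* E.g. a 16-byte access through r0 whose known alignment is at least
   16 bytes is printed as "[r0:128]", the ":128" being the alignment hint
   computed below.  */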
22345 case 'A':
22346 {
22347 rtx addr;
22348 bool postinc = FALSE;
22349 rtx postinc_reg = NULL;
22350 unsigned align, memsize, align_bits;
22351
22352 gcc_assert (MEM_P (x));
22353 addr = XEXP (x, 0);
22354 if (GET_CODE (addr) == POST_INC)
22355 {
22356 postinc = 1;
22357 addr = XEXP (addr, 0);
22358 }
22359 if (GET_CODE (addr) == POST_MODIFY)
22360 {
22361 postinc_reg = XEXP (XEXP (addr, 1), 1);
22362 addr = XEXP (addr, 0);
22363 }
22364 asm_fprintf (stream, "[%r", REGNO (addr));
22365
22366 /* We know the alignment of this access, so we can emit a hint in the
22367 instruction (for some alignments) as an aid to the memory subsystem
22368 of the target. */
22369 align = MEM_ALIGN (x) >> 3;
22370 memsize = MEM_SIZE (x);
22371
22372 /* Only certain alignment specifiers are supported by the hardware. */
22373 if (memsize == 32 && (align % 32) == 0)
22374 align_bits = 256;
22375 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22376 align_bits = 128;
22377 else if (memsize >= 8 && (align % 8) == 0)
22378 align_bits = 64;
22379 else
22380 align_bits = 0;
22381
22382 if (align_bits != 0)
22383 asm_fprintf (stream, ":%d", align_bits);
22384
22385 asm_fprintf (stream, "]");
22386
22387 if (postinc)
22388 fputs ("!", stream);
22389 if (postinc_reg)
22390 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22391 }
22392 return;
22393
22394 case 'C':
22395 {
22396 rtx addr;
22397
22398 gcc_assert (MEM_P (x));
22399 addr = XEXP (x, 0);
22400 gcc_assert (REG_P (addr));
22401 asm_fprintf (stream, "[%r]", REGNO (addr));
22402 }
22403 return;
22404
22405 /* Translate an S register number into a D register number and element index. */
22406 case 'y':
22407 {
22408 machine_mode mode = GET_MODE (x);
22409 int regno;
22410
22411 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22412 {
22413 output_operand_lossage ("invalid operand for code '%c'", code);
22414 return;
22415 }
22416
22417 regno = REGNO (x);
22418 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22419 {
22420 output_operand_lossage ("invalid operand for code '%c'", code);
22421 return;
22422 }
22423
22424 regno = regno - FIRST_VFP_REGNUM;
22425 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22426 }
22427 return;
22428
22429 case 'v':
22430 gcc_assert (CONST_DOUBLE_P (x));
22431 int result;
22432 result = vfp3_const_double_for_fract_bits (x);
22433 if (result == 0)
22434 result = vfp3_const_double_for_bits (x);
22435 fprintf (stream, "#%d", result);
22436 return;
22437
22438 /* Register specifier for vld1.16/vst1.16. Translate the S register
22439 number into a D register number and element index. */
22440 case 'z':
22441 {
22442 machine_mode mode = GET_MODE (x);
22443 int regno;
22444
22445 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22446 {
22447 output_operand_lossage ("invalid operand for code '%c'", code);
22448 return;
22449 }
22450
22451 regno = REGNO (x);
22452 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22453 {
22454 output_operand_lossage ("invalid operand for code '%c'", code);
22455 return;
22456 }
22457
22458 regno = regno - FIRST_VFP_REGNUM;
22459 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22460 }
22461 return;
22462
22463 default:
22464 if (x == 0)
22465 {
22466 output_operand_lossage ("missing operand");
22467 return;
22468 }
22469
22470 switch (GET_CODE (x))
22471 {
22472 case REG:
22473 asm_fprintf (stream, "%r", REGNO (x));
22474 break;
22475
22476 case MEM:
22477 output_address (GET_MODE (x), XEXP (x, 0));
22478 break;
22479
22480 case CONST_DOUBLE:
22481 {
22482 char fpstr[20];
22483 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22484 sizeof (fpstr), 0, 1);
22485 fprintf (stream, "#%s", fpstr);
22486 }
22487 break;
22488
22489 default:
22490 gcc_assert (GET_CODE (x) != NEG);
22491 fputc ('#', stream);
22492 if (GET_CODE (x) == HIGH)
22493 {
22494 fputs (":lower16:", stream);
22495 x = XEXP (x, 0);
22496 }
22497
22498 output_addr_const (stream, x);
22499 break;
22500 }
22501 }
22502 }
22503 \f
22504 /* Target hook for printing a memory address. */
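/* For example, on 32-bit targets (reg r1) prints as "[r1]",
   (plus (reg r1) (const_int 4)) as "[r1, #4]" and an SImode POST_INC of r2
   as "[r2], #4" (illustrative only).  */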
22505 static void
22506 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22507 {
22508 if (TARGET_32BIT)
22509 {
22510 int is_minus = GET_CODE (x) == MINUS;
22511
22512 if (REG_P (x))
22513 asm_fprintf (stream, "[%r]", REGNO (x));
22514 else if (GET_CODE (x) == PLUS || is_minus)
22515 {
22516 rtx base = XEXP (x, 0);
22517 rtx index = XEXP (x, 1);
22518 HOST_WIDE_INT offset = 0;
22519 if (!REG_P (base)
22520 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22521 {
22522 /* Ensure that BASE is a register
22523 (one of them must be).  */
22524 /* Also ensure that SP is not used as an index register.  */
22525 std::swap (base, index);
22526 }
22527 switch (GET_CODE (index))
22528 {
22529 case CONST_INT:
22530 offset = INTVAL (index);
22531 if (is_minus)
22532 offset = -offset;
22533 asm_fprintf (stream, "[%r, #%wd]",
22534 REGNO (base), offset);
22535 break;
22536
22537 case REG:
22538 asm_fprintf (stream, "[%r, %s%r]",
22539 REGNO (base), is_minus ? "-" : "",
22540 REGNO (index));
22541 break;
22542
22543 case MULT:
22544 case ASHIFTRT:
22545 case LSHIFTRT:
22546 case ASHIFT:
22547 case ROTATERT:
22548 {
22549 asm_fprintf (stream, "[%r, %s%r",
22550 REGNO (base), is_minus ? "-" : "",
22551 REGNO (XEXP (index, 0)));
22552 arm_print_operand (stream, index, 'S');
22553 fputs ("]", stream);
22554 break;
22555 }
22556
22557 default:
22558 gcc_unreachable ();
22559 }
22560 }
22561 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22562 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22563 {
22564 gcc_assert (REG_P (XEXP (x, 0)));
22565
22566 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22567 asm_fprintf (stream, "[%r, #%s%d]!",
22568 REGNO (XEXP (x, 0)),
22569 GET_CODE (x) == PRE_DEC ? "-" : "",
22570 GET_MODE_SIZE (mode));
22571 else
22572 asm_fprintf (stream, "[%r], #%s%d",
22573 REGNO (XEXP (x, 0)),
22574 GET_CODE (x) == POST_DEC ? "-" : "",
22575 GET_MODE_SIZE (mode));
22576 }
22577 else if (GET_CODE (x) == PRE_MODIFY)
22578 {
22579 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22580 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22581 asm_fprintf (stream, "#%wd]!",
22582 INTVAL (XEXP (XEXP (x, 1), 1)));
22583 else
22584 asm_fprintf (stream, "%r]!",
22585 REGNO (XEXP (XEXP (x, 1), 1)));
22586 }
22587 else if (GET_CODE (x) == POST_MODIFY)
22588 {
22589 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22590 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22591 asm_fprintf (stream, "#%wd",
22592 INTVAL (XEXP (XEXP (x, 1), 1)));
22593 else
22594 asm_fprintf (stream, "%r",
22595 REGNO (XEXP (XEXP (x, 1), 1)));
22596 }
22597 else output_addr_const (stream, x);
22598 }
22599 else
22600 {
22601 if (REG_P (x))
22602 asm_fprintf (stream, "[%r]", REGNO (x));
22603 else if (GET_CODE (x) == POST_INC)
22604 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22605 else if (GET_CODE (x) == PLUS)
22606 {
22607 gcc_assert (REG_P (XEXP (x, 0)));
22608 if (CONST_INT_P (XEXP (x, 1)))
22609 asm_fprintf (stream, "[%r, #%wd]",
22610 REGNO (XEXP (x, 0)),
22611 INTVAL (XEXP (x, 1)));
22612 else
22613 asm_fprintf (stream, "[%r, %r]",
22614 REGNO (XEXP (x, 0)),
22615 REGNO (XEXP (x, 1)));
22616 }
22617 else
22618 output_addr_const (stream, x);
22619 }
22620 }
22621 \f
22622 /* Target hook for indicating whether a punctuation character for
22623 TARGET_PRINT_OPERAND is valid. */
22624 static bool
22625 arm_print_operand_punct_valid_p (unsigned char code)
22626 {
22627 return (code == '@' || code == '|' || code == '.'
22628 || code == '(' || code == ')' || code == '#'
22629 || (TARGET_32BIT && (code == '?'))
22630 || (TARGET_THUMB2 && (code == '!'))
22631 || (TARGET_THUMB && (code == '_')));
22632 }
22633 \f
22634 /* Target hook for assembling integer objects. The ARM version needs to
22635 handle word-sized values specially. */
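/* For instance, a word-sized SYMBOL_REF emitted while building a PIC
   constant table comes out roughly as "\t.word\tsym(GOTOFF)", or with a
   "(GOT)" suffix for symbols that cannot be resolved locally.  */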
22636 static bool
22637 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22638 {
22639 machine_mode mode;
22640
22641 if (size == UNITS_PER_WORD && aligned_p)
22642 {
22643 fputs ("\t.word\t", asm_out_file);
22644 output_addr_const (asm_out_file, x);
22645
22646 /* Mark symbols as position independent. We only do this in the
22647 .text segment, not in the .data segment. */
22648 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22649 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22650 {
22651 /* See legitimize_pic_address for an explanation of the
22652 TARGET_VXWORKS_RTP check. */
22653 /* References to weak symbols cannot be resolved locally:
22654 they may be overridden by a non-weak definition at link
22655 time. */
22656 if (!arm_pic_data_is_text_relative
22657 || (GET_CODE (x) == SYMBOL_REF
22658 && (!SYMBOL_REF_LOCAL_P (x)
22659 || (SYMBOL_REF_DECL (x)
22660 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22661 fputs ("(GOT)", asm_out_file);
22662 else
22663 fputs ("(GOTOFF)", asm_out_file);
22664 }
22665 fputc ('\n', asm_out_file);
22666 return true;
22667 }
22668
22669 mode = GET_MODE (x);
22670
22671 if (arm_vector_mode_supported_p (mode))
22672 {
22673 int i, units;
22674
22675 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22676
22677 units = CONST_VECTOR_NUNITS (x);
22678 size = GET_MODE_UNIT_SIZE (mode);
22679
22680 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22681 for (i = 0; i < units; i++)
22682 {
22683 rtx elt = CONST_VECTOR_ELT (x, i);
22684 assemble_integer
22685 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22686 }
22687 else
22688 for (i = 0; i < units; i++)
22689 {
22690 rtx elt = CONST_VECTOR_ELT (x, i);
22691 assemble_real
22692 (*CONST_DOUBLE_REAL_VALUE (elt),
22693 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22694 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22695 }
22696
22697 return true;
22698 }
22699
22700 return default_assemble_integer (x, size, aligned_p);
22701 }
22702
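/* Output a constructor or destructor table entry.  On AAPCS targets a
   prioritized constructor, say at priority 123, is placed in a section
   named ".init_array.00123" and emitted roughly as "\t.word\tsym(target1)"
   (illustrative; see the sprintf format below).  */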
22703 static void
22704 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22705 {
22706 section *s;
22707
22708 if (!TARGET_AAPCS_BASED)
22709 {
22710 (is_ctor ?
22711 default_named_section_asm_out_constructor
22712 : default_named_section_asm_out_destructor) (symbol, priority);
22713 return;
22714 }
22715
22716 /* Put these in the .init_array section, using a special relocation. */
22717 if (priority != DEFAULT_INIT_PRIORITY)
22718 {
22719 char buf[18];
22720 sprintf (buf, "%s.%.5u",
22721 is_ctor ? ".init_array" : ".fini_array",
22722 priority);
22723 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22724 }
22725 else if (is_ctor)
22726 s = ctors_section;
22727 else
22728 s = dtors_section;
22729
22730 switch_to_section (s);
22731 assemble_align (POINTER_SIZE);
22732 fputs ("\t.word\t", asm_out_file);
22733 output_addr_const (asm_out_file, symbol);
22734 fputs ("(target1)\n", asm_out_file);
22735 }
22736
22737 /* Add a function to the list of static constructors. */
22738
22739 static void
22740 arm_elf_asm_constructor (rtx symbol, int priority)
22741 {
22742 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22743 }
22744
22745 /* Add a function to the list of static destructors. */
22746
22747 static void
22748 arm_elf_asm_destructor (rtx symbol, int priority)
22749 {
22750 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22751 }
22752 \f
22753 /* A finite state machine takes care of noticing whether or not instructions
22754 can be conditionally executed, and thus decreases execution time and code
22755 size by deleting branch instructions. The fsm is controlled by
22756 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22757
22758 /* The states of the fsm controlling condition codes are:
22759 0: normal, do nothing special
22760 1: make ASM_OUTPUT_OPCODE not output this instruction
22761 2: make ASM_OUTPUT_OPCODE not output this instruction
22762 3: make instructions conditional
22763 4: make instructions conditional
22764
22765 State transitions (state->state by whom under condition):
22766 0 -> 1 final_prescan_insn if the `target' is a label
22767 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22768 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22769 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22770 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22771 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22772 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22773 (the target insn is arm_target_insn).
22774
22775 If the jump clobbers the conditions then we use states 2 and 4.
22776
22777 A similar thing can be done with conditional return insns.
22778
22779 XXX In case the `target' is an unconditional branch, this conditionalising
22780 of the instructions always reduces code size, but not always execution
22781 time. But then, I want to reduce the code size to somewhere near what
22782 /bin/cc produces. */
22783
22784 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22785 instructions. When a COND_EXEC instruction is seen the subsequent
22786 instructions are scanned so that multiple conditional instructions can be
22787 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22788 specify the length and true/false mask for the IT block. These will be
22789 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
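/* For example, two adjacent COND_EXEC insns predicated on EQ and NE
   respectively are combined into one block (mask bits 1 and 0), and
   thumb2_asm_output_opcode below then prefixes the first of them with
   "ite\teq".  */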
22790
22791 /* Returns the index of the ARM condition code string in
22792 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22793 COMPARISON should be an rtx like `(eq (...) (...))'. */
22794
22795 enum arm_cond_code
22796 maybe_get_arm_condition_code (rtx comparison)
22797 {
22798 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22799 enum arm_cond_code code;
22800 enum rtx_code comp_code = GET_CODE (comparison);
22801
22802 if (GET_MODE_CLASS (mode) != MODE_CC)
22803 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22804 XEXP (comparison, 1));
22805
22806 switch (mode)
22807 {
22808 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22809 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22810 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22811 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22812 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22813 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22814 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22815 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22816 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22817 case E_CC_DLTUmode: code = ARM_CC;
22818
22819 dominance:
22820 if (comp_code == EQ)
22821 return ARM_INVERSE_CONDITION_CODE (code);
22822 if (comp_code == NE)
22823 return code;
22824 return ARM_NV;
22825
22826 case E_CC_NOOVmode:
22827 switch (comp_code)
22828 {
22829 case NE: return ARM_NE;
22830 case EQ: return ARM_EQ;
22831 case GE: return ARM_PL;
22832 case LT: return ARM_MI;
22833 default: return ARM_NV;
22834 }
22835
22836 case E_CC_Zmode:
22837 switch (comp_code)
22838 {
22839 case NE: return ARM_NE;
22840 case EQ: return ARM_EQ;
22841 default: return ARM_NV;
22842 }
22843
22844 case E_CC_Nmode:
22845 switch (comp_code)
22846 {
22847 case NE: return ARM_MI;
22848 case EQ: return ARM_PL;
22849 default: return ARM_NV;
22850 }
22851
22852 case E_CCFPEmode:
22853 case E_CCFPmode:
22854 /* We can handle all cases except UNEQ and LTGT. */
22855 switch (comp_code)
22856 {
22857 case GE: return ARM_GE;
22858 case GT: return ARM_GT;
22859 case LE: return ARM_LS;
22860 case LT: return ARM_MI;
22861 case NE: return ARM_NE;
22862 case EQ: return ARM_EQ;
22863 case ORDERED: return ARM_VC;
22864 case UNORDERED: return ARM_VS;
22865 case UNLT: return ARM_LT;
22866 case UNLE: return ARM_LE;
22867 case UNGT: return ARM_HI;
22868 case UNGE: return ARM_PL;
22869 /* UNEQ and LTGT do not have a representation. */
22870 case UNEQ: /* Fall through. */
22871 case LTGT: /* Fall through. */
22872 default: return ARM_NV;
22873 }
22874
22875 case E_CC_SWPmode:
22876 switch (comp_code)
22877 {
22878 case NE: return ARM_NE;
22879 case EQ: return ARM_EQ;
22880 case GE: return ARM_LE;
22881 case GT: return ARM_LT;
22882 case LE: return ARM_GE;
22883 case LT: return ARM_GT;
22884 case GEU: return ARM_LS;
22885 case GTU: return ARM_CC;
22886 case LEU: return ARM_CS;
22887 case LTU: return ARM_HI;
22888 default: return ARM_NV;
22889 }
22890
22891 case E_CC_Cmode:
22892 switch (comp_code)
22893 {
22894 case LTU: return ARM_CS;
22895 case GEU: return ARM_CC;
22896 case NE: return ARM_CS;
22897 case EQ: return ARM_CC;
22898 default: return ARM_NV;
22899 }
22900
22901 case E_CC_CZmode:
22902 switch (comp_code)
22903 {
22904 case NE: return ARM_NE;
22905 case EQ: return ARM_EQ;
22906 case GEU: return ARM_CS;
22907 case GTU: return ARM_HI;
22908 case LEU: return ARM_LS;
22909 case LTU: return ARM_CC;
22910 default: return ARM_NV;
22911 }
22912
22913 case E_CC_NCVmode:
22914 switch (comp_code)
22915 {
22916 case GE: return ARM_GE;
22917 case LT: return ARM_LT;
22918 case GEU: return ARM_CS;
22919 case LTU: return ARM_CC;
22920 default: return ARM_NV;
22921 }
22922
22923 case E_CC_Vmode:
22924 switch (comp_code)
22925 {
22926 case NE: return ARM_VS;
22927 case EQ: return ARM_VC;
22928 default: return ARM_NV;
22929 }
22930
22931 case E_CCmode:
22932 switch (comp_code)
22933 {
22934 case NE: return ARM_NE;
22935 case EQ: return ARM_EQ;
22936 case GE: return ARM_GE;
22937 case GT: return ARM_GT;
22938 case LE: return ARM_LE;
22939 case LT: return ARM_LT;
22940 case GEU: return ARM_CS;
22941 case GTU: return ARM_HI;
22942 case LEU: return ARM_LS;
22943 case LTU: return ARM_CC;
22944 default: return ARM_NV;
22945 }
22946
22947 default: gcc_unreachable ();
22948 }
22949 }
22950
22951 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22952 static enum arm_cond_code
22953 get_arm_condition_code (rtx comparison)
22954 {
22955 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22956 gcc_assert (code != ARM_NV);
22957 return code;
22958 }
22959
22960 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22961 code registers when not targeting Thumb1. The VFP condition register
22962 only exists when generating hard-float code. */
22963 static bool
22964 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22965 {
22966 if (!TARGET_32BIT)
22967 return false;
22968
22969 *p1 = CC_REGNUM;
22970 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22971 return true;
22972 }
22973
22974 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22975 instructions. */
22976 void
22977 thumb2_final_prescan_insn (rtx_insn *insn)
22978 {
22979 rtx_insn *first_insn = insn;
22980 rtx body = PATTERN (insn);
22981 rtx predicate;
22982 enum arm_cond_code code;
22983 int n;
22984 int mask;
22985 int max;
22986
22987 /* max_insns_skipped in the tune was already taken into account in the
22988 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22989 just emit the IT blocks as we can. It does not make sense to split
22990 the IT blocks. */
22991 max = MAX_INSN_PER_IT_BLOCK;
22992
22993 /* Remove the previous insn from the count of insns to be output. */
22994 if (arm_condexec_count)
22995 arm_condexec_count--;
22996
22997 /* Nothing to do if we are already inside a conditional block. */
22998 if (arm_condexec_count)
22999 return;
23000
23001 if (GET_CODE (body) != COND_EXEC)
23002 return;
23003
23004 /* Conditional jumps are implemented directly. */
23005 if (JUMP_P (insn))
23006 return;
23007
23008 predicate = COND_EXEC_TEST (body);
23009 arm_current_cc = get_arm_condition_code (predicate);
23010
23011 n = get_attr_ce_count (insn);
23012 arm_condexec_count = 1;
23013 arm_condexec_mask = (1 << n) - 1;
23014 arm_condexec_masklen = n;
23015 /* See if subsequent instructions can be combined into the same block. */
23016 for (;;)
23017 {
23018 insn = next_nonnote_insn (insn);
23019
23020 /* Jumping into the middle of an IT block is illegal, so a label or
23021 barrier terminates the block. */
23022 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23023 break;
23024
23025 body = PATTERN (insn);
23026 /* USE and CLOBBER aren't really insns, so just skip them. */
23027 if (GET_CODE (body) == USE
23028 || GET_CODE (body) == CLOBBER)
23029 continue;
23030
23031 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23032 if (GET_CODE (body) != COND_EXEC)
23033 break;
23034 /* Maximum number of conditionally executed instructions in a block. */
23035 n = get_attr_ce_count (insn);
23036 if (arm_condexec_masklen + n > max)
23037 break;
23038
23039 predicate = COND_EXEC_TEST (body);
23040 code = get_arm_condition_code (predicate);
23041 mask = (1 << n) - 1;
23042 if (arm_current_cc == code)
23043 arm_condexec_mask |= (mask << arm_condexec_masklen);
23044 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23045 break;
23046
23047 arm_condexec_count++;
23048 arm_condexec_masklen += n;
23049
23050 /* A jump must be the last instruction in a conditional block. */
23051 if (JUMP_P (insn))
23052 break;
23053 }
23054 /* Restore recog_data (getting the attributes of other insns can
23055 destroy this array, but final.c assumes that it remains intact
23056 across this call). */
23057 extract_constrain_insn_cached (first_insn);
23058 }
23059
23060 void
23061 arm_final_prescan_insn (rtx_insn *insn)
23062 {
23063 /* BODY will hold the body of INSN. */
23064 rtx body = PATTERN (insn);
23065
23066 /* This will be 1 if trying to repeat the trick, and things need to be
23067 reversed if it appears to fail. */
23068 int reverse = 0;
23069
23070 /* If we start with a return insn, we only succeed if we find another one. */
23071 int seeking_return = 0;
23072 enum rtx_code return_code = UNKNOWN;
23073
23074 /* START_INSN will hold the insn from where we start looking. This is the
23075 first insn after the following code_label if REVERSE is true. */
23076 rtx_insn *start_insn = insn;
23077
23078 /* If in state 4, check if the target branch is reached, in order to
23079 change back to state 0. */
23080 if (arm_ccfsm_state == 4)
23081 {
23082 if (insn == arm_target_insn)
23083 {
23084 arm_target_insn = NULL;
23085 arm_ccfsm_state = 0;
23086 }
23087 return;
23088 }
23089
23090 /* If in state 3, it is possible to repeat the trick, if this insn is an
23091 unconditional branch to a label, and immediately following this branch
23092 is the previous target label which is only used once, and the label this
23093 branch jumps to is not too far off. */
23094 if (arm_ccfsm_state == 3)
23095 {
23096 if (simplejump_p (insn))
23097 {
23098 start_insn = next_nonnote_insn (start_insn);
23099 if (BARRIER_P (start_insn))
23100 {
23101 /* XXX Isn't this always a barrier? */
23102 start_insn = next_nonnote_insn (start_insn);
23103 }
23104 if (LABEL_P (start_insn)
23105 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23106 && LABEL_NUSES (start_insn) == 1)
23107 reverse = TRUE;
23108 else
23109 return;
23110 }
23111 else if (ANY_RETURN_P (body))
23112 {
23113 start_insn = next_nonnote_insn (start_insn);
23114 if (BARRIER_P (start_insn))
23115 start_insn = next_nonnote_insn (start_insn);
23116 if (LABEL_P (start_insn)
23117 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23118 && LABEL_NUSES (start_insn) == 1)
23119 {
23120 reverse = TRUE;
23121 seeking_return = 1;
23122 return_code = GET_CODE (body);
23123 }
23124 else
23125 return;
23126 }
23127 else
23128 return;
23129 }
23130
23131 gcc_assert (!arm_ccfsm_state || reverse);
23132 if (!JUMP_P (insn))
23133 return;
23134
23135 /* This jump might be paralleled with a clobber of the condition codes;
23136 the jump should always come first. */
23137 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23138 body = XVECEXP (body, 0, 0);
23139
23140 if (reverse
23141 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23142 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23143 {
23144 int insns_skipped;
23145 int fail = FALSE, succeed = FALSE;
23146 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23147 int then_not_else = TRUE;
23148 rtx_insn *this_insn = start_insn;
23149 rtx label = 0;
23150
23151 /* Register the insn jumped to. */
23152 if (reverse)
23153 {
23154 if (!seeking_return)
23155 label = XEXP (SET_SRC (body), 0);
23156 }
23157 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23158 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23159 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23160 {
23161 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23162 then_not_else = FALSE;
23163 }
23164 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23165 {
23166 seeking_return = 1;
23167 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23168 }
23169 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23170 {
23171 seeking_return = 1;
23172 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23173 then_not_else = FALSE;
23174 }
23175 else
23176 gcc_unreachable ();
23177
23178 /* See how many insns this branch skips, and what kind of insns. If all
23179 insns are okay, and the label or unconditional branch to the same
23180 label is not too far away, succeed. */
23181 for (insns_skipped = 0;
23182 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23183 {
23184 rtx scanbody;
23185
23186 this_insn = next_nonnote_insn (this_insn);
23187 if (!this_insn)
23188 break;
23189
23190 switch (GET_CODE (this_insn))
23191 {
23192 case CODE_LABEL:
23193 /* Succeed if it is the target label, otherwise fail since
23194 control falls in from somewhere else. */
23195 if (this_insn == label)
23196 {
23197 arm_ccfsm_state = 1;
23198 succeed = TRUE;
23199 }
23200 else
23201 fail = TRUE;
23202 break;
23203
23204 case BARRIER:
23205 /* Succeed if the following insn is the target label.
23206 Otherwise fail.
23207 If return insns are used then the last insn in a function
23208 will be a barrier. */
23209 this_insn = next_nonnote_insn (this_insn);
23210 if (this_insn && this_insn == label)
23211 {
23212 arm_ccfsm_state = 1;
23213 succeed = TRUE;
23214 }
23215 else
23216 fail = TRUE;
23217 break;
23218
23219 case CALL_INSN:
23220 /* The AAPCS says that conditional calls should not be
23221 used since they make interworking inefficient (the
23222 linker can't transform BL<cond> into BLX). That's
23223 only a problem if the machine has BLX. */
23224 if (arm_arch5)
23225 {
23226 fail = TRUE;
23227 break;
23228 }
23229
23230 /* Succeed if the following insn is the target label, or
23231 if the following two insns are a barrier and the
23232 target label. */
23233 this_insn = next_nonnote_insn (this_insn);
23234 if (this_insn && BARRIER_P (this_insn))
23235 this_insn = next_nonnote_insn (this_insn);
23236
23237 if (this_insn && this_insn == label
23238 && insns_skipped < max_insns_skipped)
23239 {
23240 arm_ccfsm_state = 1;
23241 succeed = TRUE;
23242 }
23243 else
23244 fail = TRUE;
23245 break;
23246
23247 case JUMP_INSN:
23248 /* If this is an unconditional branch to the same label, succeed.
23249 If it is to another label, do nothing. If it is conditional,
23250 fail. */
23251 /* XXX Probably, the tests for SET and the PC are
23252 unnecessary. */
23253
23254 scanbody = PATTERN (this_insn);
23255 if (GET_CODE (scanbody) == SET
23256 && GET_CODE (SET_DEST (scanbody)) == PC)
23257 {
23258 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23259 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23260 {
23261 arm_ccfsm_state = 2;
23262 succeed = TRUE;
23263 }
23264 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23265 fail = TRUE;
23266 }
23267 /* Fail if a conditional return is undesirable (e.g. on a
23268 StrongARM), but still allow this if optimizing for size. */
23269 else if (GET_CODE (scanbody) == return_code
23270 && !use_return_insn (TRUE, NULL)
23271 && !optimize_size)
23272 fail = TRUE;
23273 else if (GET_CODE (scanbody) == return_code)
23274 {
23275 arm_ccfsm_state = 2;
23276 succeed = TRUE;
23277 }
23278 else if (GET_CODE (scanbody) == PARALLEL)
23279 {
23280 switch (get_attr_conds (this_insn))
23281 {
23282 case CONDS_NOCOND:
23283 break;
23284 default:
23285 fail = TRUE;
23286 break;
23287 }
23288 }
23289 else
23290 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23291
23292 break;
23293
23294 case INSN:
23295 /* Instructions using or affecting the condition codes make it
23296 fail. */
23297 scanbody = PATTERN (this_insn);
23298 if (!(GET_CODE (scanbody) == SET
23299 || GET_CODE (scanbody) == PARALLEL)
23300 || get_attr_conds (this_insn) != CONDS_NOCOND)
23301 fail = TRUE;
23302 break;
23303
23304 default:
23305 break;
23306 }
23307 }
23308 if (succeed)
23309 {
23310 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23311 arm_target_label = CODE_LABEL_NUMBER (label);
23312 else
23313 {
23314 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23315
23316 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23317 {
23318 this_insn = next_nonnote_insn (this_insn);
23319 gcc_assert (!this_insn
23320 || (!BARRIER_P (this_insn)
23321 && !LABEL_P (this_insn)));
23322 }
23323 if (!this_insn)
23324 {
23325 /* Oh, dear! We ran off the end; give up. */
23326 extract_constrain_insn_cached (insn);
23327 arm_ccfsm_state = 0;
23328 arm_target_insn = NULL;
23329 return;
23330 }
23331 arm_target_insn = this_insn;
23332 }
23333
23334 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23335 what it was. */
23336 if (!reverse)
23337 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23338
23339 if (reverse || then_not_else)
23340 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23341 }
23342
23343 /* Restore recog_data (getting the attributes of other insns can
23344 destroy this array, but final.c assumes that it remains intact
23345 across this call). */
23346 extract_constrain_insn_cached (insn);
23347 }
23348 }
23349
23350 /* Output IT instructions. */
23351 void
23352 thumb2_asm_output_opcode (FILE * stream)
23353 {
23354 char buff[5];
23355 int n;
23356
23357 if (arm_condexec_mask)
23358 {
23359 for (n = 0; n < arm_condexec_masklen; n++)
23360 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23361 buff[n] = 0;
23362 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23363 arm_condition_codes[arm_current_cc]);
23364 arm_condexec_mask = 0;
23365 }
23366 }
23367
23368 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23369 UNITS_PER_WORD bytes wide. */
23370 static unsigned int
23371 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23372 {
23373 if (TARGET_32BIT
23374 && regno > PC_REGNUM
23375 && regno != FRAME_POINTER_REGNUM
23376 && regno != ARG_POINTER_REGNUM
23377 && !IS_VFP_REGNUM (regno))
23378 return 1;
23379
23380 return ARM_NUM_REGS (mode);
23381 }
23382
23383 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23384 static bool
23385 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23386 {
23387 if (GET_MODE_CLASS (mode) == MODE_CC)
23388 return (regno == CC_REGNUM
23389 || (TARGET_HARD_FLOAT
23390 && regno == VFPCC_REGNUM));
23391
23392 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23393 return false;
23394
23395 if (TARGET_THUMB1)
23396 /* For the Thumb we only allow values bigger than SImode in
23397 registers 0 - 6, so that there is always a second low
23398 register available to hold the upper part of the value.
23399 We probably ought to ensure that the register is the
23400 start of an even numbered register pair. */
23401 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23402
23403 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23404 {
23405 if (mode == SFmode || mode == SImode)
23406 return VFP_REGNO_OK_FOR_SINGLE (regno);
23407
23408 if (mode == DFmode)
23409 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23410
23411 if (mode == HFmode)
23412 return VFP_REGNO_OK_FOR_SINGLE (regno);
23413
23414 /* VFP registers can hold HImode values. */
23415 if (mode == HImode)
23416 return VFP_REGNO_OK_FOR_SINGLE (regno);
23417
23418 if (TARGET_NEON)
23419 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23420 || (VALID_NEON_QREG_MODE (mode)
23421 && NEON_REGNO_OK_FOR_QUAD (regno))
23422 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23423 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23424 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23425 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23426 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23427
23428 return false;
23429 }
23430
23431 if (TARGET_REALLY_IWMMXT)
23432 {
23433 if (IS_IWMMXT_GR_REGNUM (regno))
23434 return mode == SImode;
23435
23436 if (IS_IWMMXT_REGNUM (regno))
23437 return VALID_IWMMXT_REG_MODE (mode);
23438 }
23439
23440 /* We allow almost any value to be stored in the general registers.
23441 Restrict doubleword quantities to even register pairs in ARM state
23442 so that we can use ldrd. Do not allow very large Neon structure
23443 opaque modes in general registers; they would use too many. */
23444 if (regno <= LAST_ARM_REGNUM)
23445 {
23446 if (ARM_NUM_REGS (mode) > 4)
23447 return false;
23448
23449 if (TARGET_THUMB2)
23450 return true;
23451
23452 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23453 }
23454
23455 if (regno == FRAME_POINTER_REGNUM
23456 || regno == ARG_POINTER_REGNUM)
23457 /* We only allow integers in the fake hard registers. */
23458 return GET_MODE_CLASS (mode) == MODE_INT;
23459
23460 return false;
23461 }
23462
23463 /* Implement TARGET_MODES_TIEABLE_P. */
23464
23465 static bool
23466 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23467 {
23468 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23469 return true;
23470
23471 /* We specifically want to allow elements of "structure" modes to
23472 be tieable to the structure. This more general condition allows
23473 other rarer situations too. */
23474 if (TARGET_NEON
23475 && (VALID_NEON_DREG_MODE (mode1)
23476 || VALID_NEON_QREG_MODE (mode1)
23477 || VALID_NEON_STRUCT_MODE (mode1))
23478 && (VALID_NEON_DREG_MODE (mode2)
23479 || VALID_NEON_QREG_MODE (mode2)
23480 || VALID_NEON_STRUCT_MODE (mode2)))
23481 return true;
23482
23483 return false;
23484 }
23485
23486 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23487 not used in arm mode. */
23488
23489 enum reg_class
23490 arm_regno_class (int regno)
23491 {
23492 if (regno == PC_REGNUM)
23493 return NO_REGS;
23494
23495 if (TARGET_THUMB1)
23496 {
23497 if (regno == STACK_POINTER_REGNUM)
23498 return STACK_REG;
23499 if (regno == CC_REGNUM)
23500 return CC_REG;
23501 if (regno < 8)
23502 return LO_REGS;
23503 return HI_REGS;
23504 }
23505
23506 if (TARGET_THUMB2 && regno < 8)
23507 return LO_REGS;
23508
23509 if ( regno <= LAST_ARM_REGNUM
23510 || regno == FRAME_POINTER_REGNUM
23511 || regno == ARG_POINTER_REGNUM)
23512 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23513
23514 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23515 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23516
23517 if (IS_VFP_REGNUM (regno))
23518 {
23519 if (regno <= D7_VFP_REGNUM)
23520 return VFP_D0_D7_REGS;
23521 else if (regno <= LAST_LO_VFP_REGNUM)
23522 return VFP_LO_REGS;
23523 else
23524 return VFP_HI_REGS;
23525 }
23526
23527 if (IS_IWMMXT_REGNUM (regno))
23528 return IWMMXT_REGS;
23529
23530 if (IS_IWMMXT_GR_REGNUM (regno))
23531 return IWMMXT_GR_REGS;
23532
23533 return NO_REGS;
23534 }
23535
23536 /* Handle a special case when computing the offset
23537 of an argument from the frame pointer. */
23538 int
23539 arm_debugger_arg_offset (int value, rtx addr)
23540 {
23541 rtx_insn *insn;
23542
23543 /* We are only interested if dbxout_parms() failed to compute the offset. */
23544 if (value != 0)
23545 return 0;
23546
23547 /* We can only cope with the case where the address is held in a register. */
23548 if (!REG_P (addr))
23549 return 0;
23550
23551 /* If we are using the frame pointer to point at the argument, then
23552 an offset of 0 is correct. */
23553 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23554 return 0;
23555
23556 /* If we are using the stack pointer to point at the
23557 argument, then an offset of 0 is correct. */
23558 /* ??? Check this is consistent with thumb2 frame layout. */
23559 if ((TARGET_THUMB || !frame_pointer_needed)
23560 && REGNO (addr) == SP_REGNUM)
23561 return 0;
23562
23563 /* Oh dear. The argument is pointed to by a register rather
23564 than being held in a register, or being stored at a known
23565 offset from the frame pointer. Since GDB only understands
23566 those two kinds of argument we must translate the address
23567 held in the register into an offset from the frame pointer.
23568 We do this by searching through the insns for the function
23569 looking to see where this register gets its value. If the
23570 register is initialized from the frame pointer plus an offset
23571 then we are in luck and we can continue, otherwise we give up.
23572
23573 This code is exercised by producing debugging information
23574 for a function with arguments like this:
23575
23576 double func (double a, double b, int c, double d) {return d;}
23577
23578 Without this code the stab for parameter 'd' will be set to
23579 an offset of 0 from the frame pointer, rather than 8. */
23580
23581 /* The if() statement says:
23582
23583 If the insn is a normal instruction
23584 and if the insn is setting the value in a register
23585 and if the register being set is the register holding the address of the argument
23586 and if the address is computed by an addition
23587 that involves adding to a register
23588 which is the frame pointer
23589 a constant integer
23590
23591 then... */
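/* In RTL terms we are looking for an insn of the shape
   (set (reg ADDR) (plus (reg HARD_FRAME_POINTER) (const_int OFFSET))).  */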
23592
23593 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23594 {
23595 if ( NONJUMP_INSN_P (insn)
23596 && GET_CODE (PATTERN (insn)) == SET
23597 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23598 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23599 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23600 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23601 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23602 )
23603 {
23604 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23605
23606 break;
23607 }
23608 }
23609
23610 if (value == 0)
23611 {
23612 debug_rtx (addr);
23613 warning (0, "unable to compute real location of stacked parameter");
23614 value = 8; /* XXX magic hack */
23615 }
23616
23617 return value;
23618 }
23619 \f
23620 /* Implement TARGET_PROMOTED_TYPE. */
23621
23622 static tree
23623 arm_promoted_type (const_tree t)
23624 {
23625 if (SCALAR_FLOAT_TYPE_P (t)
23626 && TYPE_PRECISION (t) == 16
23627 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23628 return float_type_node;
23629 return NULL_TREE;
23630 }
23631
23632 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23633 This simply adds HFmode as a supported mode; even though we don't
23634 implement arithmetic on this type directly, it's supported by
23635 optabs conversions, much the way the double-word arithmetic is
23636 special-cased in the default hook. */
23637
23638 static bool
23639 arm_scalar_mode_supported_p (scalar_mode mode)
23640 {
23641 if (mode == HFmode)
23642 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23643 else if (ALL_FIXED_POINT_MODE_P (mode))
23644 return true;
23645 else
23646 return default_scalar_mode_supported_p (mode);
23647 }
23648
23649 /* Set the value of FLT_EVAL_METHOD.
23650 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23651
23652 0: evaluate all operations and constants, whose semantic type has at
23653 most the range and precision of type float, to the range and
23654 precision of float; evaluate all other operations and constants to
23655 the range and precision of the semantic type;
23656
23657 N, where _FloatN is a supported interchange floating type:
23658 evaluate all operations and constants, whose semantic type has at
23659 most the range and precision of _FloatN type, to the range and
23660 precision of the _FloatN type; evaluate all other operations and
23661 constants to the range and precision of the semantic type;
23662
23663 If we have the ARMv8.2-A extensions then we support _Float16 in native
23664 precision, so we should set this to 16. Otherwise, we support the type,
23665 but want to evaluate expressions in float precision, so set this to
23666 0. */
23667
23668 static enum flt_eval_method
23669 arm_excess_precision (enum excess_precision_type type)
23670 {
23671 switch (type)
23672 {
23673 case EXCESS_PRECISION_TYPE_FAST:
23674 case EXCESS_PRECISION_TYPE_STANDARD:
23675 /* We can calculate either in 16-bit range and precision or
23676 32-bit range and precision. Make that decision based on whether
23677 we have native support for the ARMv8.2-A 16-bit floating-point
23678 instructions or not. */
23679 return (TARGET_VFP_FP16INST
23680 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23681 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23682 case EXCESS_PRECISION_TYPE_IMPLICIT:
23683 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23684 default:
23685 gcc_unreachable ();
23686 }
23687 return FLT_EVAL_METHOD_UNPREDICTABLE;
23688 }
23689
23690
23691 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23692 _Float16 if we are using anything other than ieee format for 16-bit
23693 floating point. Otherwise, punt to the default implementation. */
23694 static opt_scalar_float_mode
23695 arm_floatn_mode (int n, bool extended)
23696 {
23697 if (!extended && n == 16)
23698 {
23699 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23700 return HFmode;
23701 return opt_scalar_float_mode ();
23702 }
23703
23704 return default_floatn_mode (n, extended);
23705 }
23706
23707
23708 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23709 not to early-clobber SRC registers in the process.
23710
23711 We assume that the operands described by SRC and DEST represent a
23712 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23713 number of components into which the copy has been decomposed. */
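/* For example, if OPERANDS[0] overlaps OPERANDS[1] and starts at an equal
   or higher register number, the component moves are emitted highest part
   first, so no source register is overwritten before it has been read.  */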
23714 void
23715 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23716 {
23717 unsigned int i;
23718
23719 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23720 || REGNO (operands[0]) < REGNO (operands[1]))
23721 {
23722 for (i = 0; i < count; i++)
23723 {
23724 operands[2 * i] = dest[i];
23725 operands[2 * i + 1] = src[i];
23726 }
23727 }
23728 else
23729 {
23730 for (i = 0; i < count; i++)
23731 {
23732 operands[2 * i] = dest[count - i - 1];
23733 operands[2 * i + 1] = src[count - i - 1];
23734 }
23735 }
23736 }
23737
23738 /* Split operands into moves from op[1] + op[2] into op[0]. */
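/* OPERANDS[0] is the wide destination; its low half receives OPERANDS[1]
   and its high half receives OPERANDS[2].  The two moves are ordered (or,
   for exactly swapped halves, emitted as the single VSWP parallel below)
   so that neither half is clobbered before it is read.  */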
23739
23740 void
23741 neon_split_vcombine (rtx operands[3])
23742 {
23743 unsigned int dest = REGNO (operands[0]);
23744 unsigned int src1 = REGNO (operands[1]);
23745 unsigned int src2 = REGNO (operands[2]);
23746 machine_mode halfmode = GET_MODE (operands[1]);
23747 unsigned int halfregs = REG_NREGS (operands[1]);
23748 rtx destlo, desthi;
23749
23750 if (src1 == dest && src2 == dest + halfregs)
23751 {
23752 /* No-op move. Can't split to nothing; emit something. */
23753 emit_note (NOTE_INSN_DELETED);
23754 return;
23755 }
23756
23757 /* Preserve register attributes for variable tracking. */
23758 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23759 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23760 GET_MODE_SIZE (halfmode));
23761
23762 /* Special case of reversed high/low parts. Use VSWP. */
23763 if (src2 == dest && src1 == dest + halfregs)
23764 {
23765 rtx x = gen_rtx_SET (destlo, operands[1]);
23766 rtx y = gen_rtx_SET (desthi, operands[2]);
23767 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23768 return;
23769 }
23770
23771 if (!reg_overlap_mentioned_p (operands[2], destlo))
23772 {
23773 /* Try to avoid unnecessary moves if part of the result
23774 is in the right place already. */
23775 if (src1 != dest)
23776 emit_move_insn (destlo, operands[1]);
23777 if (src2 != dest + halfregs)
23778 emit_move_insn (desthi, operands[2]);
23779 }
23780 else
23781 {
23782 if (src2 != dest + halfregs)
23783 emit_move_insn (desthi, operands[2]);
23784 if (src1 != dest)
23785 emit_move_insn (destlo, operands[1]);
23786 }
23787 }
23788 \f
23789 /* Return the number (counting from 0) of
23790 the least significant set bit in MASK. */
23791
23792 inline static int
23793 number_of_first_bit_set (unsigned mask)
23794 {
23795 return ctz_hwi (mask);
23796 }
23797
23798 /* Like emit_multi_reg_push, but allowing for a different set of
23799 registers to be described as saved. MASK is the set of registers
23800 to be saved; REAL_REGS is the set of registers to be described as
23801 saved. If REAL_REGS is 0, only describe the stack adjustment. */
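/* As a hypothetical example, MASK == (1 << 0) | (1 << 4) (r0 and r4)
   builds a two-register parallel that is later assembled as something
   like "push {r0, r4}", adjusting SP by -8; the attached unwind note
   records stores only for the registers in REAL_REGS.  */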
23802
23803 static rtx_insn *
23804 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23805 {
23806 unsigned long regno;
23807 rtx par[10], tmp, reg;
23808 rtx_insn *insn;
23809 int i, j;
23810
23811 /* Build the parallel of the registers actually being stored. */
23812 for (i = 0; mask; ++i, mask &= mask - 1)
23813 {
23814 regno = ctz_hwi (mask);
23815 reg = gen_rtx_REG (SImode, regno);
23816
23817 if (i == 0)
23818 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23819 else
23820 tmp = gen_rtx_USE (VOIDmode, reg);
23821
23822 par[i] = tmp;
23823 }
23824
23825 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23826 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23827 tmp = gen_frame_mem (BLKmode, tmp);
23828 tmp = gen_rtx_SET (tmp, par[0]);
23829 par[0] = tmp;
23830
23831 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23832 insn = emit_insn (tmp);
23833
23834 /* Always build the stack adjustment note for unwind info. */
23835 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23836 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23837 par[0] = tmp;
23838
23839 /* Build the parallel of the registers recorded as saved for unwind. */
23840 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23841 {
23842 regno = ctz_hwi (real_regs);
23843 reg = gen_rtx_REG (SImode, regno);
23844
23845 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23846 tmp = gen_frame_mem (SImode, tmp);
23847 tmp = gen_rtx_SET (tmp, reg);
23848 RTX_FRAME_RELATED_P (tmp) = 1;
23849 par[j + 1] = tmp;
23850 }
23851
23852 if (j == 0)
23853 tmp = par[0];
23854 else
23855 {
23856 RTX_FRAME_RELATED_P (par[0]) = 1;
23857 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23858 }
23859
23860 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23861
23862 return insn;
23863 }
23864
23865 /* Emit code to pop registers from the stack.  F is the
23866 assembly file.  MASK is the registers to pop. */
23867 static void
23868 thumb_pop (FILE *f, unsigned long mask)
23869 {
23870 int regno;
23871 int lo_mask = mask & 0xFF;
23872
23873 gcc_assert (mask);
23874
23875 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23876 {
23877 /* Special case.  Do not generate a POP PC statement here; do it in
23878 thumb_exit ().  */
23879 thumb_exit (f, -1);
23880 return;
23881 }
23882
23883 fprintf (f, "\tpop\t{");
23884
23885 /* Look at the low registers first. */
23886 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23887 {
23888 if (lo_mask & 1)
23889 {
23890 asm_fprintf (f, "%r", regno);
23891
23892 if ((lo_mask & ~1) != 0)
23893 fprintf (f, ", ");
23894 }
23895 }
23896
23897 if (mask & (1 << PC_REGNUM))
23898 {
23899 /* Catch popping the PC. */
23900 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23901 || IS_CMSE_ENTRY (arm_current_func_type ()))
23902 {
23903 /* The PC is never popped directly; instead
23904 it is popped into r3 and then BX is used. */
23905 fprintf (f, "}\n");
23906
23907 thumb_exit (f, -1);
23908
23909 return;
23910 }
23911 else
23912 {
23913 if (mask & 0xFF)
23914 fprintf (f, ", ");
23915
23916 asm_fprintf (f, "%r", PC_REGNUM);
23917 }
23918 }
23919
23920 fprintf (f, "}\n");
23921 }
23922
23923 /* Generate code to return from a thumb function.
23924 If 'reg_containing_return_addr' is -1, then the return address is
23925 actually on the stack, at the stack pointer. */
23926 static void
23927 thumb_exit (FILE *f, int reg_containing_return_addr)
23928 {
23929 unsigned regs_available_for_popping;
23930 unsigned regs_to_pop;
23931 int pops_needed;
23932 unsigned available;
23933 unsigned required;
23934 machine_mode mode;
23935 int size;
23936 int restore_a4 = FALSE;
23937
23938 /* Compute the registers we need to pop. */
23939 regs_to_pop = 0;
23940 pops_needed = 0;
23941
23942 if (reg_containing_return_addr == -1)
23943 {
23944 regs_to_pop |= 1 << LR_REGNUM;
23945 ++pops_needed;
23946 }
23947
23948 if (TARGET_BACKTRACE)
23949 {
23950 /* Restore the (ARM) frame pointer and stack pointer. */
23951 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23952 pops_needed += 2;
23953 }
23954
23955 /* If there is nothing to pop then just emit the BX instruction and
23956 return. */
23957 if (pops_needed == 0)
23958 {
23959 if (crtl->calls_eh_return)
23960 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23961
23962 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23963 {
23964 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23965 reg_containing_return_addr);
23966 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23967 }
23968 else
23969 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23970 return;
23971 }
23972 /* Otherwise if we are not supporting interworking and we have not created
23973 a backtrace structure and the function was not entered in ARM mode then
23974 just pop the return address straight into the PC. */
23975 else if (!TARGET_INTERWORK
23976 && !TARGET_BACKTRACE
23977 && !is_called_in_ARM_mode (current_function_decl)
23978 && !crtl->calls_eh_return
23979 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23980 {
23981 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23982 return;
23983 }
23984
23985 /* Find out how many of the (return) argument registers we can corrupt. */
23986 regs_available_for_popping = 0;
23987
23988 /* If returning via __builtin_eh_return, the bottom three registers
23989 all contain information needed for the return. */
23990 if (crtl->calls_eh_return)
23991 size = 12;
23992 else
23993 {
23994 /* Deduce the registers used from the function's
23995 return value.  This is more reliable than examining
23996 df_regs_ever_live_p () because that will be set if the register is
23997 ever used in the function, not just if the register is used
23998 to hold a return value. */
23999
24000 if (crtl->return_rtx != 0)
24001 mode = GET_MODE (crtl->return_rtx);
24002 else
24003 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24004
24005 size = GET_MODE_SIZE (mode);
24006
24007 if (size == 0)
24008 {
24009 /* In a void function we can use any argument register.
24010 In a function that returns a structure on the stack
24011 we can use the second and third argument registers. */
24012 if (mode == VOIDmode)
24013 regs_available_for_popping =
24014 (1 << ARG_REGISTER (1))
24015 | (1 << ARG_REGISTER (2))
24016 | (1 << ARG_REGISTER (3));
24017 else
24018 regs_available_for_popping =
24019 (1 << ARG_REGISTER (2))
24020 | (1 << ARG_REGISTER (3));
24021 }
24022 else if (size <= 4)
24023 regs_available_for_popping =
24024 (1 << ARG_REGISTER (2))
24025 | (1 << ARG_REGISTER (3));
24026 else if (size <= 8)
24027 regs_available_for_popping =
24028 (1 << ARG_REGISTER (3));
24029 }
24030
24031 /* Match registers to be popped with registers into which we pop them. */
24032 for (available = regs_available_for_popping,
24033 required = regs_to_pop;
24034 required != 0 && available != 0;
24035 available &= ~(available & - available),
24036 required &= ~(required & - required))
24037 -- pops_needed;
24038
24039 /* If we have any popping registers left over, remove them. */
24040 if (available > 0)
24041 regs_available_for_popping &= ~available;
24042
24043 /* Otherwise if we need another popping register we can use
24044 the fourth argument register. */
24045 else if (pops_needed)
24046 {
24047 /* If we have not found any free argument registers and
24048 reg a4 contains the return address, we must move it. */
24049 if (regs_available_for_popping == 0
24050 && reg_containing_return_addr == LAST_ARG_REGNUM)
24051 {
24052 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24053 reg_containing_return_addr = LR_REGNUM;
24054 }
24055 else if (size > 12)
24056 {
24057 /* Register a4 is being used to hold part of the return value,
24058 but we have dire need of a free, low register. */
24059 restore_a4 = TRUE;
24060
24061 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24062 }
24063
24064 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24065 {
24066 /* The fourth argument register is available. */
24067 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24068
24069 --pops_needed;
24070 }
24071 }
24072
24073 /* Pop as many registers as we can. */
24074 thumb_pop (f, regs_available_for_popping);
24075
24076 /* Process the registers we popped. */
24077 if (reg_containing_return_addr == -1)
24078 {
24079 /* The return address was popped into the lowest numbered register. */
24080 regs_to_pop &= ~(1 << LR_REGNUM);
24081
24082 reg_containing_return_addr =
24083 number_of_first_bit_set (regs_available_for_popping);
24084
24085 /* Remove this register from the mask of available registers, so that
24086 the return address will not be corrupted by further pops. */
24087 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24088 }
24089
24090 /* If we popped other registers then handle them here. */
24091 if (regs_available_for_popping)
24092 {
24093 int frame_pointer;
24094
24095 /* Work out which register currently contains the frame pointer. */
24096 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24097
24098 /* Move it into the correct place. */
24099 asm_fprintf (f, "\tmov\t%r, %r\n",
24100 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24101
24102 /* (Temporarily) remove it from the mask of popped registers. */
24103 regs_available_for_popping &= ~(1 << frame_pointer);
24104 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24105
24106 if (regs_available_for_popping)
24107 {
24108 int stack_pointer;
24109
24110 /* We popped the stack pointer as well;
24111 find the register that contains it. */
24112 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24113
24114 /* Move it into the stack register. */
24115 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24116
24117 /* At this point we have popped all necessary registers, so
24118 do not worry about restoring regs_available_for_popping
24119 to its correct value:
24120
24121 assert (pops_needed == 0)
24122 assert (regs_available_for_popping == (1 << frame_pointer))
24123 assert (regs_to_pop == (1 << STACK_POINTER)) */
24124 }
24125 else
24126 {
24127 /* Since we have just moved the popped value into the frame
24128 pointer, the popping register is available for reuse, and
24129 we know that we still have the stack pointer left to pop. */
24130 regs_available_for_popping |= (1 << frame_pointer);
24131 }
24132 }
24133
24134 /* If we still have registers left on the stack, but we no longer have
24135 any registers into which we can pop them, then we must move the return
24136 address into the link register and make available the register that
24137 contained it. */
24138 if (regs_available_for_popping == 0 && pops_needed > 0)
24139 {
24140 regs_available_for_popping |= 1 << reg_containing_return_addr;
24141
24142 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24143 reg_containing_return_addr);
24144
24145 reg_containing_return_addr = LR_REGNUM;
24146 }
24147
24148 /* If we have registers left on the stack then pop some more.
24149 We know that at most we will want to pop FP and SP. */
24150 if (pops_needed > 0)
24151 {
24152 int popped_into;
24153 int move_to;
24154
24155 thumb_pop (f, regs_available_for_popping);
24156
24157 /* We have popped either FP or SP.
24158 Move whichever one it is into the correct register. */
24159 popped_into = number_of_first_bit_set (regs_available_for_popping);
24160 move_to = number_of_first_bit_set (regs_to_pop);
24161
24162 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24163 --pops_needed;
24164 }
24165
24166 /* If we still have not popped everything then we must have only
24167 had one register available to us and we are now popping the SP. */
24168 if (pops_needed > 0)
24169 {
24170 int popped_into;
24171
24172 thumb_pop (f, regs_available_for_popping);
24173
24174 popped_into = number_of_first_bit_set (regs_available_for_popping);
24175
24176 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24177 /*
24178 assert (regs_to_pop == (1 << STACK_POINTER))
24179 assert (pops_needed == 1)
24180 */
24181 }
24182
24183 /* If necessary restore the a4 register. */
24184 if (restore_a4)
24185 {
24186 if (reg_containing_return_addr != LR_REGNUM)
24187 {
24188 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24189 reg_containing_return_addr = LR_REGNUM;
24190 }
24191
24192 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24193 }
24194
24195 if (crtl->calls_eh_return)
24196 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24197
24198 /* Return to caller. */
24199 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24200 {
24201 /* This is for the cases where LR is not being used to contain the return
24202 address. It may therefore contain information that we might not want
24203 to leak, hence it must be cleared. The value in R0 will never be a
24204 secret at this point, so it is safe to use it, see the clearing code
24205 in 'cmse_nonsecure_entry_clear_before_return'. */
24206 if (reg_containing_return_addr != LR_REGNUM)
24207 asm_fprintf (f, "\tmov\tlr, r0\n");
24208
24209 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24210 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24211 }
24212 else
24213 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24214 }
24215 \f
24216 /* Scan INSN just before assembler is output for it.
24217 For Thumb-1, we track the status of the condition codes; this
24218 information is used in the cbranchsi4_insn pattern. */
24219 void
24220 thumb1_final_prescan_insn (rtx_insn *insn)
24221 {
24222 if (flag_print_asm_name)
24223 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24224 INSN_ADDRESSES (INSN_UID (insn)));
24225 /* Don't overwrite the previous setter when we get to a cbranch. */
24226 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24227 {
24228 enum attr_conds conds;
24229
24230 if (cfun->machine->thumb1_cc_insn)
24231 {
24232 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24233 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24234 CC_STATUS_INIT;
24235 }
24236 conds = get_attr_conds (insn);
24237 if (conds == CONDS_SET)
24238 {
24239 rtx set = single_set (insn);
24240 cfun->machine->thumb1_cc_insn = insn;
24241 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24242 cfun->machine->thumb1_cc_op1 = const0_rtx;
24243 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24244 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24245 {
24246 rtx src1 = XEXP (SET_SRC (set), 1);
24247 if (src1 == const0_rtx)
24248 cfun->machine->thumb1_cc_mode = CCmode;
24249 }
24250 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24251 {
24252 /* Record the src register operand instead of dest because
24253 cprop_hardreg pass propagates src. */
24254 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24255 }
24256 }
24257 else if (conds != CONDS_NOCOND)
24258 cfun->machine->thumb1_cc_insn = NULL_RTX;
24259 }
24260
24261 /* Check if an unexpected far jump is used. */
24262 if (cfun->machine->lr_save_eliminated
24263 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24264 internal_error ("Unexpected thumb1 far jump");
24265 }
24266
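/* Return 1 if VAL (viewed as a 32-bit value) is an 8-bit constant shifted
   left by at most 24 bits, i.e. all of its set bits fit within one
   byte-wide window; return 0 otherwise (including for VAL == 0).  */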
24267 int
24268 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24269 {
24270 unsigned HOST_WIDE_INT mask = 0xff;
24271 int i;
24272
24273 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24274 if (val == 0) /* XXX */
24275 return 0;
24276
24277 for (i = 0; i < 25; i++)
24278 if ((val & (mask << i)) == val)
24279 return 1;
24280
24281 return 0;
24282 }
24283
24284 /* Return nonzero if the current function contains,
24285 or might contain, a far jump. */
24286 static int
24287 thumb_far_jump_used_p (void)
24288 {
24289 rtx_insn *insn;
24290 bool far_jump = false;
24291 unsigned int func_size = 0;
24292
24293 /* If we have already decided that far jumps may be used,
24294 do not bother checking again, and always return true even if
24295 it turns out that they are not being used. Once we have made
24296 the decision that far jumps are present (and that hence the link
24297 register will be pushed onto the stack) we cannot go back on it. */
24298 if (cfun->machine->far_jump_used)
24299 return 1;
24300
24301 /* If this function is not being called from the prologue/epilogue
24302 generation code then it must be being called from the
24303 INITIAL_ELIMINATION_OFFSET macro. */
24304 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24305 {
24306 /* In this case we know that we are being asked about the elimination
24307 of the arg pointer register. If that register is not being used,
24308 then there are no arguments on the stack, and we do not have to
24309 worry that a far jump might force the prologue to push the link
24310 register, changing the stack offsets. In this case we can just
24311 return false, since the presence of far jumps in the function will
24312 not affect stack offsets.
24313
24314 If the arg pointer is live (or if it was live, but has now been
24315 eliminated and so set to dead) then we do have to test to see if
24316 the function might contain a far jump. This test can lead to some
24317 false positives, since before reload is completed, the length of
24318 branch instructions is not known, so gcc defaults to returning their
24319 longest length, which in turn sets the far jump attribute to true.
24320
24321 A false positive will not result in bad code being generated, but it
24322 will result in a needless push and pop of the link register. We
24323 hope that this does not occur too often.
24324
24325 If we need doubleword stack alignment this could affect the other
24326 elimination offsets so we can't risk getting it wrong. */
24327 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24328 cfun->machine->arg_pointer_live = 1;
24329 else if (!cfun->machine->arg_pointer_live)
24330 return 0;
24331 }
24332
24333 /* We should not change far_jump_used during or after reload, as there is
24334 no chance to change stack frame layout. */
24335 if (reload_in_progress || reload_completed)
24336 return 0;
24337
24338 /* Check to see if the function contains a branch
24339 insn with the far jump attribute set. */
24340 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24341 {
24342 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24343 {
24344 far_jump = true;
24345 }
24346 func_size += get_attr_length (insn);
24347 }
24348
24349 /* The far_jump attribute will always be true for thumb1 before the
24350 shorten_branch pass, so checking the far_jump attribute before
24351 shorten_branch isn't very useful.
24352
24353 The following heuristic tries to estimate more accurately whether a
24354 far jump may finally be used.  The heuristic is very conservative, as
24355 there is no chance to roll back the decision not to use a far jump.
24356
24357 The Thumb1 long branch offset range is -2048 to 2046.  In the worst
24358 case each 2-byte insn is associated with a 4-byte constant pool entry.
24359 Using a function size of 2048/3 as the threshold is conservative enough. */
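/* In other words, in the worst case every 2-byte insn expands to roughly
   6 bytes (the insn plus its 4-byte literal), so the code span can
   approach 3 * func_size; only when that could exceed the 2048-byte
   branch range do we commit to the far jump.  */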
24360 if (far_jump)
24361 {
24362 if ((func_size * 3) >= 2048)
24363 {
24364 /* Record the fact that we have decided that
24365 the function does use far jumps. */
24366 cfun->machine->far_jump_used = 1;
24367 return 1;
24368 }
24369 }
24370
24371 return 0;
24372 }
24373
24374 /* Return nonzero if FUNC must be entered in ARM mode. */
24375 static bool
24376 is_called_in_ARM_mode (tree func)
24377 {
24378 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24379
24380 /* Ignore the problem of functions whose address is taken. */
24381 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24382 return true;
24383
24384 #ifdef ARM_PE
24385 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24386 #else
24387 return false;
24388 #endif
24389 }
24390
24391 /* Given the stack offsets and register mask in OFFSETS, decide how
24392 many additional registers to push instead of subtracting a constant
24393 from SP. For epilogues the principle is the same except we use pop.
24394 FOR_PROLOGUE indicates which we're generating. */
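/* The idea is that extra low registers can be added to an existing
   push/pop at no extra instruction cost, and each one moves SP by another
   4 bytes, shrinking or removing the explicit SP adjustment.  This is
   attempted when optimizing for size, or when the frame is exactly 512
   bytes (see the comment below).  */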
24395 static int
24396 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24397 {
24398 HOST_WIDE_INT amount;
24399 unsigned long live_regs_mask = offsets->saved_regs_mask;
24400 /* Extract a mask of the ones we can give to the Thumb's push/pop
24401 instruction. */
24402 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24403 /* Then count how many other high registers will need to be pushed. */
24404 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24405 int n_free, reg_base, size;
24406
24407 if (!for_prologue && frame_pointer_needed)
24408 amount = offsets->locals_base - offsets->saved_regs;
24409 else
24410 amount = offsets->outgoing_args - offsets->saved_regs;
24411
24412 /* If the stack frame size is 512 exactly, we can save one load
24413 instruction, which should make this a win even when optimizing
24414 for speed. */
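/* The Thumb-1 SP-relative add/sub immediate only reaches multiples of 4
   up to 508, so a 512-byte adjustment would otherwise need the
   load-a-constant path in thumb1_expand_prologue; pushing one extra
   register brings the remaining adjustment back into range.  */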
24415 if (!optimize_size && amount != 512)
24416 return 0;
24417
24418 /* Can't do this if there are high registers to push. */
24419 if (high_regs_pushed != 0)
24420 return 0;
24421
24422 /* Shouldn't do it in the prologue if no registers would normally
24423 be pushed at all. In the epilogue, also allow it if we'll have
24424 a pop insn for the PC. */
24425 if (l_mask == 0
24426 && (for_prologue
24427 || TARGET_BACKTRACE
24428 || (live_regs_mask & 1 << LR_REGNUM) == 0
24429 || TARGET_INTERWORK
24430 || crtl->args.pretend_args_size != 0))
24431 return 0;
24432
24433 /* Don't do this if thumb_expand_prologue wants to emit instructions
24434 between the push and the stack frame allocation. */
24435 if (for_prologue
24436 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24437 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24438 return 0;
24439
24440 reg_base = 0;
24441 n_free = 0;
24442 if (!for_prologue)
24443 {
24444 size = arm_size_return_regs ();
24445 reg_base = ARM_NUM_INTS (size);
24446 live_regs_mask >>= reg_base;
24447 }
24448
24449 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24450 && (for_prologue || call_used_regs[reg_base + n_free]))
24451 {
24452 live_regs_mask >>= 1;
24453 n_free++;
24454 }
24455
24456 if (n_free == 0)
24457 return 0;
24458 gcc_assert (amount / 4 * 4 == amount);
24459
24460 if (amount >= 512 && (amount - n_free * 4) < 512)
24461 return (amount - 508) / 4;
24462 if (amount <= n_free * 4)
24463 return amount / 4;
24464 return 0;
24465 }
24466
24467 /* The bits which aren't usefully expanded as rtl. */
24468 const char *
24469 thumb1_unexpanded_epilogue (void)
24470 {
24471 arm_stack_offsets *offsets;
24472 int regno;
24473 unsigned long live_regs_mask = 0;
24474 int high_regs_pushed = 0;
24475 int extra_pop;
24476 int had_to_push_lr;
24477 int size;
24478
24479 if (cfun->machine->return_used_this_function != 0)
24480 return "";
24481
24482 if (IS_NAKED (arm_current_func_type ()))
24483 return "";
24484
24485 offsets = arm_get_frame_offsets ();
24486 live_regs_mask = offsets->saved_regs_mask;
24487 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24488
24489 /* Deduce the registers used from the function's return value.
24490 This is more reliable than examining df_regs_ever_live_p () because that
24491 will be set if the register is ever used in the function, not just if
24492 the register is used to hold a return value. */
24493 size = arm_size_return_regs ();
24494
24495 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24496 if (extra_pop > 0)
24497 {
24498 unsigned long extra_mask = (1 << extra_pop) - 1;
24499 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24500 }
24501
24502 /* The prologue may have pushed some high registers to use as
24503 work registers, e.g. the testsuite file:
24504 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24505 compiles to produce:
24506 push {r4, r5, r6, r7, lr}
24507 mov r7, r9
24508 mov r6, r8
24509 push {r6, r7}
24510 as part of the prologue.  We have to undo that pushing here. */
24511
24512 if (high_regs_pushed)
24513 {
24514 unsigned long mask = live_regs_mask & 0xff;
24515 int next_hi_reg;
24516
24517 /* The available low registers depend on the size of the value we are
24518 returning. */
24519 if (size <= 12)
24520 mask |= 1 << 3;
24521 if (size <= 8)
24522 mask |= 1 << 2;
24523
24524 if (mask == 0)
24525 /* Oh dear! We have no low registers into which we can pop
24526 high registers! */
24527 internal_error
24528 ("no low registers available for popping high registers");
24529
24530 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24531 if (live_regs_mask & (1 << next_hi_reg))
24532 break;
24533
24534 while (high_regs_pushed)
24535 {
24536 /* Find lo register(s) into which the high register(s) can
24537 be popped. */
24538 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24539 {
24540 if (mask & (1 << regno))
24541 high_regs_pushed--;
24542 if (high_regs_pushed == 0)
24543 break;
24544 }
24545
24546 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24547
24548 /* Pop the values into the low register(s). */
24549 thumb_pop (asm_out_file, mask);
24550
24551 /* Move the value(s) into the high registers. */
24552 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24553 {
24554 if (mask & (1 << regno))
24555 {
24556 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24557 regno);
24558
24559 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24560 if (live_regs_mask & (1 << next_hi_reg))
24561 break;
24562 }
24563 }
24564 }
24565 live_regs_mask &= ~0x0f00;
24566 }
24567
24568 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24569 live_regs_mask &= 0xff;
24570
24571 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24572 {
24573 /* Pop the return address into the PC. */
24574 if (had_to_push_lr)
24575 live_regs_mask |= 1 << PC_REGNUM;
24576
24577 /* Either no argument registers were pushed or a backtrace
24578 structure was created which includes an adjusted stack
24579 pointer, so just pop everything. */
24580 if (live_regs_mask)
24581 thumb_pop (asm_out_file, live_regs_mask);
24582
24583 /* We have either just popped the return address into the
24584 PC or it was kept in LR for the entire function.
24585 Note that thumb_pop has already called thumb_exit if the
24586 PC was in the list. */
24587 if (!had_to_push_lr)
24588 thumb_exit (asm_out_file, LR_REGNUM);
24589 }
24590 else
24591 {
24592 /* Pop everything but the return address. */
24593 if (live_regs_mask)
24594 thumb_pop (asm_out_file, live_regs_mask);
24595
24596 if (had_to_push_lr)
24597 {
24598 if (size > 12)
24599 {
24600 /* We have no free low regs, so save one. */
24601 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24602 LAST_ARG_REGNUM);
24603 }
24604
24605 /* Get the return address into a temporary register. */
24606 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24607
24608 if (size > 12)
24609 {
24610 /* Move the return address to lr. */
24611 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24612 LAST_ARG_REGNUM);
24613 /* Restore the low register. */
24614 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24615 IP_REGNUM);
24616 regno = LR_REGNUM;
24617 }
24618 else
24619 regno = LAST_ARG_REGNUM;
24620 }
24621 else
24622 regno = LR_REGNUM;
24623
24624 /* Remove the argument registers that were pushed onto the stack. */
24625 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24626 SP_REGNUM, SP_REGNUM,
24627 crtl->args.pretend_args_size);
24628
24629 thumb_exit (asm_out_file, regno);
24630 }
24631
24632 return "";
24633 }
24634
24635 /* Functions to save and restore machine-specific function data. */
24636 static struct machine_function *
24637 arm_init_machine_status (void)
24638 {
24639 struct machine_function *machine;
24640 machine = ggc_cleared_alloc<machine_function> ();
24641
24642 #if ARM_FT_UNKNOWN != 0
24643 machine->func_type = ARM_FT_UNKNOWN;
24644 #endif
24645 return machine;
24646 }
24647
24648 /* Return an RTX indicating where the return address to the
24649 calling function can be found. */
24650 rtx
24651 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24652 {
24653 if (count != 0)
24654 return NULL_RTX;
24655
24656 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24657 }
24658
24659 /* Do anything needed before RTL is emitted for each function. */
24660 void
24661 arm_init_expanders (void)
24662 {
24663 /* Arrange to initialize and mark the machine per-function status. */
24664 init_machine_status = arm_init_machine_status;
24665
24666 /* This is to stop the combine pass optimizing away the alignment
24667 adjustment of va_arg. */
24668 /* ??? It is claimed that this should not be necessary. */
24669 if (cfun)
24670 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24671 }
24672
24673 /* Return true if FUNC is compiled for a different instruction set (ARM vs. Thumb) from the one currently in use. */
24674
24675 bool
24676 arm_change_mode_p (tree func)
24677 {
24678 if (TREE_CODE (func) != FUNCTION_DECL)
24679 return false;
24680
24681 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24682
24683 if (!callee_tree)
24684 callee_tree = target_option_default_node;
24685
24686 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24687 int flags = callee_opts->x_target_flags;
24688
24689 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24690 }
24691
24692 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24693 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24694 to point at the base of the local variables after static stack
24695 space for a function has been allocated. */
24696
24697 HOST_WIDE_INT
24698 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24699 {
24700 arm_stack_offsets *offsets;
24701
24702 offsets = arm_get_frame_offsets ();
24703
24704 switch (from)
24705 {
24706 case ARG_POINTER_REGNUM:
24707 switch (to)
24708 {
24709 case STACK_POINTER_REGNUM:
24710 return offsets->outgoing_args - offsets->saved_args;
24711
24712 case FRAME_POINTER_REGNUM:
24713 return offsets->soft_frame - offsets->saved_args;
24714
24715 case ARM_HARD_FRAME_POINTER_REGNUM:
24716 return offsets->saved_regs - offsets->saved_args;
24717
24718 case THUMB_HARD_FRAME_POINTER_REGNUM:
24719 return offsets->locals_base - offsets->saved_args;
24720
24721 default:
24722 gcc_unreachable ();
24723 }
24724 break;
24725
24726 case FRAME_POINTER_REGNUM:
24727 switch (to)
24728 {
24729 case STACK_POINTER_REGNUM:
24730 return offsets->outgoing_args - offsets->soft_frame;
24731
24732 case ARM_HARD_FRAME_POINTER_REGNUM:
24733 return offsets->saved_regs - offsets->soft_frame;
24734
24735 case THUMB_HARD_FRAME_POINTER_REGNUM:
24736 return offsets->locals_base - offsets->soft_frame;
24737
24738 default:
24739 gcc_unreachable ();
24740 }
24741 break;
24742
24743 default:
24744 gcc_unreachable ();
24745 }
24746 }
24747
24748 /* Generate the function's prologue. */
24749
24750 void
24751 thumb1_expand_prologue (void)
24752 {
24753 rtx_insn *insn;
24754
24755 HOST_WIDE_INT amount;
24756 HOST_WIDE_INT size;
24757 arm_stack_offsets *offsets;
24758 unsigned long func_type;
24759 int regno;
24760 unsigned long live_regs_mask;
24761 unsigned long l_mask;
24762 unsigned high_regs_pushed = 0;
24763 bool lr_needs_saving;
24764
24765 func_type = arm_current_func_type ();
24766
24767 /* Naked functions don't have prologues. */
24768 if (IS_NAKED (func_type))
24769 {
24770 if (flag_stack_usage_info)
24771 current_function_static_stack_size = 0;
24772 return;
24773 }
24774
24775 if (IS_INTERRUPT (func_type))
24776 {
24777 error ("interrupt Service Routines cannot be coded in Thumb mode");
24778 return;
24779 }
24780
24781 if (is_called_in_ARM_mode (current_function_decl))
24782 emit_insn (gen_prologue_thumb1_interwork ());
24783
24784 offsets = arm_get_frame_offsets ();
24785 live_regs_mask = offsets->saved_regs_mask;
24786 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24787
24788 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24789 l_mask = live_regs_mask & 0x40ff;
24790 /* Then count how many other high registers will need to be pushed. */
24791 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24792
24793 if (crtl->args.pretend_args_size)
24794 {
24795 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24796
24797 if (cfun->machine->uses_anonymous_args)
24798 {
24799 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24800 unsigned long mask;
24801
24802 mask = 1ul << (LAST_ARG_REGNUM + 1);
24803 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24804
24805 insn = thumb1_emit_multi_reg_push (mask, 0);
24806 }
24807 else
24808 {
24809 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24810 stack_pointer_rtx, x));
24811 }
24812 RTX_FRAME_RELATED_P (insn) = 1;
24813 }
24814
24815 if (TARGET_BACKTRACE)
24816 {
24817 HOST_WIDE_INT offset = 0;
24818 unsigned work_register;
24819 rtx work_reg, x, arm_hfp_rtx;
24820
24821 /* We have been asked to create a stack backtrace structure.
24822 The code looks like this:
24823
24824 0 .align 2
24825 0 func:
24826 0 sub SP, #16 Reserve space for 4 registers.
24827 2 push {R7} Push low registers.
24828 4 add R7, SP, #20 Get the stack pointer before the push.
24829 6 str R7, [SP, #8] Store the stack pointer
24830 (before reserving the space).
24831 8 mov R7, PC Get hold of the start of this code + 12.
24832 10 str R7, [SP, #16] Store it.
24833 12 mov R7, FP Get hold of the current frame pointer.
24834 14 str R7, [SP, #4] Store it.
24835 16 mov R7, LR Get hold of the current return address.
24836 18 str R7, [SP, #12] Store it.
24837 20 add R7, SP, #16 Point at the start of the
24838 backtrace structure.
24839 22 mov FP, R7 Put this value into the frame pointer. */
24840
24841 work_register = thumb_find_work_register (live_regs_mask);
24842 work_reg = gen_rtx_REG (SImode, work_register);
24843 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24844
24845 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24846 stack_pointer_rtx, GEN_INT (-16)));
24847 RTX_FRAME_RELATED_P (insn) = 1;
24848
24849 if (l_mask)
24850 {
24851 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24852 RTX_FRAME_RELATED_P (insn) = 1;
24853 lr_needs_saving = false;
24854
24855 offset = bit_count (l_mask) * UNITS_PER_WORD;
24856 }
24857
24858 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24859 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24860
24861 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24862 x = gen_frame_mem (SImode, x);
24863 emit_move_insn (x, work_reg);
24864
24865 /* Make sure that the instruction fetching the PC is in the right place
24866 to calculate "start of backtrace creation code + 12". */
24867 /* ??? The stores using the common WORK_REG ought to be enough to
24868 prevent the scheduler from doing anything weird. Failing that
24869 we could always move all of the following into an UNSPEC_VOLATILE. */
24870 if (l_mask)
24871 {
24872 x = gen_rtx_REG (SImode, PC_REGNUM);
24873 emit_move_insn (work_reg, x);
24874
24875 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24876 x = gen_frame_mem (SImode, x);
24877 emit_move_insn (x, work_reg);
24878
24879 emit_move_insn (work_reg, arm_hfp_rtx);
24880
24881 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24882 x = gen_frame_mem (SImode, x);
24883 emit_move_insn (x, work_reg);
24884 }
24885 else
24886 {
24887 emit_move_insn (work_reg, arm_hfp_rtx);
24888
24889 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24890 x = gen_frame_mem (SImode, x);
24891 emit_move_insn (x, work_reg);
24892
24893 x = gen_rtx_REG (SImode, PC_REGNUM);
24894 emit_move_insn (work_reg, x);
24895
24896 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24897 x = gen_frame_mem (SImode, x);
24898 emit_move_insn (x, work_reg);
24899 }
24900
24901 x = gen_rtx_REG (SImode, LR_REGNUM);
24902 emit_move_insn (work_reg, x);
24903
24904 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24905 x = gen_frame_mem (SImode, x);
24906 emit_move_insn (x, work_reg);
24907
24908 x = GEN_INT (offset + 12);
24909 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24910
24911 emit_move_insn (arm_hfp_rtx, work_reg);
24912 }
24913 /* Optimization: If we are not pushing any low registers but we are going
24914 to push some high registers then delay our first push. This will just
24915 be a push of LR and we can combine it with the push of the first high
24916 register. */
24917 else if ((l_mask & 0xff) != 0
24918 || (high_regs_pushed == 0 && lr_needs_saving))
24919 {
24920 unsigned long mask = l_mask;
24921 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24922 insn = thumb1_emit_multi_reg_push (mask, mask);
24923 RTX_FRAME_RELATED_P (insn) = 1;
24924 lr_needs_saving = false;
24925 }
24926
24927 if (high_regs_pushed)
24928 {
24929 unsigned pushable_regs;
24930 unsigned next_hi_reg;
24931 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24932 : crtl->args.info.nregs;
24933 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24934
24935 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24936 if (live_regs_mask & (1 << next_hi_reg))
24937 break;
24938
24939 /* Here we need to mask out registers used for passing arguments,
24940 even if they could be pushed.  This is to avoid using them to stash
24941 the high registers; such a stash could clobber arguments that are still live. */
24942 pushable_regs = l_mask & (~arg_regs_mask);
24943 if (lr_needs_saving)
24944 pushable_regs &= ~(1 << LR_REGNUM);
24945
24946 if (pushable_regs == 0)
24947 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24948
24949 while (high_regs_pushed > 0)
24950 {
24951 unsigned long real_regs_mask = 0;
24952 unsigned long push_mask = 0;
24953
24954 for (regno = LR_REGNUM; regno >= 0; regno --)
24955 {
24956 if (pushable_regs & (1 << regno))
24957 {
24958 emit_move_insn (gen_rtx_REG (SImode, regno),
24959 gen_rtx_REG (SImode, next_hi_reg));
24960
24961 high_regs_pushed --;
24962 real_regs_mask |= (1 << next_hi_reg);
24963 push_mask |= (1 << regno);
24964
24965 if (high_regs_pushed)
24966 {
24967 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24968 next_hi_reg --)
24969 if (live_regs_mask & (1 << next_hi_reg))
24970 break;
24971 }
24972 else
24973 break;
24974 }
24975 }
24976
24977 /* If we had to find a work register and we have not yet
24978 saved the LR then add it to the list of regs to push. */
24979 if (lr_needs_saving)
24980 {
24981 push_mask |= 1 << LR_REGNUM;
24982 real_regs_mask |= 1 << LR_REGNUM;
24983 lr_needs_saving = false;
24984 }
24985
24986 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24987 RTX_FRAME_RELATED_P (insn) = 1;
24988 }
24989 }
24990
24991 /* Load the pic register before setting the frame pointer,
24992 so we can use r7 as a temporary work register. */
24993 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24994 arm_load_pic_register (live_regs_mask);
24995
24996 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24997 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24998 stack_pointer_rtx);
24999
25000 size = offsets->outgoing_args - offsets->saved_args;
25001 if (flag_stack_usage_info)
25002 current_function_static_stack_size = size;
25003
25004 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25005 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25006 || flag_stack_clash_protection)
25007 && size)
25008 sorry ("-fstack-check=specific for Thumb-1");
25009
25010 amount = offsets->outgoing_args - offsets->saved_regs;
25011 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25012 if (amount)
25013 {
25014 if (amount < 512)
25015 {
25016 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25017 GEN_INT (- amount)));
25018 RTX_FRAME_RELATED_P (insn) = 1;
25019 }
25020 else
25021 {
25022 rtx reg, dwarf;
25023
25024 /* The stack decrement is too big for an immediate value in a single
25025 insn. In theory we could issue multiple subtracts, but after
25026 three of them it becomes more space efficient to place the full
25027 value in the constant pool and load into a register. (Also the
25028 ARM debugger really likes to see only one stack decrement per
25029 function). So instead we look for a scratch register into which
25030 we can load the decrement, and then we subtract this from the
25031 stack pointer. Unfortunately on the thumb the only available
25032 scratch registers are the argument registers, and we cannot use
25033 these as they may hold arguments to the function. Instead we
25034 attempt to locate a call preserved register which is used by this
25035 function. If we can find one, then we know that it will have
25036 been pushed at the start of the prologue and so we can corrupt
25037 it now. */
25038 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25039 if (live_regs_mask & (1 << regno))
25040 break;
25041
25042 gcc_assert (regno <= LAST_LO_REGNUM);
25043
25044 reg = gen_rtx_REG (SImode, regno);
25045
25046 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25047
25048 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25049 stack_pointer_rtx, reg));
25050
25051 dwarf = gen_rtx_SET (stack_pointer_rtx,
25052 plus_constant (Pmode, stack_pointer_rtx,
25053 -amount));
25054 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25055 RTX_FRAME_RELATED_P (insn) = 1;
25056 }
25057 }
25058
25059 if (frame_pointer_needed)
25060 thumb_set_frame_pointer (offsets);
25061
25062 /* If we are profiling, make sure no instructions are scheduled before
25063 the call to mcount. Similarly if the user has requested no
25064 scheduling in the prologue.  Similarly if we want non-call exceptions
25065 using the EABI unwinder, to prevent faulting instructions from being
25066 swapped with a stack adjustment. */
25067 if (crtl->profile || !TARGET_SCHED_PROLOG
25068 || (arm_except_unwind_info (&global_options) == UI_TARGET
25069 && cfun->can_throw_non_call_exceptions))
25070 emit_insn (gen_blockage ());
25071
25072 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25073 if (live_regs_mask & 0xff)
25074 cfun->machine->lr_save_eliminated = 0;
25075 }
25076
25077 /* Clear caller-saved registers that are not used to pass return values, as
25078 well as leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25079
25080 void
25081 cmse_nonsecure_entry_clear_before_return (void)
25082 {
25083 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25084 uint32_t padding_bits_to_clear = 0;
25085 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25086 auto_sbitmap to_clear_bitmap (maxregno + 1);
25087 tree result_type;
25088 rtx result_rtl;
25089
25090 bitmap_clear (to_clear_bitmap);
25091 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25092 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25093
25094 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25095 registers. */
25096 if (TARGET_HARD_FLOAT)
25097 {
25098 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25099
25100 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25101
25102 /* Make sure we don't clear the two scratch registers used to clear the
25103 relevant FPSCR bits in output_return_instruction. */
25104 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25105 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25106 emit_use (gen_rtx_REG (SImode, 4));
25107 bitmap_clear_bit (to_clear_bitmap, 4);
25108 }
25109
25110 /* If the user has defined registers to be caller saved, these are no longer
25111 restored by the function before returning and must thus be cleared for
25112 security purposes. */
25113 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25114 {
25115 /* We do not touch registers that can be used to pass arguments as per
25116 the AAPCS, since these should never be made callee-saved by user
25117 options. */
25118 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25119 continue;
25120 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25121 continue;
25122 if (call_used_regs[regno])
25123 bitmap_set_bit (to_clear_bitmap, regno);
25124 }
25125
25126 /* Make sure we do not clear the registers used to return the result. */
25127 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25128 if (!VOID_TYPE_P (result_type))
25129 {
25130 uint64_t to_clear_return_mask;
25131 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25132
25133 /* No need to check that we return in registers, because we don't
25134 support returning on the stack yet. */
25135 gcc_assert (REG_P (result_rtl));
25136 to_clear_return_mask
25137 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25138 padding_bits_to_clear_ptr);
25139 if (to_clear_return_mask)
25140 {
25141 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25142 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25143 {
25144 if (to_clear_return_mask & (1ULL << regno))
25145 bitmap_clear_bit (to_clear_bitmap, regno);
25146 }
25147 }
25148 }
25149
25150 if (padding_bits_to_clear != 0)
25151 {
25152 rtx reg_rtx;
25153 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25154
25155 /* Padding bits to clear is not 0, so we know we are dealing with
25156 returning a composite type, which only uses r0. Let's make sure that
25157 r1-r3 are cleared too; we will use r1 as a scratch register. */
25158 bitmap_clear (to_clear_arg_regs_bitmap);
25159 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25160 NUM_ARG_REGS - 1);
25161 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25162
25163 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25164
25165 /* Fill the lower half of the negated padding_bits_to_clear. */
25166 emit_move_insn (reg_rtx,
25167 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25168
25169 /* Also fill the top half of the negated padding_bits_to_clear. */
25170 if (((~padding_bits_to_clear) >> 16) > 0)
25171 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25172 GEN_INT (16),
25173 GEN_INT (16)),
25174 GEN_INT ((~padding_bits_to_clear) >> 16)));
25175
25176 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25177 gen_rtx_REG (SImode, R0_REGNUM),
25178 reg_rtx));
25179 }
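/* Worked example (expository addition, not part of the upstream code):
   suppose padding_bits_to_clear == 0x0000ff00, i.e. bits 8-15 of the
   composite returned in r0 are padding.  Then ~padding_bits_to_clear is
   0xffff00ff and the code above emits roughly

       mov  r1, #0x00ff      @ low half of the negated mask
       movt r1, #0xffff      @ high half, via the zero_extract set
       and  r0, r0, r1       @ zero only the padding bits of r0

   The exact instructions depend on the target, but the net effect is that
   only the padding bits of r0 are cleared.  */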
25180
25181 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25182 {
25183 if (!bitmap_bit_p (to_clear_bitmap, regno))
25184 continue;
25185
25186 if (IS_VFP_REGNUM (regno))
25187 {
25188 /* If regno is an even vfp register and its successor is also to
25189 be cleared, use vmov. */
25190 if (TARGET_VFP_DOUBLE
25191 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25192 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25193 {
25194 emit_move_insn (gen_rtx_REG (DFmode, regno),
25195 CONST1_RTX (DFmode));
25196 emit_use (gen_rtx_REG (DFmode, regno));
25197 regno++;
25198 }
25199 else
25200 {
25201 emit_move_insn (gen_rtx_REG (SFmode, regno),
25202 CONST1_RTX (SFmode));
25203 emit_use (gen_rtx_REG (SFmode, regno));
25204 }
25205 }
25206 else
25207 {
25208 if (TARGET_THUMB1)
25209 {
25210 if (regno == R0_REGNUM)
25211 emit_move_insn (gen_rtx_REG (SImode, regno),
25212 const0_rtx);
25213 else
25214 /* R0 has either been cleared before, see code above, or it
25215 holds a return value, either way it is not secret
25216 information. */
25217 emit_move_insn (gen_rtx_REG (SImode, regno),
25218 gen_rtx_REG (SImode, R0_REGNUM));
25219 emit_use (gen_rtx_REG (SImode, regno));
25220 }
25221 else
25222 {
25223 emit_move_insn (gen_rtx_REG (SImode, regno),
25224 gen_rtx_REG (SImode, LR_REGNUM));
25225 emit_use (gen_rtx_REG (SImode, regno));
25226 }
25227 }
25228 }
25229 }
25230
25231 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25232 POP instruction can be generated. LR should be replaced by PC. All
25233 the checks required are already done by USE_RETURN_INSN (). Hence,
25234 all we really need to check here is whether a single register or
25235 multiple registers are to be popped. */
25236 void
25237 thumb2_expand_return (bool simple_return)
25238 {
25239 int i, num_regs;
25240 unsigned long saved_regs_mask;
25241 arm_stack_offsets *offsets;
25242
25243 offsets = arm_get_frame_offsets ();
25244 saved_regs_mask = offsets->saved_regs_mask;
25245
25246 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25247 if (saved_regs_mask & (1 << i))
25248 num_regs++;
25249
25250 if (!simple_return && saved_regs_mask)
25251 {
25252 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25253 functions, or adapt the code to handle it according to the ACLE. This path
25254 should not be reachable for cmse_nonsecure_entry functions, though we prefer
25255 to assert it for now to ensure that future code changes do not silently
25256 change this behavior. */
25257 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25258 if (num_regs == 1)
25259 {
25260 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25261 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25262 rtx addr = gen_rtx_MEM (SImode,
25263 gen_rtx_POST_INC (SImode,
25264 stack_pointer_rtx));
25265 set_mem_alias_set (addr, get_frame_alias_set ());
25266 XVECEXP (par, 0, 0) = ret_rtx;
25267 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25268 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25269 emit_jump_insn (par);
25270 }
25271 else
25272 {
25273 saved_regs_mask &= ~ (1 << LR_REGNUM);
25274 saved_regs_mask |= (1 << PC_REGNUM);
25275 arm_emit_multi_reg_pop (saved_regs_mask);
25276 }
25277 }
25278 else
25279 {
25280 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25281 cmse_nonsecure_entry_clear_before_return ();
25282 emit_jump_insn (simple_return_rtx);
25283 }
25284 }
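/* Expository note (added for illustration; not from the original sources):
   for a Thumb-2 function whose only saved register is LR, the PARALLEL
   built above, a return combined with a post-increment load of PC from
   the stack, typically assembles to something like

       ldr  pc, [sp], #4

   whereas the multi-register path rewrites LR to PC in the mask and lets
   arm_emit_multi_reg_pop emit an ordinary "pop {..., pc}".  */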
25285
25286 void
25287 thumb1_expand_epilogue (void)
25288 {
25289 HOST_WIDE_INT amount;
25290 arm_stack_offsets *offsets;
25291 int regno;
25292
25293 /* Naked functions don't have epilogues. */
25294 if (IS_NAKED (arm_current_func_type ()))
25295 return;
25296
25297 offsets = arm_get_frame_offsets ();
25298 amount = offsets->outgoing_args - offsets->saved_regs;
25299
25300 if (frame_pointer_needed)
25301 {
25302 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25303 amount = offsets->locals_base - offsets->saved_regs;
25304 }
25305 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25306
25307 gcc_assert (amount >= 0);
25308 if (amount)
25309 {
25310 emit_insn (gen_blockage ());
25311
25312 if (amount < 512)
25313 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25314 GEN_INT (amount)));
25315 else
25316 {
25317 /* r3 is always free in the epilogue. */
25318 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25319
25320 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25321 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25322 }
25323 }
25324
25325 /* Emit a USE (stack_pointer_rtx), so that
25326 the stack adjustment will not be deleted. */
25327 emit_insn (gen_force_register_use (stack_pointer_rtx));
25328
25329 if (crtl->profile || !TARGET_SCHED_PROLOG)
25330 emit_insn (gen_blockage ());
25331
25332 /* Emit a clobber for each insn that will be restored in the epilogue,
25333 so that flow2 will get register lifetimes correct. */
25334 for (regno = 0; regno < 13; regno++)
25335 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25336 emit_clobber (gen_rtx_REG (SImode, regno));
25337
25338 if (! df_regs_ever_live_p (LR_REGNUM))
25339 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25340
25341 /* Clear all caller-saved regs that are not used to return. */
25342 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25343 cmse_nonsecure_entry_clear_before_return ();
25344 }
25345
25346 /* Epilogue code for APCS frame. */
25347 static void
25348 arm_expand_epilogue_apcs_frame (bool really_return)
25349 {
25350 unsigned long func_type;
25351 unsigned long saved_regs_mask;
25352 int num_regs = 0;
25353 int i;
25354 int floats_from_frame = 0;
25355 arm_stack_offsets *offsets;
25356
25357 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25358 func_type = arm_current_func_type ();
25359
25360 /* Get frame offsets for ARM. */
25361 offsets = arm_get_frame_offsets ();
25362 saved_regs_mask = offsets->saved_regs_mask;
25363
25364 /* Find the offset of the floating-point save area in the frame. */
25365 floats_from_frame
25366 = (offsets->saved_args
25367 + arm_compute_static_chain_stack_bytes ()
25368 - offsets->frame);
25369
25370 /* Compute how many core registers are saved and how far away the floats are. */
25371 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25372 if (saved_regs_mask & (1 << i))
25373 {
25374 num_regs++;
25375 floats_from_frame += 4;
25376 }
25377
25378 if (TARGET_HARD_FLOAT)
25379 {
25380 int start_reg;
25381 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25382
25383 /* The offset is from IP_REGNUM. */
25384 int saved_size = arm_get_vfp_saved_size ();
25385 if (saved_size > 0)
25386 {
25387 rtx_insn *insn;
25388 floats_from_frame += saved_size;
25389 insn = emit_insn (gen_addsi3 (ip_rtx,
25390 hard_frame_pointer_rtx,
25391 GEN_INT (-floats_from_frame)));
25392 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25393 ip_rtx, hard_frame_pointer_rtx);
25394 }
25395
25396 /* Generate VFP register multi-pop. */
25397 start_reg = FIRST_VFP_REGNUM;
25398
25399 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25400 /* Look for a case where a reg does not need restoring. */
25401 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25402 && (!df_regs_ever_live_p (i + 1)
25403 || call_used_regs[i + 1]))
25404 {
25405 if (start_reg != i)
25406 arm_emit_vfp_multi_reg_pop (start_reg,
25407 (i - start_reg) / 2,
25408 gen_rtx_REG (SImode,
25409 IP_REGNUM));
25410 start_reg = i + 2;
25411 }
25412
25413 /* Restore the remaining regs that we have discovered (or possibly
25414 even all of them, if the conditional in the for loop never
25415 fired). */
25416 if (start_reg != i)
25417 arm_emit_vfp_multi_reg_pop (start_reg,
25418 (i - start_reg) / 2,
25419 gen_rtx_REG (SImode, IP_REGNUM));
25420 }
25421
25422 if (TARGET_IWMMXT)
25423 {
25424 /* The frame pointer is guaranteed to be non-double-word aligned, as
25425 it is set to double-word-aligned old_stack_pointer - 4. */
25426 rtx_insn *insn;
25427 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25428
25429 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25430 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25431 {
25432 rtx addr = gen_frame_mem (V2SImode,
25433 plus_constant (Pmode, hard_frame_pointer_rtx,
25434 - lrm_count * 4));
25435 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25436 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25437 gen_rtx_REG (V2SImode, i),
25438 NULL_RTX);
25439 lrm_count += 2;
25440 }
25441 }
25442
25443 /* saved_regs_mask should contain IP, which holds the old stack pointer
25444 from the time the activation record was created. Since SP and IP are
25445 adjacent registers, we can restore the value directly into SP. */
25446 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25447 saved_regs_mask &= ~(1 << IP_REGNUM);
25448 saved_regs_mask |= (1 << SP_REGNUM);
25449
25450 /* There are two registers left in saved_regs_mask - LR and PC. We
25451 only need to restore LR (the return address), but to
25452 save time we can load it directly into PC, unless we need a
25453 special function exit sequence, or we are not really returning. */
25454 if (really_return
25455 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25456 && !crtl->calls_eh_return)
25457 /* Delete LR from the register mask, so that LR on
25458 the stack is loaded into the PC in the register mask. */
25459 saved_regs_mask &= ~(1 << LR_REGNUM);
25460 else
25461 saved_regs_mask &= ~(1 << PC_REGNUM);
25462
25463 num_regs = bit_count (saved_regs_mask);
25464 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25465 {
25466 rtx_insn *insn;
25467 emit_insn (gen_blockage ());
25468 /* Unwind the stack to just below the saved registers. */
25469 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25470 hard_frame_pointer_rtx,
25471 GEN_INT (- 4 * num_regs)));
25472
25473 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25474 stack_pointer_rtx, hard_frame_pointer_rtx);
25475 }
25476
25477 arm_emit_multi_reg_pop (saved_regs_mask);
25478
25479 if (IS_INTERRUPT (func_type))
25480 {
25481 /* Interrupt handlers will have pushed the
25482 IP onto the stack, so restore it now. */
25483 rtx_insn *insn;
25484 rtx addr = gen_rtx_MEM (SImode,
25485 gen_rtx_POST_INC (SImode,
25486 stack_pointer_rtx));
25487 set_mem_alias_set (addr, get_frame_alias_set ());
25488 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25489 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25490 gen_rtx_REG (SImode, IP_REGNUM),
25491 NULL_RTX);
25492 }
25493
25494 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25495 return;
25496
25497 if (crtl->calls_eh_return)
25498 emit_insn (gen_addsi3 (stack_pointer_rtx,
25499 stack_pointer_rtx,
25500 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25501
25502 if (IS_STACKALIGN (func_type))
25503 /* Restore the original stack pointer. Before prologue, the stack was
25504 realigned and the original stack pointer saved in r0. For details,
25505 see comment in arm_expand_prologue. */
25506 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25507
25508 emit_jump_insn (simple_return_rtx);
25509 }
25510
25511 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25512 function is not a sibcall. */
25513 void
25514 arm_expand_epilogue (bool really_return)
25515 {
25516 unsigned long func_type;
25517 unsigned long saved_regs_mask;
25518 int num_regs = 0;
25519 int i;
25520 int amount;
25521 arm_stack_offsets *offsets;
25522
25523 func_type = arm_current_func_type ();
25524
25525 /* Naked functions don't have an epilogue. Hence, generate a return pattern, and
25526 let output_return_instruction take care of instruction emission if any. */
25527 if (IS_NAKED (func_type)
25528 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25529 {
25530 if (really_return)
25531 emit_jump_insn (simple_return_rtx);
25532 return;
25533 }
25534
25535 /* If we are throwing an exception, then we really must be doing a
25536 return, so we can't tail-call. */
25537 gcc_assert (!crtl->calls_eh_return || really_return);
25538
25539 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25540 {
25541 arm_expand_epilogue_apcs_frame (really_return);
25542 return;
25543 }
25544
25545 /* Get frame offsets for ARM. */
25546 offsets = arm_get_frame_offsets ();
25547 saved_regs_mask = offsets->saved_regs_mask;
25548 num_regs = bit_count (saved_regs_mask);
25549
25550 if (frame_pointer_needed)
25551 {
25552 rtx_insn *insn;
25553 /* Restore stack pointer if necessary. */
25554 if (TARGET_ARM)
25555 {
25556 /* In ARM mode, frame pointer points to first saved register.
25557 Restore stack pointer to last saved register. */
25558 amount = offsets->frame - offsets->saved_regs;
25559
25560 /* Force out any pending memory operations that reference stacked data
25561 before stack de-allocation occurs. */
25562 emit_insn (gen_blockage ());
25563 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25564 hard_frame_pointer_rtx,
25565 GEN_INT (amount)));
25566 arm_add_cfa_adjust_cfa_note (insn, amount,
25567 stack_pointer_rtx,
25568 hard_frame_pointer_rtx);
25569
25570 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25571 deleted. */
25572 emit_insn (gen_force_register_use (stack_pointer_rtx));
25573 }
25574 else
25575 {
25576 /* In Thumb-2 mode, the frame pointer points to the last saved
25577 register. */
25578 amount = offsets->locals_base - offsets->saved_regs;
25579 if (amount)
25580 {
25581 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25582 hard_frame_pointer_rtx,
25583 GEN_INT (amount)));
25584 arm_add_cfa_adjust_cfa_note (insn, amount,
25585 hard_frame_pointer_rtx,
25586 hard_frame_pointer_rtx);
25587 }
25588
25589 /* Force out any pending memory operations that reference stacked data
25590 before stack de-allocation occurs. */
25591 emit_insn (gen_blockage ());
25592 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25593 hard_frame_pointer_rtx));
25594 arm_add_cfa_adjust_cfa_note (insn, 0,
25595 stack_pointer_rtx,
25596 hard_frame_pointer_rtx);
25597 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25598 deleted. */
25599 emit_insn (gen_force_register_use (stack_pointer_rtx));
25600 }
25601 }
25602 else
25603 {
25604 /* Pop off outgoing args and local frame to adjust stack pointer to
25605 last saved register. */
25606 amount = offsets->outgoing_args - offsets->saved_regs;
25607 if (amount)
25608 {
25609 rtx_insn *tmp;
25610 /* Force out any pending memory operations that reference stacked data
25611 before stack de-allocation occurs. */
25612 emit_insn (gen_blockage ());
25613 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25614 stack_pointer_rtx,
25615 GEN_INT (amount)));
25616 arm_add_cfa_adjust_cfa_note (tmp, amount,
25617 stack_pointer_rtx, stack_pointer_rtx);
25618 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25619 not deleted. */
25620 emit_insn (gen_force_register_use (stack_pointer_rtx));
25621 }
25622 }
25623
25624 if (TARGET_HARD_FLOAT)
25625 {
25626 /* Generate VFP register multi-pop. */
25627 int end_reg = LAST_VFP_REGNUM + 1;
25628
25629 /* Scan the registers in reverse order. We need to match
25630 any groupings made in the prologue and generate matching
25631 vldm operations. The groups must match because,
25632 unlike pop, vldm can only handle consecutive regs. */
25633 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25634 /* Look for a case where a reg does not need restoring. */
25635 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25636 && (!df_regs_ever_live_p (i + 1)
25637 || call_used_regs[i + 1]))
25638 {
25639 /* Restore the regs discovered so far (from reg+2 to
25640 end_reg). */
25641 if (end_reg > i + 2)
25642 arm_emit_vfp_multi_reg_pop (i + 2,
25643 (end_reg - (i + 2)) / 2,
25644 stack_pointer_rtx);
25645 end_reg = i;
25646 }
25647
25648 /* Restore the remaining regs that we have discovered (or possibly
25649 even all of them, if the conditional in the for loop never
25650 fired). */
25651 if (end_reg > i + 2)
25652 arm_emit_vfp_multi_reg_pop (i + 2,
25653 (end_reg - (i + 2)) / 2,
25654 stack_pointer_rtx);
25655 }
25656
25657 if (TARGET_IWMMXT)
25658 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25659 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25660 {
25661 rtx_insn *insn;
25662 rtx addr = gen_rtx_MEM (V2SImode,
25663 gen_rtx_POST_INC (SImode,
25664 stack_pointer_rtx));
25665 set_mem_alias_set (addr, get_frame_alias_set ());
25666 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25667 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25668 gen_rtx_REG (V2SImode, i),
25669 NULL_RTX);
25670 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25671 stack_pointer_rtx, stack_pointer_rtx);
25672 }
25673
25674 if (saved_regs_mask)
25675 {
25676 rtx insn;
25677 bool return_in_pc = false;
25678
25679 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25680 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25681 && !IS_CMSE_ENTRY (func_type)
25682 && !IS_STACKALIGN (func_type)
25683 && really_return
25684 && crtl->args.pretend_args_size == 0
25685 && saved_regs_mask & (1 << LR_REGNUM)
25686 && !crtl->calls_eh_return)
25687 {
25688 saved_regs_mask &= ~(1 << LR_REGNUM);
25689 saved_regs_mask |= (1 << PC_REGNUM);
25690 return_in_pc = true;
25691 }
25692
25693 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25694 {
25695 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25696 if (saved_regs_mask & (1 << i))
25697 {
25698 rtx addr = gen_rtx_MEM (SImode,
25699 gen_rtx_POST_INC (SImode,
25700 stack_pointer_rtx));
25701 set_mem_alias_set (addr, get_frame_alias_set ());
25702
25703 if (i == PC_REGNUM)
25704 {
25705 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25706 XVECEXP (insn, 0, 0) = ret_rtx;
25707 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25708 addr);
25709 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25710 insn = emit_jump_insn (insn);
25711 }
25712 else
25713 {
25714 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25715 addr));
25716 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25717 gen_rtx_REG (SImode, i),
25718 NULL_RTX);
25719 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25720 stack_pointer_rtx,
25721 stack_pointer_rtx);
25722 }
25723 }
25724 }
25725 else
25726 {
25727 if (TARGET_LDRD
25728 && current_tune->prefer_ldrd_strd
25729 && !optimize_function_for_size_p (cfun))
25730 {
25731 if (TARGET_THUMB2)
25732 thumb2_emit_ldrd_pop (saved_regs_mask);
25733 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25734 arm_emit_ldrd_pop (saved_regs_mask);
25735 else
25736 arm_emit_multi_reg_pop (saved_regs_mask);
25737 }
25738 else
25739 arm_emit_multi_reg_pop (saved_regs_mask);
25740 }
25741
25742 if (return_in_pc)
25743 return;
25744 }
25745
25746 amount
25747 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25748 if (amount)
25749 {
25750 int i, j;
25751 rtx dwarf = NULL_RTX;
25752 rtx_insn *tmp =
25753 emit_insn (gen_addsi3 (stack_pointer_rtx,
25754 stack_pointer_rtx,
25755 GEN_INT (amount)));
25756
25757 RTX_FRAME_RELATED_P (tmp) = 1;
25758
25759 if (cfun->machine->uses_anonymous_args)
25760 {
25761 /* Restore pretend args. Refer to arm_expand_prologue for how
25762 pretend_args are saved on the stack. */
25763 int num_regs = crtl->args.pretend_args_size / 4;
25764 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25765 for (j = 0, i = 0; j < num_regs; i++)
25766 if (saved_regs_mask & (1 << i))
25767 {
25768 rtx reg = gen_rtx_REG (SImode, i);
25769 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25770 j++;
25771 }
25772 REG_NOTES (tmp) = dwarf;
25773 }
25774 arm_add_cfa_adjust_cfa_note (tmp, amount,
25775 stack_pointer_rtx, stack_pointer_rtx);
25776 }
25777
25778 /* Clear all caller-saved regs that are not used to return. */
25779 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25780 {
25781 /* CMSE_ENTRY always returns. */
25782 gcc_assert (really_return);
25783 cmse_nonsecure_entry_clear_before_return ();
25784 }
25785
25786 if (!really_return)
25787 return;
25788
25789 if (crtl->calls_eh_return)
25790 emit_insn (gen_addsi3 (stack_pointer_rtx,
25791 stack_pointer_rtx,
25792 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25793
25794 if (IS_STACKALIGN (func_type))
25795 /* Restore the original stack pointer. Before prologue, the stack was
25796 realigned and the original stack pointer saved in r0. For details,
25797 see comment in arm_expand_prologue. */
25798 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25799
25800 emit_jump_insn (simple_return_rtx);
25801 }
25802
25803 /* Implementation of insn prologue_thumb1_interwork. This is the first
25804 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25805
25806 const char *
25807 thumb1_output_interwork (void)
25808 {
25809 const char * name;
25810 FILE *f = asm_out_file;
25811
25812 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25813 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25814 == SYMBOL_REF);
25815 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25816
25817 /* Generate code sequence to switch us into Thumb mode. */
25818 /* The .code 32 directive has already been emitted by
25819 ASM_DECLARE_FUNCTION_NAME. */
25820 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25821 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25822
25823 /* Generate a label, so that the debugger will notice the
25824 change in instruction sets. This label is also used by
25825 the assembler to bypass the ARM code when this function
25826 is called from a Thumb encoded function elsewhere in the
25827 same file. Hence the definition of STUB_NAME here must
25828 agree with the definition in gas/config/tc-arm.c. */
25829
25830 #define STUB_NAME ".real_start_of"
25831
25832 fprintf (f, "\t.code\t16\n");
25833 #ifdef ARM_PE
25834 if (arm_dllexport_name_p (name))
25835 name = arm_strip_name_encoding (name);
25836 #endif
25837 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25838 fprintf (f, "\t.thumb_func\n");
25839 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25840
25841 return "";
25842 }
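/* Illustrative output (added for exposition; not from the original sources):
   for a hypothetical function "foo" compiled for ARM/Thumb interworking,
   the routine above emits roughly

       orr  ip, pc, #1
       bx   ip
       .code 16
       .globl .real_start_offoo
       .thumb_func
   .real_start_offoo:

   where the exact label spelling depends on %U, the target's user-label
   prefix (often empty on ELF targets).  */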
25843
25844 /* Handle the case of a double word load into a low register from
25845 a computed memory address. The computed address may involve a
25846 register which is overwritten by the load. */
25847 const char *
25848 thumb_load_double_from_address (rtx *operands)
25849 {
25850 rtx addr;
25851 rtx base;
25852 rtx offset;
25853 rtx arg1;
25854 rtx arg2;
25855
25856 gcc_assert (REG_P (operands[0]));
25857 gcc_assert (MEM_P (operands[1]));
25858
25859 /* Get the memory address. */
25860 addr = XEXP (operands[1], 0);
25861
25862 /* Work out how the memory address is computed. */
25863 switch (GET_CODE (addr))
25864 {
25865 case REG:
25866 operands[2] = adjust_address (operands[1], SImode, 4);
25867
25868 if (REGNO (operands[0]) == REGNO (addr))
25869 {
25870 output_asm_insn ("ldr\t%H0, %2", operands);
25871 output_asm_insn ("ldr\t%0, %1", operands);
25872 }
25873 else
25874 {
25875 output_asm_insn ("ldr\t%0, %1", operands);
25876 output_asm_insn ("ldr\t%H0, %2", operands);
25877 }
25878 break;
25879
25880 case CONST:
25881 /* Compute <address> + 4 for the high order load. */
25882 operands[2] = adjust_address (operands[1], SImode, 4);
25883
25884 output_asm_insn ("ldr\t%0, %1", operands);
25885 output_asm_insn ("ldr\t%H0, %2", operands);
25886 break;
25887
25888 case PLUS:
25889 arg1 = XEXP (addr, 0);
25890 arg2 = XEXP (addr, 1);
25891
25892 if (CONSTANT_P (arg1))
25893 base = arg2, offset = arg1;
25894 else
25895 base = arg1, offset = arg2;
25896
25897 gcc_assert (REG_P (base));
25898
25899 /* Catch the case of <address> = <reg> + <reg> */
25900 if (REG_P (offset))
25901 {
25902 int reg_offset = REGNO (offset);
25903 int reg_base = REGNO (base);
25904 int reg_dest = REGNO (operands[0]);
25905
25906 /* Add the base and offset registers together into the
25907 higher destination register. */
25908 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25909 reg_dest + 1, reg_base, reg_offset);
25910
25911 /* Load the lower destination register from the address in
25912 the higher destination register. */
25913 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25914 reg_dest, reg_dest + 1);
25915
25916 /* Load the higher destination register from its own address
25917 plus 4. */
25918 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25919 reg_dest + 1, reg_dest + 1);
25920 }
25921 else
25922 {
25923 /* Compute <address> + 4 for the high order load. */
25924 operands[2] = adjust_address (operands[1], SImode, 4);
25925
25926 /* If the computed address is held in the low order register
25927 then load the high order register first, otherwise always
25928 load the low order register first. */
25929 if (REGNO (operands[0]) == REGNO (base))
25930 {
25931 output_asm_insn ("ldr\t%H0, %2", operands);
25932 output_asm_insn ("ldr\t%0, %1", operands);
25933 }
25934 else
25935 {
25936 output_asm_insn ("ldr\t%0, %1", operands);
25937 output_asm_insn ("ldr\t%H0, %2", operands);
25938 }
25939 }
25940 break;
25941
25942 case LABEL_REF:
25943 /* With no registers to worry about we can just load the value
25944 directly. */
25945 operands[2] = adjust_address (operands[1], SImode, 4);
25946
25947 output_asm_insn ("ldr\t%H0, %2", operands);
25948 output_asm_insn ("ldr\t%0, %1", operands);
25949 break;
25950
25951 default:
25952 gcc_unreachable ();
25953 }
25954
25955 return "";
25956 }
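/* Illustrative example (expository addition, not part of the upstream code):
   assume operands[0] is r2 (so the pair r2/r3 is loaded) and the address
   is held in r2 itself.  The REG case above then loads the high word
   first so the base is not clobbered prematurely, roughly

       ldr  r3, [r2, #4]
       ldr  r2, [r2]

   With a base register that is not overwritten (say r4), the low word is
   loaded first instead.  */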
25957
25958 const char *
25959 thumb_output_move_mem_multiple (int n, rtx *operands)
25960 {
25961 switch (n)
25962 {
25963 case 2:
25964 if (REGNO (operands[4]) > REGNO (operands[5]))
25965 std::swap (operands[4], operands[5]);
25966
25967 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25968 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25969 break;
25970
25971 case 3:
25972 if (REGNO (operands[4]) > REGNO (operands[5]))
25973 std::swap (operands[4], operands[5]);
25974 if (REGNO (operands[5]) > REGNO (operands[6]))
25975 std::swap (operands[5], operands[6]);
25976 if (REGNO (operands[4]) > REGNO (operands[5]))
25977 std::swap (operands[4], operands[5]);
25978
25979 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25980 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25981 break;
25982
25983 default:
25984 gcc_unreachable ();
25985 }
25986
25987 return "";
25988 }
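/* Illustrative example (added for exposition; not from the original sources):
   for n == 2 with scratch registers r4 and r5 and pointer registers r1
   (source) and r0 (destination), the output is simply

       ldmia r1!, {r4, r5}
       stmia r0!, {r4, r5}

   The swaps above only ensure the register lists are in ascending order,
   as required by ldmia/stmia.  */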
25989
25990 /* Output a call-via instruction for thumb state. */
25991 const char *
25992 thumb_call_via_reg (rtx reg)
25993 {
25994 int regno = REGNO (reg);
25995 rtx *labelp;
25996
25997 gcc_assert (regno < LR_REGNUM);
25998
25999 /* If we are in the normal text section we can use a single instance
26000 per compilation unit. If we are doing function sections, then we need
26001 an entry per section, since we can't rely on reachability. */
26002 if (in_section == text_section)
26003 {
26004 thumb_call_reg_needed = 1;
26005
26006 if (thumb_call_via_label[regno] == NULL)
26007 thumb_call_via_label[regno] = gen_label_rtx ();
26008 labelp = thumb_call_via_label + regno;
26009 }
26010 else
26011 {
26012 if (cfun->machine->call_via[regno] == NULL)
26013 cfun->machine->call_via[regno] = gen_label_rtx ();
26014 labelp = cfun->machine->call_via + regno;
26015 }
26016
26017 output_asm_insn ("bl\t%a0", labelp);
26018 return "";
26019 }
26020
26021 /* Routines for generating rtl. */
26022 void
26023 thumb_expand_movmemqi (rtx *operands)
26024 {
26025 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26026 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26027 HOST_WIDE_INT len = INTVAL (operands[2]);
26028 HOST_WIDE_INT offset = 0;
26029
26030 while (len >= 12)
26031 {
26032 emit_insn (gen_movmem12b (out, in, out, in));
26033 len -= 12;
26034 }
26035
26036 if (len >= 8)
26037 {
26038 emit_insn (gen_movmem8b (out, in, out, in));
26039 len -= 8;
26040 }
26041
26042 if (len >= 4)
26043 {
26044 rtx reg = gen_reg_rtx (SImode);
26045 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26046 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26047 len -= 4;
26048 offset += 4;
26049 }
26050
26051 if (len >= 2)
26052 {
26053 rtx reg = gen_reg_rtx (HImode);
26054 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26055 plus_constant (Pmode, in,
26056 offset))));
26057 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26058 offset)),
26059 reg));
26060 len -= 2;
26061 offset += 2;
26062 }
26063
26064 if (len)
26065 {
26066 rtx reg = gen_reg_rtx (QImode);
26067 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26068 plus_constant (Pmode, in,
26069 offset))));
26070 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26071 offset)),
26072 reg));
26073 }
26074 }
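/* Worked example (expository addition, not part of the upstream code):
   for a 23-byte copy the code above expands to one 12-byte block move
   (movmem12b), one 8-byte block move (movmem8b), then a halfword copy at
   offset 0 and a byte copy at offset 2 from the already-advanced
   pointers.  The block-move patterns update the pointer registers
   themselves, which is presumably why OFFSET only tracks the scalar
   tail.  */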
26075
26076 void
26077 thumb_reload_out_hi (rtx *operands)
26078 {
26079 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26080 }
26081
26082 /* Return the length of a function name prefix
26083 that starts with the character 'c'. */
26084 static int
26085 arm_get_strip_length (int c)
26086 {
26087 switch (c)
26088 {
26089 ARM_NAME_ENCODING_LENGTHS
26090 default: return 0;
26091 }
26092 }
26093
26094 /* Return a pointer to a function's name with any
26095 and all prefix encodings stripped from it. */
26096 const char *
26097 arm_strip_name_encoding (const char *name)
26098 {
26099 int skip;
26100
26101 while ((skip = arm_get_strip_length (* name)))
26102 name += skip;
26103
26104 return name;
26105 }
26106
26107 /* If there is a '*' anywhere in the name's prefix, then
26108 emit the stripped name verbatim; otherwise prepend an
26109 underscore if leading underscores are being used. */
26110 void
26111 arm_asm_output_labelref (FILE *stream, const char *name)
26112 {
26113 int skip;
26114 int verbatim = 0;
26115
26116 while ((skip = arm_get_strip_length (* name)))
26117 {
26118 verbatim |= (*name == '*');
26119 name += skip;
26120 }
26121
26122 if (verbatim)
26123 fputs (name, stream);
26124 else
26125 asm_fprintf (stream, "%U%s", name);
26126 }
26127
26128 /* This function is used to emit an EABI tag and its associated value.
26129 We emit the numerical value of the tag in case the assembler does not
26130 support textual tags (e.g. gas prior to 2.20). If requested, we include
26131 the tag name in a comment so that anyone reading the assembler output
26132 will know which tag is being set.
26133
26134 This function is not static because arm-c.c needs it too. */
26135
26136 void
26137 arm_emit_eabi_attribute (const char *name, int num, int val)
26138 {
26139 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26140 if (flag_verbose_asm || flag_debug_asm)
26141 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26142 asm_fprintf (asm_out_file, "\n");
26143 }
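/* Illustrative output (added for exposition; not from the original sources):
   arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2) under
   -fverbose-asm would print roughly

       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals

   where '@' stands for ASM_COMMENT_START on typical ARM assemblers.  */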
26144
26145 /* This function is used to print CPU tuning information as a comment
26146 in the assembler file. Pointers are not printed for now. */
26147
26148 void
26149 arm_print_tune_info (void)
26150 {
26151 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26152 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26153 current_tune->constant_limit);
26154 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26155 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26156 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26157 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26158 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26159 "prefetch.l1_cache_size:\t%d\n",
26160 current_tune->prefetch.l1_cache_size);
26161 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26162 "prefetch.l1_cache_line_size:\t%d\n",
26163 current_tune->prefetch.l1_cache_line_size);
26164 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26165 "prefer_constant_pool:\t%d\n",
26166 (int) current_tune->prefer_constant_pool);
26167 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26168 "branch_cost:\t(s:speed, p:predictable)\n");
26169 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26170 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26171 current_tune->branch_cost (false, false));
26172 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26173 current_tune->branch_cost (false, true));
26174 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26175 current_tune->branch_cost (true, false));
26176 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26177 current_tune->branch_cost (true, true));
26178 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26179 "prefer_ldrd_strd:\t%d\n",
26180 (int) current_tune->prefer_ldrd_strd);
26181 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26182 "logical_op_non_short_circuit:\t[%d,%d]\n",
26183 (int) current_tune->logical_op_non_short_circuit_thumb,
26184 (int) current_tune->logical_op_non_short_circuit_arm);
26185 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26186 "prefer_neon_for_64bits:\t%d\n",
26187 (int) current_tune->prefer_neon_for_64bits);
26188 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26189 "disparage_flag_setting_t16_encodings:\t%d\n",
26190 (int) current_tune->disparage_flag_setting_t16_encodings);
26191 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26192 "string_ops_prefer_neon:\t%d\n",
26193 (int) current_tune->string_ops_prefer_neon);
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26195 "max_insns_inline_memset:\t%d\n",
26196 current_tune->max_insns_inline_memset);
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26198 current_tune->fusible_ops);
26199 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26200 (int) current_tune->sched_autopref);
26201 }
26202
26203 /* Print .arch and .arch_extension directives corresponding to the
26204 current architecture configuration. */
26205 static void
26206 arm_print_asm_arch_directives ()
26207 {
26208 const arch_option *arch
26209 = arm_parse_arch_option_name (all_architectures, "-march",
26210 arm_active_target.arch_name);
26211 auto_sbitmap opt_bits (isa_num_bits);
26212
26213 gcc_assert (arch);
26214
26215 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26216 if (!arch->common.extensions)
26217 return;
26218
26219 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26220 opt->name != NULL;
26221 opt++)
26222 {
26223 if (!opt->remove)
26224 {
26225 arm_initialize_isa (opt_bits, opt->isa_bits);
26226
26227 /* If every feature bit of this option is set in the target
26228 ISA specification, print out the option name. However,
26229 don't print anything if all the bits are part of the
26230 FPU specification. */
26231 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26232 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26233 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26234 }
26235 }
26236 }
26237
26238 static void
26239 arm_file_start (void)
26240 {
26241 int val;
26242
26243 if (TARGET_BPABI)
26244 {
26245 /* We don't have a specified CPU. Use the architecture to
26246 generate the tags.
26247
26248 Note: it might be better to do this unconditionally; then the
26249 assembler would not need to know about all new CPU names as
26250 they are added. */
26251 if (!arm_active_target.core_name)
26252 {
26253 /* armv7ve doesn't support any extensions. */
26254 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26255 {
26256 /* Keep backward compatibility for assemblers
26257 which don't support armv7ve. */
26258 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26259 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26260 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26261 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26262 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26263 }
26264 else
26265 arm_print_asm_arch_directives ();
26266 }
26267 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26268 asm_fprintf (asm_out_file, "\t.arch %s\n",
26269 arm_active_target.core_name + 8);
26270 else
26271 {
26272 const char* truncated_name
26273 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26274 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26275 }
26276
26277 if (print_tune_info)
26278 arm_print_tune_info ();
26279
26280 if (! TARGET_SOFT_FLOAT)
26281 {
26282 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26283 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26284
26285 if (TARGET_HARD_FLOAT_ABI)
26286 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26287 }
26288
26289 /* Some of these attributes only apply when the corresponding features
26290 are used. However we don't have any easy way of figuring this out.
26291 Conservatively record the setting that would have been used. */
26292
26293 if (flag_rounding_math)
26294 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26295
26296 if (!flag_unsafe_math_optimizations)
26297 {
26298 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26299 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26300 }
26301 if (flag_signaling_nans)
26302 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26303
26304 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26305 flag_finite_math_only ? 1 : 3);
26306
26307 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26308 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26309 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26310 flag_short_enums ? 1 : 2);
26311
26312 /* Tag_ABI_optimization_goals. */
26313 if (optimize_size)
26314 val = 4;
26315 else if (optimize >= 2)
26316 val = 2;
26317 else if (optimize)
26318 val = 1;
26319 else
26320 val = 6;
26321 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26322
26323 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26324 unaligned_access);
26325
26326 if (arm_fp16_format)
26327 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26328 (int) arm_fp16_format);
26329
26330 if (arm_lang_output_object_attributes_hook)
26331 arm_lang_output_object_attributes_hook ();
26332 }
26333
26334 default_file_start ();
26335 }
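/* Illustrative output (expository addition, not part of the upstream code):
   building with -march=armv7ve and no -mcpu, the code above falls into
   the backward-compatibility branch and the file starts roughly with

       .arch armv7-a
       .arch_extension virt
       .arch_extension idiv
       .arch_extension sec
       .arch_extension mp

   followed by the .eabi_attribute directives selected from the flags
   checked above.  */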
26336
26337 static void
26338 arm_file_end (void)
26339 {
26340 int regno;
26341
26342 if (NEED_INDICATE_EXEC_STACK)
26343 /* Add .note.GNU-stack. */
26344 file_end_indicate_exec_stack ();
26345
26346 if (! thumb_call_reg_needed)
26347 return;
26348
26349 switch_to_section (text_section);
26350 asm_fprintf (asm_out_file, "\t.code 16\n");
26351 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26352
26353 for (regno = 0; regno < LR_REGNUM; regno++)
26354 {
26355 rtx label = thumb_call_via_label[regno];
26356
26357 if (label != 0)
26358 {
26359 targetm.asm_out.internal_label (asm_out_file, "L",
26360 CODE_LABEL_NUMBER (label));
26361 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26362 }
26363 }
26364 }
26365
26366 #ifndef ARM_PE
26367 /* Symbols in the text segment can be accessed without indirecting via the
26368 constant pool; it may take an extra binary operation, but this is still
26369 faster than indirecting via memory. Don't do this when not optimizing,
26370 since we won't be calculating al of the offsets necessary to do this
26371 simplification. */
26372
26373 static void
26374 arm_encode_section_info (tree decl, rtx rtl, int first)
26375 {
26376 if (optimize > 0 && TREE_CONSTANT (decl))
26377 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26378
26379 default_encode_section_info (decl, rtl, first);
26380 }
26381 #endif /* !ARM_PE */
26382
26383 static void
26384 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26385 {
26386 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26387 && !strcmp (prefix, "L"))
26388 {
26389 arm_ccfsm_state = 0;
26390 arm_target_insn = NULL;
26391 }
26392 default_internal_label (stream, prefix, labelno);
26393 }
26394
26395 /* Output code to add DELTA to the first argument, and then jump
26396 to FUNCTION. Used for C++ multiple inheritance. */
26397
26398 static void
26399 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26400 HOST_WIDE_INT, tree function)
26401 {
26402 static int thunk_label = 0;
26403 char label[256];
26404 char labelpc[256];
26405 int mi_delta = delta;
26406 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26407 int shift = 0;
26408 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26409 ? 1 : 0);
26410 if (mi_delta < 0)
26411 mi_delta = - mi_delta;
26412
26413 final_start_function (emit_barrier (), file, 1);
26414
26415 if (TARGET_THUMB1)
26416 {
26417 int labelno = thunk_label++;
26418 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26419 /* Thunks are entered in ARM mode when available. */
26420 if (TARGET_THUMB1_ONLY)
26421 {
26422 /* push r3 so we can use it as a temporary. */
26423 /* TODO: Omit this save if r3 is not used. */
26424 fputs ("\tpush {r3}\n", file);
26425 fputs ("\tldr\tr3, ", file);
26426 }
26427 else
26428 {
26429 fputs ("\tldr\tr12, ", file);
26430 }
26431 assemble_name (file, label);
26432 fputc ('\n', file);
26433 if (flag_pic)
26434 {
26435 /* If we are generating PIC, the ldr instruction below loads
26436 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26437 the address of the add + 8, so we have:
26438
26439 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26440 = target + 1.
26441
26442 Note that we have "+ 1" because some versions of GNU ld
26443 don't set the low bit of the result for R_ARM_REL32
26444 relocations against thumb function symbols.
26445 On ARMv6M this is +4, not +8. */
26446 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26447 assemble_name (file, labelpc);
26448 fputs (":\n", file);
26449 if (TARGET_THUMB1_ONLY)
26450 {
26451 /* This is 2 insns after the start of the thunk, so we know it
26452 is 4-byte aligned. */
26453 fputs ("\tadd\tr3, pc, r3\n", file);
26454 fputs ("\tmov r12, r3\n", file);
26455 }
26456 else
26457 fputs ("\tadd\tr12, pc, r12\n", file);
26458 }
26459 else if (TARGET_THUMB1_ONLY)
26460 fputs ("\tmov r12, r3\n", file);
26461 }
26462 if (TARGET_THUMB1_ONLY)
26463 {
26464 if (mi_delta > 255)
26465 {
26466 fputs ("\tldr\tr3, ", file);
26467 assemble_name (file, label);
26468 fputs ("+4\n", file);
26469 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26470 mi_op, this_regno, this_regno);
26471 }
26472 else if (mi_delta != 0)
26473 {
26474 /* Thumb1 unified syntax requires s suffix in instruction name when
26475 one of the operands is immediate. */
26476 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26477 mi_op, this_regno, this_regno,
26478 mi_delta);
26479 }
26480 }
26481 else
26482 {
26483 /* TODO: Use movw/movt for large constants when available. */
26484 while (mi_delta != 0)
26485 {
26486 if ((mi_delta & (3 << shift)) == 0)
26487 shift += 2;
26488 else
26489 {
26490 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26491 mi_op, this_regno, this_regno,
26492 mi_delta & (0xff << shift));
26493 mi_delta &= ~(0xff << shift);
26494 shift += 8;
26495 }
26496 }
26497 }
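/* Worked example (added for exposition; not from the original sources):
   the loop above splits the delta into byte-sized chunks aligned on 2-bit
   boundaries so that each chunk is a legal ARM immediate.  For a
   hypothetical delta of 0x10203 and a non-aggregate return
   (this_regno == 0) it emits

       add r0, r0, #3
       add r0, r0, #512
       add r0, r0, #65536

   i.e. one add per non-zero chunk.  */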
26498 if (TARGET_THUMB1)
26499 {
26500 if (TARGET_THUMB1_ONLY)
26501 fputs ("\tpop\t{r3}\n", file);
26502
26503 fprintf (file, "\tbx\tr12\n");
26504 ASM_OUTPUT_ALIGN (file, 2);
26505 assemble_name (file, label);
26506 fputs (":\n", file);
26507 if (flag_pic)
26508 {
26509 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26510 rtx tem = XEXP (DECL_RTL (function), 0);
26511 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26512 pipeline offset is four rather than eight. Adjust the offset
26513 accordingly. */
26514 tem = plus_constant (GET_MODE (tem), tem,
26515 TARGET_THUMB1_ONLY ? -3 : -7);
26516 tem = gen_rtx_MINUS (GET_MODE (tem),
26517 tem,
26518 gen_rtx_SYMBOL_REF (Pmode,
26519 ggc_strdup (labelpc)));
26520 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26521 }
26522 else
26523 /* Output ".word .LTHUNKn". */
26524 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26525
26526 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26527 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26528 }
26529 else
26530 {
26531 fputs ("\tb\t", file);
26532 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26533 if (NEED_PLT_RELOC)
26534 fputs ("(PLT)", file);
26535 fputc ('\n', file);
26536 }
26537
26538 final_end_function ();
26539 }
26540
26541 /* MI thunk handling for TARGET_32BIT. */
26542
26543 static void
26544 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26545 HOST_WIDE_INT vcall_offset, tree function)
26546 {
26547 /* On ARM, this_regno is R0 or R1 depending on
26548 whether the function returns an aggregate or not.
26549 */
26550 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26551 function)
26552 ? R1_REGNUM : R0_REGNUM);
26553
26554 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26555 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26556 reload_completed = 1;
26557 emit_note (NOTE_INSN_PROLOGUE_END);
26558
26559 /* Add DELTA to THIS_RTX. */
26560 if (delta != 0)
26561 arm_split_constant (PLUS, Pmode, NULL_RTX,
26562 delta, this_rtx, this_rtx, false);
26563
26564 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26565 if (vcall_offset != 0)
26566 {
26567 /* Load *THIS_RTX. */
26568 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26569 /* Compute *THIS_RTX + VCALL_OFFSET. */
26570 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26571 false);
26572 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26573 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26574 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26575 }
26576
26577 /* Generate a tail call to the target function. */
26578 if (!TREE_USED (function))
26579 {
26580 assemble_external (function);
26581 TREE_USED (function) = 1;
26582 }
26583 rtx funexp = XEXP (DECL_RTL (function), 0);
26584 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26585 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26586 SIBLING_CALL_P (insn) = 1;
26587
26588 insn = get_insns ();
26589 shorten_branches (insn);
26590 final_start_function (insn, file, 1);
26591 final (insn, file, 1);
26592 final_end_function ();
26593
26594 /* Stop pretending this is a post-reload pass. */
26595 reload_completed = 0;
26596 }
26597
26598 /* Output code to add DELTA to the first argument, and then jump
26599 to FUNCTION. Used for C++ multiple inheritance. */
26600
26601 static void
26602 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26603 HOST_WIDE_INT vcall_offset, tree function)
26604 {
26605 if (TARGET_32BIT)
26606 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26607 else
26608 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26609 }
26610
26611 int
26612 arm_emit_vector_const (FILE *file, rtx x)
26613 {
26614 int i;
26615 const char * pattern;
26616
26617 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26618
26619 switch (GET_MODE (x))
26620 {
26621 case E_V2SImode: pattern = "%08x"; break;
26622 case E_V4HImode: pattern = "%04x"; break;
26623 case E_V8QImode: pattern = "%02x"; break;
26624 default: gcc_unreachable ();
26625 }
26626
26627 fprintf (file, "0x");
26628 for (i = CONST_VECTOR_NUNITS (x); i--;)
26629 {
26630 rtx element;
26631
26632 element = CONST_VECTOR_ELT (x, i);
26633 fprintf (file, pattern, INTVAL (element));
26634 }
26635
26636 return 1;
26637 }
26638
26639 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26640 HFmode constant pool entries are actually loaded with ldr. */
26641 void
26642 arm_emit_fp16_const (rtx c)
26643 {
26644 long bits;
26645
26646 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26647 if (WORDS_BIG_ENDIAN)
26648 assemble_zeros (2);
26649 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26650 if (!WORDS_BIG_ENDIAN)
26651 assemble_zeros (2);
26652 }
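/* Illustrative example (expository addition, not part of the upstream code):
   the HFmode value 1.0 has the bit pattern 0x3c00, so on a little-endian
   target the routine above emits roughly

       .short 0x3c00
       .space 2

   while a WORDS_BIG_ENDIAN target pads first and then emits the two
   significant bytes.  The exact directives depend on the target's
   assemble_integer/assemble_zeros hooks.  */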
26653
26654 const char *
26655 arm_output_load_gr (rtx *operands)
26656 {
26657 rtx reg;
26658 rtx offset;
26659 rtx wcgr;
26660 rtx sum;
26661
26662 if (!MEM_P (operands [1])
26663 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26664 || !REG_P (reg = XEXP (sum, 0))
26665 || !CONST_INT_P (offset = XEXP (sum, 1))
26666 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26667 return "wldrw%?\t%0, %1";
26668
26669 /* Fix up an out-of-range load of a GR register. */
26670 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26671 wcgr = operands[0];
26672 operands[0] = reg;
26673 output_asm_insn ("ldr%?\t%0, %1", operands);
26674
26675 operands[0] = wcgr;
26676 operands[1] = reg;
26677 output_asm_insn ("tmcr%?\t%0, %1", operands);
26678 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26679
26680 return "";
26681 }
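/* Illustrative example (added for exposition; not from the original sources):
   for an out-of-range address such as [r1, #2048], the expansion above
   produces roughly

       str  r1, [sp, #-4]!   @ Start of GR load expansion
       ldr  r1, [r1, #2048]
       tmcr wcgr0, r1        @ wcgr0 stands in for operands[0]
       ldr  r1, [sp], #4     @ End of GR load expansion

   The base register doubles as the temporary, so it is saved and
   restored around the sequence.  */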
26682
26683 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26684
26685 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26686 named arg and all anonymous args onto the stack.
26687 XXX I know the prologue shouldn't be pushing registers, but it is faster
26688 that way. */
26689
26690 static void
26691 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26692 machine_mode mode,
26693 tree type,
26694 int *pretend_size,
26695 int second_time ATTRIBUTE_UNUSED)
26696 {
26697 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26698 int nregs;
26699
26700 cfun->machine->uses_anonymous_args = 1;
26701 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26702 {
26703 nregs = pcum->aapcs_ncrn;
26704 if (nregs & 1)
26705 {
26706 int res = arm_needs_doubleword_align (mode, type);
26707 if (res < 0 && warn_psabi)
26708 inform (input_location, "parameter passing for argument of "
26709 "type %qT changed in GCC 7.1", type);
26710 else if (res > 0)
26711 nregs++;
26712 }
26713 }
26714 else
26715 nregs = pcum->nregs;
26716
26717 if (nregs < NUM_ARG_REGS)
26718 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26719 }
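/* Illustrative example (expository addition, not part of the upstream code):
   for a variadic AAPCS function such as "int f (int a, ...)", one core
   register (r0) is consumed by the named argument, so nregs == 1 and
   *pretend_size becomes (4 - 1) * 4 == 12, telling the prologue to push
   r1-r3 so the anonymous arguments form a contiguous block with any
   arguments already on the stack.  */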
26720
26721 /* We can't rely on the caller doing the proper promotion when
26722 using APCS or ATPCS. */
26723
26724 static bool
26725 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26726 {
26727 return !TARGET_AAPCS_BASED;
26728 }
26729
26730 static machine_mode
26731 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26732 machine_mode mode,
26733 int *punsignedp ATTRIBUTE_UNUSED,
26734 const_tree fntype ATTRIBUTE_UNUSED,
26735 int for_return ATTRIBUTE_UNUSED)
26736 {
26737 if (GET_MODE_CLASS (mode) == MODE_INT
26738 && GET_MODE_SIZE (mode) < 4)
26739 return SImode;
26740
26741 return mode;
26742 }
26743
26744
26745 static bool
26746 arm_default_short_enums (void)
26747 {
26748 return ARM_DEFAULT_SHORT_ENUMS;
26749 }
26750
26751
26752 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26753
26754 static bool
26755 arm_align_anon_bitfield (void)
26756 {
26757 return TARGET_AAPCS_BASED;
26758 }
26759
26760
26761 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26762
26763 static tree
26764 arm_cxx_guard_type (void)
26765 {
26766 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26767 }
26768
26769
26770 /* The EABI says test the least significant bit of a guard variable. */
26771
26772 static bool
26773 arm_cxx_guard_mask_bit (void)
26774 {
26775 return TARGET_AAPCS_BASED;
26776 }
26777
26778
26779 /* The EABI specifies that all array cookies are 8 bytes long. */
26780
26781 static tree
26782 arm_get_cookie_size (tree type)
26783 {
26784 tree size;
26785
26786 if (!TARGET_AAPCS_BASED)
26787 return default_cxx_get_cookie_size (type);
26788
26789 size = build_int_cst (sizetype, 8);
26790 return size;
26791 }
26792
26793
26794 /* The EABI says that array cookies should also contain the element size. */
26795
26796 static bool
26797 arm_cookie_has_size (void)
26798 {
26799 return TARGET_AAPCS_BASED;
26800 }
26801
26802
26803 /* The EABI says constructors and destructors should return a pointer to
26804 the object constructed/destroyed. */
26805
26806 static bool
26807 arm_cxx_cdtor_returns_this (void)
26808 {
26809 return TARGET_AAPCS_BASED;
26810 }
26811
26812 /* The EABI says that an inline function may never be the key
26813 method. */
26814
26815 static bool
26816 arm_cxx_key_method_may_be_inline (void)
26817 {
26818 return !TARGET_AAPCS_BASED;
26819 }
26820
26821 static void
26822 arm_cxx_determine_class_data_visibility (tree decl)
26823 {
26824 if (!TARGET_AAPCS_BASED
26825 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26826 return;
26827
26828 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26829 is exported. However, on systems without dynamic vague linkage,
26830 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26831 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26832 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26833 else
26834 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26835 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26836 }
26837
26838 static bool
26839 arm_cxx_class_data_always_comdat (void)
26840 {
26841 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26842 vague linkage if the class has no key function. */
26843 return !TARGET_AAPCS_BASED;
26844 }
26845
26846
26847 /* The EABI says __aeabi_atexit should be used to register static
26848 destructors. */
26849
26850 static bool
26851 arm_cxx_use_aeabi_atexit (void)
26852 {
26853 return TARGET_AAPCS_BASED;
26854 }
26855
26856
26857 void
26858 arm_set_return_address (rtx source, rtx scratch)
26859 {
26860 arm_stack_offsets *offsets;
26861 HOST_WIDE_INT delta;
26862 rtx addr;
26863 unsigned long saved_regs;
26864
26865 offsets = arm_get_frame_offsets ();
26866 saved_regs = offsets->saved_regs_mask;
26867
26868 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26869 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26870 else
26871 {
26872 if (frame_pointer_needed)
26873 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26874 else
26875 {
26876 /* LR will be the first saved register. */
26877 delta = offsets->outgoing_args - (offsets->frame + 4);
26878
26879
26880 if (delta >= 4096)
26881 {
26882 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26883 GEN_INT (delta & ~4095)));
26884 addr = scratch;
26885 delta &= 4095;
26886 }
26887 else
26888 addr = stack_pointer_rtx;
26889
26890 addr = plus_constant (Pmode, addr, delta);
26891 }
26892 /* The store needs to be marked as frame related in order to prevent
26893 DSE from deleting it as dead if it is based on fp. */
26894 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26895 RTX_FRAME_RELATED_P (insn) = 1;
26896 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26897 }
26898 }
26899
26900
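/* Thumb counterpart of arm_set_return_address: write SOURCE into LR, or
   into the stack slot in which the prologue saved LR, using SCRATCH to
   build addresses whose offsets exceed the immediate range. */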
26901 void
26902 thumb_set_return_address (rtx source, rtx scratch)
26903 {
26904 arm_stack_offsets *offsets;
26905 HOST_WIDE_INT delta;
26906 HOST_WIDE_INT limit;
26907 int reg;
26908 rtx addr;
26909 unsigned long mask;
26910
26911 emit_use (source);
26912
26913 offsets = arm_get_frame_offsets ();
26914 mask = offsets->saved_regs_mask;
26915 if (mask & (1 << LR_REGNUM))
26916 {
26917 limit = 1024;
26918 /* Find the saved regs. */
26919 if (frame_pointer_needed)
26920 {
26921 delta = offsets->soft_frame - offsets->saved_args;
26922 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26923 if (TARGET_THUMB1)
26924 limit = 128;
26925 }
26926 else
26927 {
26928 delta = offsets->outgoing_args - offsets->saved_args;
26929 reg = SP_REGNUM;
26930 }
26931 /* Allow for the stack frame. */
26932 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26933 delta -= 16;
26934 /* The link register is always the first saved register. */
26935 delta -= 4;
26936
26937 /* Construct the address. */
26938 addr = gen_rtx_REG (SImode, reg);
26939 if (delta > limit)
26940 {
26941 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26942 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26943 addr = scratch;
26944 }
26945 else
26946 addr = plus_constant (Pmode, addr, delta);
26947
26948 /* The store needs to be marked as frame related in order to prevent
26949 DSE from deleting it as dead if it is based on fp. */
26950 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26951 RTX_FRAME_RELATED_P (insn) = 1;
26952 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26953 }
26954 else
26955 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26956 }
26957
26958 /* Implements target hook vector_mode_supported_p. */
26959 bool
26960 arm_vector_mode_supported_p (machine_mode mode)
26961 {
26962 /* Neon also supports V2SImode, etc. listed in the clause below. */
26963 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26964 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26965 || mode == V2DImode || mode == V8HFmode))
26966 return true;
26967
26968 if ((TARGET_NEON || TARGET_IWMMXT)
26969 && ((mode == V2SImode)
26970 || (mode == V4HImode)
26971 || (mode == V8QImode)))
26972 return true;
26973
26974 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26975 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26976 || mode == V2HAmode))
26977 return true;
26978
26979 return false;
26980 }
26981
26982 /* Implements target hook array_mode_supported_p. */
26983
26984 static bool
26985 arm_array_mode_supported_p (machine_mode mode,
26986 unsigned HOST_WIDE_INT nelems)
26987 {
26988 if (TARGET_NEON
26989 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26990 && (nelems >= 2 && nelems <= 4))
26991 return true;
26992
26993 return false;
26994 }
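/* For example (illustrative), with Neon enabled an array of three V4SImode
   vectors (nelems == 3) is supported; such array modes back the vld3/vst3
   style structure loads and stores.  Arrays of one vector, or of more than
   four, fall back to the default handling. */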
26995
26996 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26997 registers when autovectorizing for Neon, at least until multiple vector
26998 widths are supported properly by the middle-end. */
26999
27000 static machine_mode
27001 arm_preferred_simd_mode (scalar_mode mode)
27002 {
27003 if (TARGET_NEON)
27004 switch (mode)
27005 {
27006 case E_SFmode:
27007 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27008 case E_SImode:
27009 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27010 case E_HImode:
27011 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27012 case E_QImode:
27013 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27014 case E_DImode:
27015 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27016 return V2DImode;
27017 break;
27018
27019 default:;
27020 }
27021
27022 if (TARGET_REALLY_IWMMXT)
27023 switch (mode)
27024 {
27025 case E_SImode:
27026 return V2SImode;
27027 case E_HImode:
27028 return V4HImode;
27029 case E_QImode:
27030 return V8QImode;
27031
27032 default:;
27033 }
27034
27035 return word_mode;
27036 }
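/* A rough example of the effect: with Neon enabled and the default options,
   an int loop such as
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];
   is autovectorized using V4SImode (four ints per 128-bit Q register),
   whereas -mvectorize-with-neon-double selects V2SImode (one 64-bit D
   register) instead.  The exact code generated of course depends on the
   full option set and on the loop itself. */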
27037
27038 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27039
27040 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27041 using r0-r4 for function arguments, r7 for the stack frame, and not have
27042 enough left over to do doubleword arithmetic. For Thumb-2 all the
27043 potentially problematic instructions accept high registers so this is not
27044 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27045 that require many low registers. */
27046 static bool
27047 arm_class_likely_spilled_p (reg_class_t rclass)
27048 {
27049 if ((TARGET_THUMB1 && rclass == LO_REGS)
27050 || rclass == CC_REG)
27051 return true;
27052
27053 return false;
27054 }
27055
27056 /* Implements target hook small_register_classes_for_mode_p. */
27057 bool
27058 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27059 {
27060 return TARGET_THUMB1;
27061 }
27062
27063 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27064 ARM insns and therefore guarantee that the shift count is modulo 256.
27065 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27066 guarantee no particular behavior for out-of-range counts. */
27067
27068 static unsigned HOST_WIDE_INT
27069 arm_shift_truncation_mask (machine_mode mode)
27070 {
27071 return mode == SImode ? 255 : 0;
27072 }
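/* For example, an ARM register-specified SImode shift only consumes the
   bottom byte of the shift amount, so a count of 257 behaves like a count
   of 1; returning 255 here lets the middle-end omit explicit masking of
   shift counts.  DImode returns 0 because the libgcc/optabs sequences make
   no such promise. */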
27073
27074
27075 /* Map internal gcc register numbers to DWARF2 register numbers. */
27076
27077 unsigned int
27078 arm_dbx_register_number (unsigned int regno)
27079 {
27080 if (regno < 16)
27081 return regno;
27082
27083 if (IS_VFP_REGNUM (regno))
27084 {
27085 /* See comment in arm_dwarf_register_span. */
27086 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27087 return 64 + regno - FIRST_VFP_REGNUM;
27088 else
27089 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27090 }
27091
27092 if (IS_IWMMXT_GR_REGNUM (regno))
27093 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27094
27095 if (IS_IWMMXT_REGNUM (regno))
27096 return 112 + regno - FIRST_IWMMXT_REGNUM;
27097
27098 return DWARF_FRAME_REGISTERS;
27099 }
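/* Some example mappings (illustrative): r0-r15 map to DWARF numbers 0-15,
   s0 maps to 64 (the legacy VFP range), d16 maps to 256 + 16 = 272, and
   iWMMXt wR0 maps to 112; any other register falls through to
   DWARF_FRAME_REGISTERS, i.e. has no DWARF number. */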
27100
27101 /* DWARF models the VFPv3 registers as 32 64-bit registers.
27102 GCC models them as 64 32-bit registers, so we need to describe this to
27103 the DWARF generation code. Other registers can use the default. */
27104 static rtx
27105 arm_dwarf_register_span (rtx rtl)
27106 {
27107 machine_mode mode;
27108 unsigned regno;
27109 rtx parts[16];
27110 int nregs;
27111 int i;
27112
27113 regno = REGNO (rtl);
27114 if (!IS_VFP_REGNUM (regno))
27115 return NULL_RTX;
27116
27117 /* XXX FIXME: The EABI defines two VFP register ranges:
27118 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27119 256-287: D0-D31
27120 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27121 corresponding D register. Until GDB supports this, we shall use the
27122 legacy encodings. We also use these encodings for D0-D15 for
27123 compatibility with older debuggers. */
27124 mode = GET_MODE (rtl);
27125 if (GET_MODE_SIZE (mode) < 8)
27126 return NULL_RTX;
27127
27128 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27129 {
27130 nregs = GET_MODE_SIZE (mode) / 4;
27131 for (i = 0; i < nregs; i += 2)
27132 if (TARGET_BIG_END)
27133 {
27134 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27135 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27136 }
27137 else
27138 {
27139 parts[i] = gen_rtx_REG (SImode, regno + i);
27140 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27141 }
27142 }
27143 else
27144 {
27145 nregs = GET_MODE_SIZE (mode) / 8;
27146 for (i = 0; i < nregs; i++)
27147 parts[i] = gen_rtx_REG (DImode, regno + i);
27148 }
27149
27150 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27151 }
27152
27153 #if ARM_UNWIND_INFO
27154 /* Emit unwind directives for a store-multiple instruction or stack pointer
27155 push during alignment.
27156 These should only ever be generated by the function prologue code, so
27157 expect them to have a particular form.
27158 The store-multiple instruction sometimes pushes pc as the last register,
27159 although it should not be tracked in the unwind information, or for -Os
27160 sometimes pushes some dummy registers before the first register that needs
27161 to be tracked in the unwind information; such dummy registers are there just
27162 to avoid separate stack adjustment, and will not be restored in the
27163 epilogue. */
27164
27165 static void
27166 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27167 {
27168 int i;
27169 HOST_WIDE_INT offset;
27170 HOST_WIDE_INT nregs;
27171 int reg_size;
27172 unsigned reg;
27173 unsigned lastreg;
27174 unsigned padfirst = 0, padlast = 0;
27175 rtx e;
27176
27177 e = XVECEXP (p, 0, 0);
27178 gcc_assert (GET_CODE (e) == SET);
27179
27180 /* First insn will adjust the stack pointer. */
27181 gcc_assert (GET_CODE (e) == SET
27182 && REG_P (SET_DEST (e))
27183 && REGNO (SET_DEST (e)) == SP_REGNUM
27184 && GET_CODE (SET_SRC (e)) == PLUS);
27185
27186 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27187 nregs = XVECLEN (p, 0) - 1;
27188 gcc_assert (nregs);
27189
27190 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27191 if (reg < 16)
27192 {
27193 /* For -Os dummy registers can be pushed at the beginning to
27194 avoid separate stack pointer adjustment. */
27195 e = XVECEXP (p, 0, 1);
27196 e = XEXP (SET_DEST (e), 0);
27197 if (GET_CODE (e) == PLUS)
27198 padfirst = INTVAL (XEXP (e, 1));
27199 gcc_assert (padfirst == 0 || optimize_size);
27200 /* The function prologue may also push pc, but not annotate it as it is
27201 never restored. We turn this into a stack pointer adjustment. */
27202 e = XVECEXP (p, 0, nregs);
27203 e = XEXP (SET_DEST (e), 0);
27204 if (GET_CODE (e) == PLUS)
27205 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27206 else
27207 padlast = offset - 4;
27208 gcc_assert (padlast == 0 || padlast == 4);
27209 if (padlast == 4)
27210 fprintf (asm_out_file, "\t.pad #4\n");
27211 reg_size = 4;
27212 fprintf (asm_out_file, "\t.save {");
27213 }
27214 else if (IS_VFP_REGNUM (reg))
27215 {
27216 reg_size = 8;
27217 fprintf (asm_out_file, "\t.vsave {");
27218 }
27219 else
27220 /* Unknown register type. */
27221 gcc_unreachable ();
27222
27223 /* If the stack increment doesn't match the size of the saved registers,
27224 something has gone horribly wrong. */
27225 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27226
27227 offset = padfirst;
27228 lastreg = 0;
27229 /* The remaining insns will describe the stores. */
27230 for (i = 1; i <= nregs; i++)
27231 {
27232 /* Expect (set (mem <addr>) (reg)).
27233 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27234 e = XVECEXP (p, 0, i);
27235 gcc_assert (GET_CODE (e) == SET
27236 && MEM_P (SET_DEST (e))
27237 && REG_P (SET_SRC (e)));
27238
27239 reg = REGNO (SET_SRC (e));
27240 gcc_assert (reg >= lastreg);
27241
27242 if (i != 1)
27243 fprintf (asm_out_file, ", ");
27244 /* We can't use %r for vfp because we need to use the
27245 double precision register names. */
27246 if (IS_VFP_REGNUM (reg))
27247 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27248 else
27249 asm_fprintf (asm_out_file, "%r", reg);
27250
27251 if (flag_checking)
27252 {
27253 /* Check that the addresses are consecutive. */
27254 e = XEXP (SET_DEST (e), 0);
27255 if (GET_CODE (e) == PLUS)
27256 gcc_assert (REG_P (XEXP (e, 0))
27257 && REGNO (XEXP (e, 0)) == SP_REGNUM
27258 && CONST_INT_P (XEXP (e, 1))
27259 && offset == INTVAL (XEXP (e, 1)));
27260 else
27261 gcc_assert (i == 1
27262 && REG_P (e)
27263 && REGNO (e) == SP_REGNUM);
27264 offset += reg_size;
27265 }
27266 }
27267 fprintf (asm_out_file, "}\n");
27268 if (padfirst)
27269 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27270 }
27271
27272 /* Emit unwind directives for a SET. */
27273
27274 static void
27275 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27276 {
27277 rtx e0;
27278 rtx e1;
27279 unsigned reg;
27280
27281 e0 = XEXP (p, 0);
27282 e1 = XEXP (p, 1);
27283 switch (GET_CODE (e0))
27284 {
27285 case MEM:
27286 /* Pushing a single register. */
27287 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27288 || !REG_P (XEXP (XEXP (e0, 0), 0))
27289 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27290 abort ();
27291
27292 asm_fprintf (asm_out_file, "\t.save ");
27293 if (IS_VFP_REGNUM (REGNO (e1)))
27294 asm_fprintf(asm_out_file, "{d%d}\n",
27295 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27296 else
27297 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27298 break;
27299
27300 case REG:
27301 if (REGNO (e0) == SP_REGNUM)
27302 {
27303 /* A stack increment. */
27304 if (GET_CODE (e1) != PLUS
27305 || !REG_P (XEXP (e1, 0))
27306 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27307 || !CONST_INT_P (XEXP (e1, 1)))
27308 abort ();
27309
27310 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27311 -INTVAL (XEXP (e1, 1)));
27312 }
27313 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27314 {
27315 HOST_WIDE_INT offset;
27316
27317 if (GET_CODE (e1) == PLUS)
27318 {
27319 if (!REG_P (XEXP (e1, 0))
27320 || !CONST_INT_P (XEXP (e1, 1)))
27321 abort ();
27322 reg = REGNO (XEXP (e1, 0));
27323 offset = INTVAL (XEXP (e1, 1));
27324 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27325 HARD_FRAME_POINTER_REGNUM, reg,
27326 offset);
27327 }
27328 else if (REG_P (e1))
27329 {
27330 reg = REGNO (e1);
27331 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27332 HARD_FRAME_POINTER_REGNUM, reg);
27333 }
27334 else
27335 abort ();
27336 }
27337 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27338 {
27339 /* Move from sp to reg. */
27340 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27341 }
27342 else if (GET_CODE (e1) == PLUS
27343 && REG_P (XEXP (e1, 0))
27344 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27345 && CONST_INT_P (XEXP (e1, 1)))
27346 {
27347 /* Set reg to offset from sp. */
27348 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27349 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27350 }
27351 else
27352 abort ();
27353 break;
27354
27355 default:
27356 abort ();
27357 }
27358 }
27359
27360
27361 /* Emit unwind directives for the given insn. */
27362
27363 static void
27364 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27365 {
27366 rtx note, pat;
27367 bool handled_one = false;
27368
27369 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27370 return;
27371
27372 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27373 && (TREE_NOTHROW (current_function_decl)
27374 || crtl->all_throwers_are_sibcalls))
27375 return;
27376
27377 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27378 return;
27379
27380 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27381 {
27382 switch (REG_NOTE_KIND (note))
27383 {
27384 case REG_FRAME_RELATED_EXPR:
27385 pat = XEXP (note, 0);
27386 goto found;
27387
27388 case REG_CFA_REGISTER:
27389 pat = XEXP (note, 0);
27390 if (pat == NULL)
27391 {
27392 pat = PATTERN (insn);
27393 if (GET_CODE (pat) == PARALLEL)
27394 pat = XVECEXP (pat, 0, 0);
27395 }
27396
27397 /* Only emitted for IS_STACKALIGN re-alignment. */
27398 {
27399 rtx dest, src;
27400 unsigned reg;
27401
27402 src = SET_SRC (pat);
27403 dest = SET_DEST (pat);
27404
27405 gcc_assert (src == stack_pointer_rtx);
27406 reg = REGNO (dest);
27407 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27408 reg + 0x90, reg);
27409 }
27410 handled_one = true;
27411 break;
27412
27413 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27414 to get correct DWARF information for shrink-wrap. We should not
27415 emit unwind information for it because such insns are used either for
27416 pretend arguments or as notes to adjust sp and restore registers from
27417 the stack. */
27418 case REG_CFA_DEF_CFA:
27419 case REG_CFA_ADJUST_CFA:
27420 case REG_CFA_RESTORE:
27421 return;
27422
27423 case REG_CFA_EXPRESSION:
27424 case REG_CFA_OFFSET:
27425 /* ??? Only handling here what we actually emit. */
27426 gcc_unreachable ();
27427
27428 default:
27429 break;
27430 }
27431 }
27432 if (handled_one)
27433 return;
27434 pat = PATTERN (insn);
27435 found:
27436
27437 switch (GET_CODE (pat))
27438 {
27439 case SET:
27440 arm_unwind_emit_set (asm_out_file, pat);
27441 break;
27442
27443 case SEQUENCE:
27444 /* Store multiple. */
27445 arm_unwind_emit_sequence (asm_out_file, pat);
27446 break;
27447
27448 default:
27449 abort();
27450 }
27451 }
27452
27453
27454 /* Output a reference from a function exception table to the type_info
27455 object X. The EABI specifies that the symbol should be relocated by
27456 an R_ARM_TARGET2 relocation. */
27457
27458 static bool
27459 arm_output_ttype (rtx x)
27460 {
27461 fputs ("\t.word\t", asm_out_file);
27462 output_addr_const (asm_out_file, x);
27463 /* Use special relocations for symbol references. */
27464 if (!CONST_INT_P (x))
27465 fputs ("(TARGET2)", asm_out_file);
27466 fputc ('\n', asm_out_file);
27467
27468 return TRUE;
27469 }
27470
27471 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27472
27473 static void
27474 arm_asm_emit_except_personality (rtx personality)
27475 {
27476 fputs ("\t.personality\t", asm_out_file);
27477 output_addr_const (asm_out_file, personality);
27478 fputc ('\n', asm_out_file);
27479 }
27480 #endif /* ARM_UNWIND_INFO */
27481
27482 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27483
27484 static void
27485 arm_asm_init_sections (void)
27486 {
27487 #if ARM_UNWIND_INFO
27488 exception_section = get_unnamed_section (0, output_section_asm_op,
27489 "\t.handlerdata");
27490 #endif /* ARM_UNWIND_INFO */
27491
27492 #ifdef OBJECT_FORMAT_ELF
27493 if (target_pure_code)
27494 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27495 #endif
27496 }
27497
27498 /* Output unwind directives for the start/end of a function. */
27499
27500 void
27501 arm_output_fn_unwind (FILE * f, bool prologue)
27502 {
27503 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27504 return;
27505
27506 if (prologue)
27507 fputs ("\t.fnstart\n", f);
27508 else
27509 {
27510 /* If this function will never be unwound, then mark it as such.
27511 The same condition is used in arm_unwind_emit to suppress
27512 the frame annotations. */
27513 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27514 && (TREE_NOTHROW (current_function_decl)
27515 || crtl->all_throwers_are_sibcalls))
27516 fputs("\t.cantunwind\n", f);
27517
27518 fputs ("\t.fnend\n", f);
27519 }
27520 }
27521
27522 static bool
27523 arm_emit_tls_decoration (FILE *fp, rtx x)
27524 {
27525 enum tls_reloc reloc;
27526 rtx val;
27527
27528 val = XVECEXP (x, 0, 0);
27529 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27530
27531 output_addr_const (fp, val);
27532
27533 switch (reloc)
27534 {
27535 case TLS_GD32:
27536 fputs ("(tlsgd)", fp);
27537 break;
27538 case TLS_LDM32:
27539 fputs ("(tlsldm)", fp);
27540 break;
27541 case TLS_LDO32:
27542 fputs ("(tlsldo)", fp);
27543 break;
27544 case TLS_IE32:
27545 fputs ("(gottpoff)", fp);
27546 break;
27547 case TLS_LE32:
27548 fputs ("(tpoff)", fp);
27549 break;
27550 case TLS_DESCSEQ:
27551 fputs ("(tlsdesc)", fp);
27552 break;
27553 default:
27554 gcc_unreachable ();
27555 }
27556
27557 switch (reloc)
27558 {
27559 case TLS_GD32:
27560 case TLS_LDM32:
27561 case TLS_IE32:
27562 case TLS_DESCSEQ:
27563 fputs (" + (. - ", fp);
27564 output_addr_const (fp, XVECEXP (x, 0, 2));
27565 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27566 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27567 output_addr_const (fp, XVECEXP (x, 0, 3));
27568 fputc (')', fp);
27569 break;
27570 default:
27571 break;
27572 }
27573
27574 return TRUE;
27575 }
27576
27577 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27578
27579 static void
27580 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27581 {
27582 gcc_assert (size == 4);
27583 fputs ("\t.word\t", file);
27584 output_addr_const (file, x);
27585 fputs ("(tlsldo)", file);
27586 }
27587
27588 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27589
27590 static bool
27591 arm_output_addr_const_extra (FILE *fp, rtx x)
27592 {
27593 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27594 return arm_emit_tls_decoration (fp, x);
27595 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27596 {
27597 char label[256];
27598 int labelno = INTVAL (XVECEXP (x, 0, 0));
27599
27600 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27601 assemble_name_raw (fp, label);
27602
27603 return TRUE;
27604 }
27605 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27606 {
27607 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27608 if (GOT_PCREL)
27609 fputs ("+.", fp);
27610 fputs ("-(", fp);
27611 output_addr_const (fp, XVECEXP (x, 0, 0));
27612 fputc (')', fp);
27613 return TRUE;
27614 }
27615 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27616 {
27617 output_addr_const (fp, XVECEXP (x, 0, 0));
27618 if (GOT_PCREL)
27619 fputs ("+.", fp);
27620 fputs ("-(", fp);
27621 output_addr_const (fp, XVECEXP (x, 0, 1));
27622 fputc (')', fp);
27623 return TRUE;
27624 }
27625 else if (GET_CODE (x) == CONST_VECTOR)
27626 return arm_emit_vector_const (fp, x);
27627
27628 return FALSE;
27629 }
27630
27631 /* Output assembly for a shift instruction.
27632 SET_FLAGS determines how the instruction modifies the condition codes.
27633 0 - Do not set condition codes.
27634 1 - Set condition codes.
27635 2 - Use smallest instruction. */
27636 const char *
27637 arm_output_shift(rtx * operands, int set_flags)
27638 {
27639 char pattern[100];
27640 static const char flag_chars[3] = {'?', '.', '!'};
27641 const char *shift;
27642 HOST_WIDE_INT val;
27643 char c;
27644
27645 c = flag_chars[set_flags];
27646 shift = shift_op(operands[3], &val);
27647 if (shift)
27648 {
27649 if (val != -1)
27650 operands[2] = GEN_INT(val);
27651 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27652 }
27653 else
27654 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27655
27656 output_asm_insn (pattern, operands);
27657 return "";
27658 }
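/* For example, with SET_FLAGS == 0 and an arithmetic-shift-right operand,
   shift_op returns "asr" and the emitted template is "asr%?\t%0, %1, %2";
   if the operand is not a recognisable shift the fallback is
   "mov%?\t%0, %1".  SET_FLAGS values of 1 and 2 substitute the '.' and '!'
   escapes respectively, as described in the table above. */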
27659
27660 /* Output assembly for a WMMX immediate shift instruction. */
27661 const char *
27662 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27663 {
27664 int shift = INTVAL (operands[2]);
27665 char templ[50];
27666 machine_mode opmode = GET_MODE (operands[0]);
27667
27668 gcc_assert (shift >= 0);
27669
27670 /* Handle the case where the shift value exceeds the maximum for the register
27671 versions: > 63 (for D qualifier), > 31 (for W qualifier) or > 15 (for H). */
27672 if (((opmode == V4HImode) && (shift > 15))
27673 || ((opmode == V2SImode) && (shift > 31))
27674 || ((opmode == DImode) && (shift > 63)))
27675 {
27676 if (wror_or_wsra)
27677 {
27678 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27679 output_asm_insn (templ, operands);
27680 if (opmode == DImode)
27681 {
27682 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27683 output_asm_insn (templ, operands);
27684 }
27685 }
27686 else
27687 {
27688 /* The destination register will contain all zeros. */
27689 sprintf (templ, "wzero\t%%0");
27690 output_asm_insn (templ, operands);
27691 }
27692 return "";
27693 }
27694
27695 if ((opmode == DImode) && (shift > 32))
27696 {
27697 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27698 output_asm_insn (templ, operands);
27699 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27700 output_asm_insn (templ, operands);
27701 }
27702 else
27703 {
27704 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27705 output_asm_insn (templ, operands);
27706 }
27707 return "";
27708 }
27709
27710 /* Output assembly for a WMMX tinsr instruction. */
27711 const char *
27712 arm_output_iwmmxt_tinsr (rtx *operands)
27713 {
27714 int mask = INTVAL (operands[3]);
27715 int i;
27716 char templ[50];
27717 int units = mode_nunits[GET_MODE (operands[0])];
27718 gcc_assert ((mask & (mask - 1)) == 0);
27719 for (i = 0; i < units; ++i)
27720 {
27721 if ((mask & 0x01) == 1)
27722 {
27723 break;
27724 }
27725 mask >>= 1;
27726 }
27727 gcc_assert (i < units);
27728 {
27729 switch (GET_MODE (operands[0]))
27730 {
27731 case E_V8QImode:
27732 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27733 break;
27734 case E_V4HImode:
27735 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27736 break;
27737 case E_V2SImode:
27738 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27739 break;
27740 default:
27741 gcc_unreachable ();
27742 break;
27743 }
27744 output_asm_insn (templ, operands);
27745 }
27746 return "";
27747 }
27748
27749 /* Output a Thumb-1 casesi dispatch sequence. */
27750 const char *
27751 thumb1_output_casesi (rtx *operands)
27752 {
27753 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27754
27755 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27756
27757 switch (GET_MODE(diff_vec))
27758 {
27759 case E_QImode:
27760 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27761 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27762 case E_HImode:
27763 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27764 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27765 case E_SImode:
27766 return "bl\t%___gnu_thumb1_case_si";
27767 default:
27768 gcc_unreachable ();
27769 }
27770 }
27771
27772 /* Output a Thumb-2 casesi instruction. */
27773 const char *
27774 thumb2_output_casesi (rtx *operands)
27775 {
27776 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27777
27778 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27779
27780 output_asm_insn ("cmp\t%0, %1", operands);
27781 output_asm_insn ("bhi\t%l3", operands);
27782 switch (GET_MODE(diff_vec))
27783 {
27784 case E_QImode:
27785 return "tbb\t[%|pc, %0]";
27786 case E_HImode:
27787 return "tbh\t[%|pc, %0, lsl #1]";
27788 case E_SImode:
27789 if (flag_pic)
27790 {
27791 output_asm_insn ("adr\t%4, %l2", operands);
27792 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27793 output_asm_insn ("add\t%4, %4, %5", operands);
27794 return "bx\t%4";
27795 }
27796 else
27797 {
27798 output_asm_insn ("adr\t%4, %l2", operands);
27799 return "ldr\t%|pc, [%4, %0, lsl #2]";
27800 }
27801 default:
27802 gcc_unreachable ();
27803 }
27804 }
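/* As an illustration, for a QImode dispatch table the sequence emitted here
   looks roughly like
     cmp   <index>, <bound>
     bhi   <default-label>
     tbb   [pc, <index>]
   while the SImode PIC case instead materialises the target address with
   adr/ldr/add followed by bx (a sketch only; the actual registers and
   labels come from the operands). */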
27805
27806 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27807 per-core tuning structs. */
27808 static int
27809 arm_issue_rate (void)
27810 {
27811 return current_tune->issue_rate;
27812 }
27813
27814 /* Return how many instructions the scheduler should look ahead to choose
27815 the best one. */
27816 static int
27817 arm_first_cycle_multipass_dfa_lookahead (void)
27818 {
27819 int issue_rate = arm_issue_rate ();
27820
27821 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27822 }
27823
27824 /* Enable modeling of L2 auto-prefetcher. */
27825 static int
27826 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27827 {
27828 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27829 }
27830
27831 const char *
27832 arm_mangle_type (const_tree type)
27833 {
27834 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27835 has to be mangled as if it is in the "std" namespace. */
27836 if (TARGET_AAPCS_BASED
27837 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27838 return "St9__va_list";
27839
27840 /* Half-precision float. */
27841 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27842 return "Dh";
27843
27844 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27845 builtin type. */
27846 if (TYPE_NAME (type) != NULL)
27847 return arm_mangle_builtin_type (type);
27848
27849 /* Use the default mangling. */
27850 return NULL;
27851 }
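/* Examples of the resulting manglings: a __fp16 argument mangles as "Dh",
   and a va_list argument on an AAPCS target mangles as "St9__va_list",
   i.e. as std::__va_list; Neon builtin types go through
   arm_mangle_builtin_type and everything else uses the default rules. */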
27852
27853 /* Order of allocation of core registers for Thumb: this allocation is
27854 written over the corresponding initial entries of the array
27855 initialized with REG_ALLOC_ORDER. We allocate all low registers
27856 first. Saving and restoring a low register is usually cheaper than
27857 using a call-clobbered high register. */
27858
27859 static const int thumb_core_reg_alloc_order[] =
27860 {
27861 3, 2, 1, 0, 4, 5, 6, 7,
27862 12, 14, 8, 9, 10, 11
27863 };
27864
27865 /* Adjust register allocation order when compiling for Thumb. */
27866
27867 void
27868 arm_order_regs_for_local_alloc (void)
27869 {
27870 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27871 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27872 if (TARGET_THUMB)
27873 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27874 sizeof (thumb_core_reg_alloc_order));
27875 }
27876
27877 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27878
27879 bool
27880 arm_frame_pointer_required (void)
27881 {
27882 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27883 return true;
27884
27885 /* If the function receives nonlocal gotos, it needs to save the frame
27886 pointer in the nonlocal_goto_save_area object. */
27887 if (cfun->has_nonlocal_label)
27888 return true;
27889
27890 /* The frame pointer is required for non-leaf APCS frames. */
27891 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27892 return true;
27893
27894 /* If we are probing the stack in the prologue, we will have a faulting
27895 instruction prior to the stack adjustment and this requires a frame
27896 pointer if we want to catch the exception using the EABI unwinder. */
27897 if (!IS_INTERRUPT (arm_current_func_type ())
27898 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27899 || flag_stack_clash_protection)
27900 && arm_except_unwind_info (&global_options) == UI_TARGET
27901 && cfun->can_throw_non_call_exceptions)
27902 {
27903 HOST_WIDE_INT size = get_frame_size ();
27904
27905 /* That's irrelevant if there is no stack adjustment. */
27906 if (size <= 0)
27907 return false;
27908
27909 /* That's relevant only if there is a stack probe. */
27910 if (crtl->is_leaf && !cfun->calls_alloca)
27911 {
27912 /* We don't have the final size of the frame so adjust. */
27913 size += 32 * UNITS_PER_WORD;
27914 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27915 return true;
27916 }
27917 else
27918 return true;
27919 }
27920
27921 return false;
27922 }
27923
27924 /* Only Thumb-1 lacks conditional execution, so return true if the
27925 target is not Thumb-1. */
27926 static bool
27927 arm_have_conditional_execution (void)
27928 {
27929 return !TARGET_THUMB1;
27930 }
27931
27932 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27933 static HOST_WIDE_INT
27934 arm_vector_alignment (const_tree type)
27935 {
27936 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27937
27938 if (TARGET_AAPCS_BASED)
27939 align = MIN (align, 64);
27940
27941 return align;
27942 }
27943
27944 static unsigned int
27945 arm_autovectorize_vector_sizes (void)
27946 {
27947 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27948 }
27949
27950 static bool
27951 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27952 {
27953 /* Vectors which aren't in packed structures will not be less aligned than
27954 the natural alignment of their element type, so this is safe. */
27955 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27956 return !is_packed;
27957
27958 return default_builtin_vector_alignment_reachable (type, is_packed);
27959 }
27960
27961 static bool
27962 arm_builtin_support_vector_misalignment (machine_mode mode,
27963 const_tree type, int misalignment,
27964 bool is_packed)
27965 {
27966 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27967 {
27968 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27969
27970 if (is_packed)
27971 return align == 1;
27972
27973 /* If the misalignment is unknown, we should be able to handle the access
27974 so long as it is not to a member of a packed data structure. */
27975 if (misalignment == -1)
27976 return true;
27977
27978 /* Return true if the misalignment is a multiple of the natural alignment
27979 of the vector's element type. This is probably always going to be
27980 true in practice, since we've already established that this isn't a
27981 packed access. */
27982 return ((misalignment % align) == 0);
27983 }
27984
27985 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27986 is_packed);
27987 }
27988
27989 static void
27990 arm_conditional_register_usage (void)
27991 {
27992 int regno;
27993
27994 if (TARGET_THUMB1 && optimize_size)
27995 {
27996 /* When optimizing for size on Thumb-1, it's better not
27997 to use the HI regs, because of the overhead of
27998 stacking them. */
27999 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28000 fixed_regs[regno] = call_used_regs[regno] = 1;
28001 }
28002
28003 /* The link register can be clobbered by any branch insn,
28004 but we have no way to track that at present, so mark
28005 it as unavailable. */
28006 if (TARGET_THUMB1)
28007 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28008
28009 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28010 {
28011 /* VFPv3 registers are disabled when earlier VFP
28012 versions are selected due to the definition of
28013 LAST_VFP_REGNUM. */
28014 for (regno = FIRST_VFP_REGNUM;
28015 regno <= LAST_VFP_REGNUM; ++ regno)
28016 {
28017 fixed_regs[regno] = 0;
28018 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28019 || regno >= FIRST_VFP_REGNUM + 32;
28020 }
28021 }
28022
28023 if (TARGET_REALLY_IWMMXT)
28024 {
28025 regno = FIRST_IWMMXT_GR_REGNUM;
28026 /* The 2002/10/09 revision of the XScale ABI has wCG0
28027 and wCG1 as call-preserved registers. The 2002/11/21
28028 revision changed this so that all wCG registers are
28029 scratch registers. */
28030 for (regno = FIRST_IWMMXT_GR_REGNUM;
28031 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28032 fixed_regs[regno] = 0;
28033 /* The XScale ABI has wR0 - wR9 as scratch registers,
28034 the rest as call-preserved registers. */
28035 for (regno = FIRST_IWMMXT_REGNUM;
28036 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28037 {
28038 fixed_regs[regno] = 0;
28039 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28040 }
28041 }
28042
28043 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28044 {
28045 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28046 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28047 }
28048 else if (TARGET_APCS_STACK)
28049 {
28050 fixed_regs[10] = 1;
28051 call_used_regs[10] = 1;
28052 }
28053 /* -mcaller-super-interworking reserves r11 for calls to
28054 _interwork_r11_call_via_rN(). Making the register global
28055 is an easy way of ensuring that it remains valid for all
28056 calls. */
28057 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28058 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28059 {
28060 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28061 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28062 if (TARGET_CALLER_INTERWORKING)
28063 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28064 }
28065 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28066 }
28067
28068 static reg_class_t
28069 arm_preferred_rename_class (reg_class_t rclass)
28070 {
28071 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28072 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
28073 so that code size can be reduced. */
28074 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28075 return LO_REGS;
28076 else
28077 return NO_REGS;
28078 }
28079
28080 /* Compute the attribute "length" of insn "*push_multi".
28081 So this function MUST be kept in sync with that insn pattern. */
28082 int
28083 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28084 {
28085 int i, regno, hi_reg;
28086 int num_saves = XVECLEN (parallel_op, 0);
28087
28088 /* ARM mode. */
28089 if (TARGET_ARM)
28090 return 4;
28091 /* Thumb1 mode. */
28092 if (TARGET_THUMB1)
28093 return 2;
28094
28095 /* Thumb2 mode. */
28096 regno = REGNO (first_op);
28097 /* For PUSH/STM under Thumb-2, we can use the 16-bit encodings if the register
28098 list fits in 8 bits. Normally this means all registers in the list must be
28099 LO_REGS, that is R0-R7. If any HI_REGS are used, we must use the 32-bit
28100 encoding. The one exception is PUSH, where LR (a HI_REGS register) can
28101 still be used with the 16-bit encoding. */
28102 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28103 for (i = 1; i < num_saves && !hi_reg; i++)
28104 {
28105 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28106 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28107 }
28108
28109 if (!hi_reg)
28110 return 2;
28111 return 4;
28112 }
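/* Rough examples of the returned lengths in Thumb-2: "push {r0-r7}" and
   "push {r4, lr}" use the 16-bit encoding and return 2, while
   "push {r4, r8}" needs the 32-bit encoding and returns 4.  ARM mode is
   always 4 and Thumb-1 always 2. */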
28113
28114 /* Compute the attribute "length" of an insn. Currently, this function is used
28115 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28116 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28117 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28118 true if OPERANDS contains an insn which explicitly updates the base register. */
28119
28120 int
28121 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28122 {
28123 /* ARM mode. */
28124 if (TARGET_ARM)
28125 return 4;
28126 /* Thumb1 mode. */
28127 if (TARGET_THUMB1)
28128 return 2;
28129
28130 rtx parallel_op = operands[0];
28131 /* Start INDX at the last element of the PARALLEL. */
28132 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28133 /* REGNO starts as the base register. */
28134 unsigned regno = REGNO (operands[1]);
28135 /* Skip the return and write-back patterns.
28136 We only need the register pop patterns for the later analysis. */
28137 unsigned first_indx = 0;
28138 first_indx += return_pc ? 1 : 0;
28139 first_indx += write_back_p ? 1 : 0;
28140
28141 /* A pop operation can be done through LDM or POP. If the base register is SP
28142 and write back is used, then LDM is an alias of POP. */
28143 bool pop_p = (regno == SP_REGNUM && write_back_p);
28144 bool ldm_p = !pop_p;
28145
28146 /* Check base register for LDM. */
28147 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28148 return 4;
28149
28150 /* Check each register in the list. */
28151 for (; indx >= first_indx; indx--)
28152 {
28153 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28154 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28155 comment in arm_attr_length_push_multi. */
28156 if (REGNO_REG_CLASS (regno) == HI_REGS
28157 && (regno != PC_REGNUM || ldm_p))
28158 return 4;
28159 }
28160
28161 return 2;
28162 }
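/* Rough examples in Thumb-2: "pop {r4, pc}" (SP base with write back) is a
   16-bit encoding and returns 2, whereas an LDM with a high base register
   such as "ldmia r8!, {r0, r1}" needs the 32-bit encoding and returns 4. */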
28163
28164 /* Compute the number of instructions emitted by output_move_double. */
28165 int
28166 arm_count_output_move_double_insns (rtx *operands)
28167 {
28168 int count;
28169 rtx ops[2];
28170 /* output_move_double may modify the operands array, so call it
28171 here on a copy of the array. */
28172 ops[0] = operands[0];
28173 ops[1] = operands[1];
28174 output_move_double (ops, false, &count);
28175 return count;
28176 }
28177
28178 int
28179 vfp3_const_double_for_fract_bits (rtx operand)
28180 {
28181 REAL_VALUE_TYPE r0;
28182
28183 if (!CONST_DOUBLE_P (operand))
28184 return 0;
28185
28186 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28187 if (exact_real_inverse (DFmode, &r0)
28188 && !REAL_VALUE_NEGATIVE (r0))
28189 {
28190 if (exact_real_truncate (DFmode, &r0))
28191 {
28192 HOST_WIDE_INT value = real_to_integer (&r0);
28193 value = value & 0xffffffff;
28194 if ((value != 0) && ( (value & (value - 1)) == 0))
28195 {
28196 int ret = exact_log2 (value);
28197 gcc_assert (IN_RANGE (ret, 0, 31));
28198 return ret;
28199 }
28200 }
28201 }
28202 return 0;
28203 }
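/* For example, the constant 0.25 has the exact inverse 4.0 == 2^2, so this
   returns 2 (the number of fraction bits for a vcvt fixed-point
   conversion), whereas 0.3 has no exact power-of-two reciprocal and
   yields 0. */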
28204
28205 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28206 log2 is in [1, 32], return that log2. Otherwise return -1.
28207 This is used in the patterns for vcvt.s32.f32 floating-point to
28208 fixed-point conversions. */
28209
28210 int
28211 vfp3_const_double_for_bits (rtx x)
28212 {
28213 const REAL_VALUE_TYPE *r;
28214
28215 if (!CONST_DOUBLE_P (x))
28216 return -1;
28217
28218 r = CONST_DOUBLE_REAL_VALUE (x);
28219
28220 if (REAL_VALUE_NEGATIVE (*r)
28221 || REAL_VALUE_ISNAN (*r)
28222 || REAL_VALUE_ISINF (*r)
28223 || !real_isinteger (r, SFmode))
28224 return -1;
28225
28226 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28227
28228 /* The exact_log2 above will have returned -1 if this is
28229 not an exact log2. */
28230 if (!IN_RANGE (hwint, 1, 32))
28231 return -1;
28232
28233 return hwint;
28234 }
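/* For example, 4.0 yields 2 and 4294967296.0 (2^32) yields 32, while 1.0,
   3.0, negative values, NaNs and infinities all yield -1. */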
28235
28236 \f
28237 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28238
28239 static void
28240 arm_pre_atomic_barrier (enum memmodel model)
28241 {
28242 if (need_atomic_barrier_p (model, true))
28243 emit_insn (gen_memory_barrier ());
28244 }
28245
28246 static void
28247 arm_post_atomic_barrier (enum memmodel model)
28248 {
28249 if (need_atomic_barrier_p (model, false))
28250 emit_insn (gen_memory_barrier ());
28251 }
28252
28253 /* Emit the load-exclusive and store-exclusive instructions.
28254 Use acquire and release versions if necessary. */
28255
28256 static void
28257 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28258 {
28259 rtx (*gen) (rtx, rtx);
28260
28261 if (acq)
28262 {
28263 switch (mode)
28264 {
28265 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28266 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28267 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28268 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28269 default:
28270 gcc_unreachable ();
28271 }
28272 }
28273 else
28274 {
28275 switch (mode)
28276 {
28277 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28278 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28279 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28280 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28281 default:
28282 gcc_unreachable ();
28283 }
28284 }
28285
28286 emit_insn (gen (rval, mem));
28287 }
28288
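/* Note on the parameter order of arm_emit_store_exclusive below: as the
   call sites later in this file show, BVAL receives the success/failure
   result register, RVAL the memory operand and MEM the value to be stored,
   which presumably matches the (result, memory, value) operand order of
   the store-exclusive patterns. */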
28289 static void
28290 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28291 rtx mem, bool rel)
28292 {
28293 rtx (*gen) (rtx, rtx, rtx);
28294
28295 if (rel)
28296 {
28297 switch (mode)
28298 {
28299 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28300 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28301 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28302 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28303 default:
28304 gcc_unreachable ();
28305 }
28306 }
28307 else
28308 {
28309 switch (mode)
28310 {
28311 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28312 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28313 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28314 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28315 default:
28316 gcc_unreachable ();
28317 }
28318 }
28319
28320 emit_insn (gen (bval, rval, mem));
28321 }
28322
28323 /* Mark the previous jump instruction as unlikely. */
28324
28325 static void
28326 emit_unlikely_jump (rtx insn)
28327 {
28328 rtx_insn *jump = emit_jump_insn (insn);
28329 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28330 }
28331
28332 /* Expand a compare and swap pattern. */
28333
28334 void
28335 arm_expand_compare_and_swap (rtx operands[])
28336 {
28337 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28338 machine_mode mode;
28339 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28340
28341 bval = operands[0];
28342 rval = operands[1];
28343 mem = operands[2];
28344 oldval = operands[3];
28345 newval = operands[4];
28346 is_weak = operands[5];
28347 mod_s = operands[6];
28348 mod_f = operands[7];
28349 mode = GET_MODE (mem);
28350
28351 /* Normally the succ memory model must be stronger than fail, but in the
28352 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28353 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28354
28355 if (TARGET_HAVE_LDACQ
28356 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28357 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28358 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28359
28360 switch (mode)
28361 {
28362 case E_QImode:
28363 case E_HImode:
28364 /* For narrow modes, we're going to perform the comparison in SImode,
28365 so do the zero-extension now. */
28366 rval = gen_reg_rtx (SImode);
28367 oldval = convert_modes (SImode, mode, oldval, true);
28368 /* FALLTHRU */
28369
28370 case E_SImode:
28371 /* Force the value into a register if needed. We waited until after
28372 the zero-extension above to do this properly. */
28373 if (!arm_add_operand (oldval, SImode))
28374 oldval = force_reg (SImode, oldval);
28375 break;
28376
28377 case E_DImode:
28378 if (!cmpdi_operand (oldval, mode))
28379 oldval = force_reg (mode, oldval);
28380 break;
28381
28382 default:
28383 gcc_unreachable ();
28384 }
28385
28386 if (TARGET_THUMB1)
28387 {
28388 switch (mode)
28389 {
28390 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28391 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28392 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28393 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28394 default:
28395 gcc_unreachable ();
28396 }
28397 }
28398 else
28399 {
28400 switch (mode)
28401 {
28402 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28403 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28404 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28405 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28406 default:
28407 gcc_unreachable ();
28408 }
28409 }
28410
28411 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28412 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28413
28414 if (mode == QImode || mode == HImode)
28415 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28416
28417 /* In all cases, we arrange for success to be signaled by Z set.
28418 This arrangement allows for the boolean result to be used directly
28419 in a subsequent branch, post optimization. For Thumb-1 targets, the
28420 boolean negation of the result is also stored in bval because the Thumb-1
28421 backend lacks dependency tracking for the CC flag, due to flag-setting
28422 not being represented at the RTL level. */
28423 if (TARGET_THUMB1)
28424 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28425 else
28426 {
28427 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28428 emit_insn (gen_rtx_SET (bval, x));
28429 }
28430 }
28431
28432 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28433 another memory store between the load-exclusive and store-exclusive can
28434 reset the monitor from Exclusive to Open state. This means we must wait
28435 until after reload to split the pattern, lest we get a register spill in
28436 the middle of the atomic sequence. Success of the compare and swap is
28437 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28438 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28439 atomic_compare_and_swapmode standard pattern in operand 0). */
28440
28441 void
28442 arm_split_compare_and_swap (rtx operands[])
28443 {
28444 rtx rval, mem, oldval, newval, neg_bval;
28445 machine_mode mode;
28446 enum memmodel mod_s, mod_f;
28447 bool is_weak;
28448 rtx_code_label *label1, *label2;
28449 rtx x, cond;
28450
28451 rval = operands[1];
28452 mem = operands[2];
28453 oldval = operands[3];
28454 newval = operands[4];
28455 is_weak = (operands[5] != const0_rtx);
28456 mod_s = memmodel_from_int (INTVAL (operands[6]));
28457 mod_f = memmodel_from_int (INTVAL (operands[7]));
28458 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28459 mode = GET_MODE (mem);
28460
28461 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28462
28463 bool use_acquire = TARGET_HAVE_LDACQ
28464 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28465 || is_mm_release (mod_s));
28466
28467 bool use_release = TARGET_HAVE_LDACQ
28468 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28469 || is_mm_acquire (mod_s));
28470
28471 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28472 a full barrier is emitted after the store-release. */
28473 if (is_armv8_sync)
28474 use_acquire = false;
28475
28476 /* Checks whether a barrier is needed and emits one accordingly. */
28477 if (!(use_acquire || use_release))
28478 arm_pre_atomic_barrier (mod_s);
28479
28480 label1 = NULL;
28481 if (!is_weak)
28482 {
28483 label1 = gen_label_rtx ();
28484 emit_label (label1);
28485 }
28486 label2 = gen_label_rtx ();
28487
28488 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28489
28490 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28491 as required to communicate with arm_expand_compare_and_swap. */
28492 if (TARGET_32BIT)
28493 {
28494 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28495 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28496 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28497 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28498 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28499 }
28500 else
28501 {
28502 emit_move_insn (neg_bval, const1_rtx);
28503 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28504 if (thumb1_cmpneg_operand (oldval, SImode))
28505 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28506 label2, cond));
28507 else
28508 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28509 }
28510
28511 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28512
28513 /* Weak or strong, we want EQ to be true for success, so that we
28514 match the flags that we got from the compare above. */
28515 if (TARGET_32BIT)
28516 {
28517 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28518 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28519 emit_insn (gen_rtx_SET (cond, x));
28520 }
28521
28522 if (!is_weak)
28523 {
28524 /* Z is set to boolean value of !neg_bval, as required to communicate
28525 with arm_expand_compare_and_swap. */
28526 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28527 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28528 }
28529
28530 if (!is_mm_relaxed (mod_f))
28531 emit_label (label2);
28532
28533 /* Checks whether a barrier is needed and emits one accordingly. */
28534 if (is_armv8_sync
28535 || !(use_acquire || use_release))
28536 arm_post_atomic_barrier (mod_s);
28537
28538 if (is_mm_relaxed (mod_f))
28539 emit_label (label2);
28540 }
28541
28542 /* Split an atomic operation pattern. The operation is given by CODE and is one
28543 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28544 operation). The operation is performed on the content at MEM and on VALUE
28545 following the memory model MODEL_RTX. The content at MEM before and after
28546 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28547 success of the operation is returned in COND. Using a scratch register or
28548 an operand register for these determines what result is returned for that
28549 pattern. */
28550
28551 void
28552 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28553 rtx value, rtx model_rtx, rtx cond)
28554 {
28555 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28556 machine_mode mode = GET_MODE (mem);
28557 machine_mode wmode = (mode == DImode ? DImode : SImode);
28558 rtx_code_label *label;
28559 bool all_low_regs, bind_old_new;
28560 rtx x;
28561
28562 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28563
28564 bool use_acquire = TARGET_HAVE_LDACQ
28565 && !(is_mm_relaxed (model) || is_mm_consume (model)
28566 || is_mm_release (model));
28567
28568 bool use_release = TARGET_HAVE_LDACQ
28569 && !(is_mm_relaxed (model) || is_mm_consume (model)
28570 || is_mm_acquire (model));
28571
28572 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28573 a full barrier is emitted after the store-release. */
28574 if (is_armv8_sync)
28575 use_acquire = false;
28576
28577 /* Checks whether a barrier is needed and emits one accordingly. */
28578 if (!(use_acquire || use_release))
28579 arm_pre_atomic_barrier (model);
28580
28581 label = gen_label_rtx ();
28582 emit_label (label);
28583
28584 if (new_out)
28585 new_out = gen_lowpart (wmode, new_out);
28586 if (old_out)
28587 old_out = gen_lowpart (wmode, old_out);
28588 else
28589 old_out = new_out;
28590 value = simplify_gen_subreg (wmode, value, mode, 0);
28591
28592 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28593
28594 /* Does the operation require the destination and first operand to use the
28595 same register? This is decided by the register constraints of the relevant
28596 insn patterns in thumb1.md. */
28597 gcc_assert (!new_out || REG_P (new_out));
28598 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28599 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28600 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28601 bind_old_new =
28602 (TARGET_THUMB1
28603 && code != SET
28604 && code != MINUS
28605 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28606
28607 /* We want to return the old value while putting the result of the operation
28608 in the same register as the old value so copy the old value over to the
28609 destination register and use that register for the operation. */
28610 if (old_out && bind_old_new)
28611 {
28612 emit_move_insn (new_out, old_out);
28613 old_out = new_out;
28614 }
28615
28616 switch (code)
28617 {
28618 case SET:
28619 new_out = value;
28620 break;
28621
28622 case NOT:
28623 x = gen_rtx_AND (wmode, old_out, value);
28624 emit_insn (gen_rtx_SET (new_out, x));
28625 x = gen_rtx_NOT (wmode, new_out);
28626 emit_insn (gen_rtx_SET (new_out, x));
28627 break;
28628
28629 case MINUS:
28630 if (CONST_INT_P (value))
28631 {
28632 value = GEN_INT (-INTVAL (value));
28633 code = PLUS;
28634 }
28635 /* FALLTHRU */
28636
28637 case PLUS:
28638 if (mode == DImode)
28639 {
28640 /* DImode plus/minus need to clobber flags. */
28641 /* The adddi3 and subdi3 patterns are incorrectly written so that
28642 they require matching operands, even when we could easily support
28643 three operands. Thankfully, this can be fixed up post-splitting,
28644 as the individual add+adc patterns do accept three operands and
28645 post-reload cprop can make these moves go away. */
28646 emit_move_insn (new_out, old_out);
28647 if (code == PLUS)
28648 x = gen_adddi3 (new_out, new_out, value);
28649 else
28650 x = gen_subdi3 (new_out, new_out, value);
28651 emit_insn (x);
28652 break;
28653 }
28654 /* FALLTHRU */
28655
28656 default:
28657 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28658 emit_insn (gen_rtx_SET (new_out, x));
28659 break;
28660 }
28661
28662 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28663 use_release);
28664
28665 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28666 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28667
28668 /* Checks whether a barrier is needed and emits one accordingly. */
28669 if (is_armv8_sync
28670 || !(use_acquire || use_release))
28671 arm_post_atomic_barrier (model);
28672 }
28673 \f
28674 #define MAX_VECT_LEN 16
28675
28676 struct expand_vec_perm_d
28677 {
28678 rtx target, op0, op1;
28679 auto_vec_perm_indices perm;
28680 machine_mode vmode;
28681 bool one_vector_p;
28682 bool testing_p;
28683 };
28684
28685 /* Generate a variable permutation. */
28686
28687 static void
28688 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28689 {
28690 machine_mode vmode = GET_MODE (target);
28691 bool one_vector_p = rtx_equal_p (op0, op1);
28692
28693 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28694 gcc_checking_assert (GET_MODE (op0) == vmode);
28695 gcc_checking_assert (GET_MODE (op1) == vmode);
28696 gcc_checking_assert (GET_MODE (sel) == vmode);
28697 gcc_checking_assert (TARGET_NEON);
28698
28699 if (one_vector_p)
28700 {
28701 if (vmode == V8QImode)
28702 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28703 else
28704 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28705 }
28706 else
28707 {
28708 rtx pair;
28709
28710 if (vmode == V8QImode)
28711 {
28712 pair = gen_reg_rtx (V16QImode);
28713 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28714 pair = gen_lowpart (TImode, pair);
28715 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28716 }
28717 else
28718 {
28719 pair = gen_reg_rtx (OImode);
28720 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28721 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28722 }
28723 }
28724 }
28725
28726 void
28727 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28728 {
28729 machine_mode vmode = GET_MODE (target);
28730 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28731 bool one_vector_p = rtx_equal_p (op0, op1);
28732 rtx rmask[MAX_VECT_LEN], mask;
28733
28734 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28735 numbering of elements for big-endian, we must reverse the order. */
28736 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28737
28738 /* The VTBL instruction does not use a modulo index, so we must take care
28739 of that ourselves. */
28740 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28741 for (i = 0; i < nelt; ++i)
28742 rmask[i] = mask;
28743 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28744 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28745
28746 arm_expand_vec_perm_1 (target, op0, op1, sel);
28747 }
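/* Illustrative example: with V8QImode and op0 == op1, nelt is 8 and the mask
   built above is 7, so a selector byte of 10 is reduced to lane 2 before the
   VTBL is emitted. Without that AND, VTBL would write zero for any lane whose
   index is out of range, rather than the modulo behaviour VEC_PERM_EXPR
   expects. */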
28748
28749 /* Map lane ordering between architectural lane order, and GCC lane order,
28750 taking into account ABI. See comment above output_move_neon for details. */
28751
28752 static int
28753 neon_endian_lane_map (machine_mode mode, int lane)
28754 {
28755 if (BYTES_BIG_ENDIAN)
28756 {
28757 int nelems = GET_MODE_NUNITS (mode);
28758 /* Reverse lane order. */
28759 lane = (nelems - 1 - lane);
28760 /* Reverse D register order, to match ABI. */
28761 if (GET_MODE_SIZE (mode) == 16)
28762 lane = lane ^ (nelems / 2);
28763 }
28764 return lane;
28765 }
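/* Worked example (illustrative): on a big-endian target, V4SImode is a
   16-byte mode with four lanes, so the code above maps architectural lanes
   0, 1, 2, 3 to GCC lanes 1, 0, 3, 2 - lanes are swapped within each D
   register while the D registers keep their places. For an 8-byte mode such
   as V2SImode the mapping is a plain reversal: 0 -> 1, 1 -> 0. */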
28766
28767 /* Some permutations index into pairs of vectors; this is a helper function
28768 to map indexes into those pairs of vectors. */
28769
28770 static int
28771 neon_pair_endian_lane_map (machine_mode mode, int lane)
28772 {
28773 int nelem = GET_MODE_NUNITS (mode);
28774 if (BYTES_BIG_ENDIAN)
28775 lane =
28776 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28777 return lane;
28778 }
28779
28780 /* Generate or test for an insn that supports a constant permutation. */
28781
28782 /* Recognize patterns for the VUZP insns. */
28783
28784 static bool
28785 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28786 {
28787 unsigned int i, odd, mask, nelt = d->perm.length ();
28788 rtx out0, out1, in0, in1;
28789 rtx (*gen)(rtx, rtx, rtx, rtx);
28790 int first_elem;
28791 int swap_nelt;
28792
28793 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28794 return false;
28795
28796 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28797 big-endian pattern on 64-bit vectors, so we correct for that. */
28798 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28799 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28800
28801 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28802
28803 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28804 odd = 0;
28805 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28806 odd = 1;
28807 else
28808 return false;
28809 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28810
28811 for (i = 0; i < nelt; i++)
28812 {
28813 unsigned elt =
28814 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28815 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28816 return false;
28817 }
28818
28819 /* Success! */
28820 if (d->testing_p)
28821 return true;
28822
28823 switch (d->vmode)
28824 {
28825 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28826 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28827 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28828 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28829 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28830 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28831 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28832 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28833 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28834 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28835 default:
28836 gcc_unreachable ();
28837 }
28838
28839 in0 = d->op0;
28840 in1 = d->op1;
28841 if (swap_nelt != 0)
28842 std::swap (in0, in1);
28843
28844 out0 = d->target;
28845 out1 = gen_reg_rtx (d->vmode);
28846 if (odd)
28847 std::swap (out0, out1);
28848
28849 emit_insn (gen (out0, in0, in1, out1));
28850 return true;
28851 }
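/* Illustrative example (little-endian): for a two-operand V8QImode
   permutation, the selector { 0, 2, 4, 6, 8, 10, 12, 14 } picks the
   even-numbered elements of the concatenated inputs and is matched above with
   odd == 0, while { 1, 3, 5, 7, 9, 11, 13, 15 } picks the odd elements and is
   matched with odd == 1; each expands to a single VUZP. */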
28852
28853 /* Recognize patterns for the VZIP insns. */
28854
28855 static bool
28856 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28857 {
28858 unsigned int i, high, mask, nelt = d->perm.length ();
28859 rtx out0, out1, in0, in1;
28860 rtx (*gen)(rtx, rtx, rtx, rtx);
28861 int first_elem;
28862 bool is_swapped;
28863
28864 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28865 return false;
28866
28867 is_swapped = BYTES_BIG_ENDIAN;
28868
28869 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28870
28871 high = nelt / 2;
28872 if (first_elem == neon_endian_lane_map (d->vmode, high))
28873 ;
28874 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28875 high = 0;
28876 else
28877 return false;
28878 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28879
28880 for (i = 0; i < nelt / 2; i++)
28881 {
28882 unsigned elt =
28883 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28884 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28885 != elt)
28886 return false;
28887 elt =
28888 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28889 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28890 != elt)
28891 return false;
28892 }
28893
28894 /* Success! */
28895 if (d->testing_p)
28896 return true;
28897
28898 switch (d->vmode)
28899 {
28900 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28901 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28902 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28903 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28904 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28905 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28906 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28907 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28908 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28909 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28910 default:
28911 gcc_unreachable ();
28912 }
28913
28914 in0 = d->op0;
28915 in1 = d->op1;
28916 if (is_swapped)
28917 std::swap (in0, in1);
28918
28919 out0 = d->target;
28920 out1 = gen_reg_rtx (d->vmode);
28921 if (high)
28922 std::swap (out0, out1);
28923
28924 emit_insn (gen (out0, in0, in1, out1));
28925 return true;
28926 }
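/* Illustrative example (little-endian): for a two-operand V8QImode
   permutation, the selector { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low
   halves of the two inputs and is matched above with high == 0, while
   { 4, 12, 5, 13, 6, 14, 7, 15 } interleaves the high halves
   (high == nelt / 2); each expands to a single VZIP. */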
28927
28928 /* Recognize patterns for the VREV insns. */
28929
28930 static bool
28931 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28932 {
28933 unsigned int i, j, diff, nelt = d->perm.length ();
28934 rtx (*gen)(rtx, rtx);
28935
28936 if (!d->one_vector_p)
28937 return false;
28938
28939 diff = d->perm[0];
28940 switch (diff)
28941 {
28942 case 7:
28943 switch (d->vmode)
28944 {
28945 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28946 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28947 default:
28948 return false;
28949 }
28950 break;
28951 case 3:
28952 switch (d->vmode)
28953 {
28954 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28955 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28956 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28957 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28958 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28959 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28960 default:
28961 return false;
28962 }
28963 break;
28964 case 1:
28965 switch (d->vmode)
28966 {
28967 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28968 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28969 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28970 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28971 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28972 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28973 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28974 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28975 default:
28976 return false;
28977 }
28978 break;
28979 default:
28980 return false;
28981 }
28982
28983 for (i = 0; i < nelt ; i += diff + 1)
28984 for (j = 0; j <= diff; j += 1)
28985 {
28986 /* This is guaranteed to be true as the value of diff
28987 is 7, 3, 1 and we should have enough elements in the
28988 queue to generate this. Getting a vector mask with a
28989 value of diff other than these values implies that
28990 something is wrong by the time we get here. */
28991 gcc_assert (i + j < nelt);
28992 if (d->perm[i + j] != i + diff - j)
28993 return false;
28994 }
28995
28996 /* Success! */
28997 if (d->testing_p)
28998 return true;
28999
29000 emit_insn (gen (d->target, d->op0));
29001 return true;
29002 }
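/* Illustrative example: a single-operand V8QImode selector of
   { 7, 6, 5, 4, 3, 2, 1, 0 } has diff == 7 and expands to VREV64.8, while
   { 3, 2, 1, 0, 7, 6, 5, 4 } has diff == 3 and expands to VREV32.8; the
   instructions reverse elements within 64-bit and 32-bit groups
   respectively. */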
29003
29004 /* Recognize patterns for the VTRN insns. */
29005
29006 static bool
29007 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29008 {
29009 unsigned int i, odd, mask, nelt = d->perm.length ();
29010 rtx out0, out1, in0, in1;
29011 rtx (*gen)(rtx, rtx, rtx, rtx);
29012
29013 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29014 return false;
29015
29016 /* Note that these are little-endian tests. Adjust for big-endian later. */
29017 if (d->perm[0] == 0)
29018 odd = 0;
29019 else if (d->perm[0] == 1)
29020 odd = 1;
29021 else
29022 return false;
29023 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29024
29025 for (i = 0; i < nelt; i += 2)
29026 {
29027 if (d->perm[i] != i + odd)
29028 return false;
29029 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29030 return false;
29031 }
29032
29033 /* Success! */
29034 if (d->testing_p)
29035 return true;
29036
29037 switch (d->vmode)
29038 {
29039 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29040 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29041 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29042 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29043 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29044 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29045 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29046 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29047 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29048 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29049 default:
29050 gcc_unreachable ();
29051 }
29052
29053 in0 = d->op0;
29054 in1 = d->op1;
29055 if (BYTES_BIG_ENDIAN)
29056 {
29057 std::swap (in0, in1);
29058 odd = !odd;
29059 }
29060
29061 out0 = d->target;
29062 out1 = gen_reg_rtx (d->vmode);
29063 if (odd)
29064 std::swap (out0, out1);
29065
29066 emit_insn (gen (out0, in0, in1, out1));
29067 return true;
29068 }
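/* Illustrative example (little-endian): for a two-operand V4SImode
   permutation, the selector { 0, 4, 2, 6 } is matched above with odd == 0 and
   { 1, 5, 3, 7 } with odd == 1; each expands to a single VTRN.32, which
   transposes pairs of elements drawn from the two inputs. */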
29069
29070 /* Recognize patterns for the VEXT insns. */
29071
29072 static bool
29073 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29074 {
29075 unsigned int i, nelt = d->perm.length ();
29076 rtx (*gen) (rtx, rtx, rtx, rtx);
29077 rtx offset;
29078
29079 unsigned int location;
29080
29081 unsigned int next = d->perm[0] + 1;
29082
29083 /* TODO: Handle GCC's numbering of elements for big-endian. */
29084 if (BYTES_BIG_ENDIAN)
29085 return false;
29086
29087 /* Check if the extracted indexes are increasing by one. */
29088 for (i = 1; i < nelt; next++, i++)
29089 {
29090 /* If we hit the most significant element of the 2nd vector in
29091 the previous iteration, no need to test further. */
29092 if (next == 2 * nelt)
29093 return false;
29094
29095 /* If we are operating on only one vector, it could be a
29096 rotation. If there are only two elements of size < 64, let
29097 arm_evpc_neon_vrev catch it. */
29098 if (d->one_vector_p && (next == nelt))
29099 {
29100 if ((nelt == 2) && (d->vmode != V2DImode))
29101 return false;
29102 else
29103 next = 0;
29104 }
29105
29106 if (d->perm[i] != next)
29107 return false;
29108 }
29109
29110 location = d->perm[0];
29111
29112 switch (d->vmode)
29113 {
29114 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29115 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29116 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29117 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29118 case E_V2SImode: gen = gen_neon_vextv2si; break;
29119 case E_V4SImode: gen = gen_neon_vextv4si; break;
29120 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29121 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29122 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29123 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29124 case E_V2DImode: gen = gen_neon_vextv2di; break;
29125 default:
29126 return false;
29127 }
29128
29129 /* Success! */
29130 if (d->testing_p)
29131 return true;
29132
29133 offset = GEN_INT (location);
29134 emit_insn (gen (d->target, d->op0, d->op1, offset));
29135 return true;
29136 }
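/* Illustrative example (little-endian): a two-operand V8QImode selector of
   { 3, 4, 5, 6, 7, 8, 9, 10 } is a run of consecutive indexes starting at 3,
   so it is matched above and expands to VEXT.8 with an offset of 3. On
   big-endian the function bails out early (see the TODO above). */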
29137
29138 /* The NEON VTBL instruction is a fully variable permutation that's even
29139 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29140 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29141 can do slightly better by expanding this as a constant where we don't
29142 have to apply a mask. */
29143
29144 static bool
29145 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29146 {
29147 rtx rperm[MAX_VECT_LEN], sel;
29148 machine_mode vmode = d->vmode;
29149 unsigned int i, nelt = d->perm.length ();
29150
29151 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29152 numbering of elements for big-endian, we must reverse the order. */
29153 if (BYTES_BIG_ENDIAN)
29154 return false;
29155
29156 if (d->testing_p)
29157 return true;
29158
29159 /* Generic code will try constant permutation twice: once with the
29160 original mode and again with the elements lowered to QImode.
29161 So wait and don't do the selector expansion ourselves. */
29162 if (vmode != V8QImode && vmode != V16QImode)
29163 return false;
29164
29165 for (i = 0; i < nelt; ++i)
29166 rperm[i] = GEN_INT (d->perm[i]);
29167 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29168 sel = force_reg (vmode, sel);
29169
29170 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29171 return true;
29172 }
29173
29174 static bool
29175 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29176 {
29177 /* Check if the input mask matches vext before reordering the
29178 operands. */
29179 if (TARGET_NEON)
29180 if (arm_evpc_neon_vext (d))
29181 return true;
29182
29183 /* The pattern matching functions above are written to look for a small
29184 number to begin the sequence (0, 1, N/2). If we begin with an index
29185 from the second operand, we can swap the operands. */
29186 unsigned int nelt = d->perm.length ();
29187 if (d->perm[0] >= nelt)
29188 {
29189 for (unsigned int i = 0; i < nelt; ++i)
29190 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29191
29192 std::swap (d->op0, d->op1);
29193 }
29194
29195 if (TARGET_NEON)
29196 {
29197 if (arm_evpc_neon_vuzp (d))
29198 return true;
29199 if (arm_evpc_neon_vzip (d))
29200 return true;
29201 if (arm_evpc_neon_vrev (d))
29202 return true;
29203 if (arm_evpc_neon_vtrn (d))
29204 return true;
29205 return arm_evpc_neon_vtbl (d);
29206 }
29207 return false;
29208 }
29209
29210 /* Expand a vec_perm_const pattern. */
29211
29212 bool
29213 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29214 {
29215 struct expand_vec_perm_d d;
29216 int i, nelt, which;
29217
29218 d.target = target;
29219 d.op0 = op0;
29220 d.op1 = op1;
29221
29222 d.vmode = GET_MODE (target);
29223 gcc_assert (VECTOR_MODE_P (d.vmode));
29224 d.testing_p = false;
29225
29226 nelt = GET_MODE_NUNITS (d.vmode);
29227 d.perm.reserve (nelt);
29228 for (i = which = 0; i < nelt; ++i)
29229 {
29230 rtx e = XVECEXP (sel, 0, i);
29231 int ei = INTVAL (e) & (2 * nelt - 1);
29232 which |= (ei < nelt ? 1 : 2);
29233 d.perm.quick_push (ei);
29234 }
29235
29236 switch (which)
29237 {
29238 default:
29239 gcc_unreachable();
29240
29241 case 3:
29242 d.one_vector_p = false;
29243 if (!rtx_equal_p (op0, op1))
29244 break;
29245
29246 /* The elements of PERM do not suggest that only the first operand
29247 is used, but both operands are identical. Allow easier matching
29248 of the permutation by folding the permutation into the single
29249 input vector. */
29250 /* FALLTHRU */
29251 case 2:
29252 for (i = 0; i < nelt; ++i)
29253 d.perm[i] &= nelt - 1;
29254 d.op0 = op1;
29255 d.one_vector_p = true;
29256 break;
29257
29258 case 1:
29259 d.op1 = op0;
29260 d.one_vector_p = true;
29261 break;
29262 }
29263
29264 return arm_expand_vec_perm_const_1 (&d);
29265 }
29266
29267 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29268
29269 static bool
29270 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29271 {
29272 struct expand_vec_perm_d d;
29273 unsigned int i, nelt, which;
29274 bool ret;
29275
29276 d.vmode = vmode;
29277 d.testing_p = true;
29278 d.perm.safe_splice (sel);
29279
29280 /* Categorize the set of elements in the selector. */
29281 nelt = GET_MODE_NUNITS (d.vmode);
29282 for (i = which = 0; i < nelt; ++i)
29283 {
29284 unsigned int e = d.perm[i];
29285 gcc_assert (e < 2 * nelt);
29286 which |= (e < nelt ? 1 : 2);
29287 }
29288
29289 /* If all elements come from the second vector, fold them onto the first. */
29290 if (which == 2)
29291 for (i = 0; i < nelt; ++i)
29292 d.perm[i] -= nelt;
29293
29294 /* Check whether the mask can be applied to the vector type. */
29295 d.one_vector_p = (which != 3);
29296
29297 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29298 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29299 if (!d.one_vector_p)
29300 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29301
29302 start_sequence ();
29303 ret = arm_expand_vec_perm_const_1 (&d);
29304 end_sequence ();
29305
29306 return ret;
29307 }
29308
29309 bool
29310 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29311 {
29312 /* In soft-float mode, all auto-increment forms are OK provided we
29313 either have LDRD or the access fits in a single word. */
29314 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29315 return true;
29316
29317 switch (code)
29318 {
29319 /* Post-increment and pre-decrement are supported for all
29320 instruction forms except for vector forms. */
29321 case ARM_POST_INC:
29322 case ARM_PRE_DEC:
29323 if (VECTOR_MODE_P (mode))
29324 {
29325 if (code != ARM_PRE_DEC)
29326 return true;
29327 else
29328 return false;
29329 }
29330
29331 return true;
29332
29333 case ARM_POST_DEC:
29334 case ARM_PRE_INC:
29335 /* Without LDRD, when the mode size is greater than the
29336 word size there is no point in auto-incrementing,
29337 because ldm and stm do not have these forms. */
29338 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29339 return false;
29340
29341 /* Vector and floating point modes do not support
29342 these auto increment forms. */
29343 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29344 return false;
29345
29346 return true;
29347
29348 default:
29349 return false;
29350
29351 }
29352
29353 return false;
29354 }
29355
29356 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29357 on ARM, since we know that shifts by negative amounts are no-ops.
29358 Additionally, the default expansion code is not available or suitable
29359 for post-reload insn splits (this can occur when the register allocator
29360 chooses not to do a shift in NEON).
29361
29362 This function is used in both initial expand and post-reload splits, and
29363 handles all kinds of 64-bit shifts.
29364
29365 Input requirements:
29366 - It is safe for the input and output to be the same register, but
29367 early-clobber rules apply for the shift amount and scratch registers.
29368 - Shift by register requires both scratch registers. In all other cases
29369 the scratch registers may be NULL.
29370 - Ashiftrt by a register also clobbers the CC register. */
29371 void
29372 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29373 rtx amount, rtx scratch1, rtx scratch2)
29374 {
29375 rtx out_high = gen_highpart (SImode, out);
29376 rtx out_low = gen_lowpart (SImode, out);
29377 rtx in_high = gen_highpart (SImode, in);
29378 rtx in_low = gen_lowpart (SImode, in);
29379
29380 /* Terminology:
29381 in = the register pair containing the input value.
29382 out = the destination register pair.
29383 up = the high- or low-part of each pair.
29384 down = the opposite part to "up".
29385 In a shift, we can consider bits to shift from "up"-stream to
29386 "down"-stream, so in a left-shift "up" is the low-part and "down"
29387 is the high-part of each register pair. */
29388
29389 rtx out_up = code == ASHIFT ? out_low : out_high;
29390 rtx out_down = code == ASHIFT ? out_high : out_low;
29391 rtx in_up = code == ASHIFT ? in_low : in_high;
29392 rtx in_down = code == ASHIFT ? in_high : in_low;
29393
29394 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29395 gcc_assert (out
29396 && (REG_P (out) || GET_CODE (out) == SUBREG)
29397 && GET_MODE (out) == DImode);
29398 gcc_assert (in
29399 && (REG_P (in) || GET_CODE (in) == SUBREG)
29400 && GET_MODE (in) == DImode);
29401 gcc_assert (amount
29402 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29403 && GET_MODE (amount) == SImode)
29404 || CONST_INT_P (amount)));
29405 gcc_assert (scratch1 == NULL
29406 || (GET_CODE (scratch1) == SCRATCH)
29407 || (GET_MODE (scratch1) == SImode
29408 && REG_P (scratch1)));
29409 gcc_assert (scratch2 == NULL
29410 || (GET_CODE (scratch2) == SCRATCH)
29411 || (GET_MODE (scratch2) == SImode
29412 && REG_P (scratch2)));
29413 gcc_assert (!REG_P (out) || !REG_P (amount)
29414 || !HARD_REGISTER_P (out)
29415 || (REGNO (out) != REGNO (amount)
29416 && REGNO (out) + 1 != REGNO (amount)));
29417
29418 /* Macros to make following code more readable. */
29419 #define SUB_32(DEST,SRC) \
29420 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29421 #define RSB_32(DEST,SRC) \
29422 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29423 #define SUB_S_32(DEST,SRC) \
29424 gen_addsi3_compare0 ((DEST), (SRC), \
29425 GEN_INT (-32))
29426 #define SET(DEST,SRC) \
29427 gen_rtx_SET ((DEST), (SRC))
29428 #define SHIFT(CODE,SRC,AMOUNT) \
29429 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29430 #define LSHIFT(CODE,SRC,AMOUNT) \
29431 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29432 SImode, (SRC), (AMOUNT))
29433 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29434 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29435 SImode, (SRC), (AMOUNT))
29436 #define ORR(A,B) \
29437 gen_rtx_IOR (SImode, (A), (B))
29438 #define BRANCH(COND,LABEL) \
29439 gen_arm_cond_branch ((LABEL), \
29440 gen_rtx_ ## COND (CCmode, cc_reg, \
29441 const0_rtx), \
29442 cc_reg)
29443
29444 /* Shifts by register and shifts by constant are handled separately. */
29445 if (CONST_INT_P (amount))
29446 {
29447 /* We have a shift-by-constant. */
29448
29449 /* First, handle out-of-range shift amounts.
29450 In both cases we try to match the result that an ARM instruction in a
29451 shift-by-register would give. This helps reduce execution
29452 differences between optimization levels, but it won't stop other
29453 parts of the compiler doing different things. This is "undefined
29454 behavior", in any case. */
29455 if (INTVAL (amount) <= 0)
29456 emit_insn (gen_movdi (out, in));
29457 else if (INTVAL (amount) >= 64)
29458 {
29459 if (code == ASHIFTRT)
29460 {
29461 rtx const31_rtx = GEN_INT (31);
29462 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29463 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29464 }
29465 else
29466 emit_insn (gen_movdi (out, const0_rtx));
29467 }
29468
29469 /* Now handle valid shifts. */
29470 else if (INTVAL (amount) < 32)
29471 {
29472 /* Shifts by a constant less than 32. */
29473 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29474
29475 /* Clearing the out register in DImode first avoids lots
29476 of spilling and results in less stack usage.
29477 Later this redundant insn is completely removed.
29478 Do that only if "in" and "out" are different registers. */
29479 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29480 emit_insn (SET (out, const0_rtx));
29481 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29482 emit_insn (SET (out_down,
29483 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29484 out_down)));
29485 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29486 }
29487 else
29488 {
29489 /* Shifts by a constant greater than 31. */
29490 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29491
29492 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29493 emit_insn (SET (out, const0_rtx));
29494 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29495 if (code == ASHIFTRT)
29496 emit_insn (gen_ashrsi3 (out_up, in_up,
29497 GEN_INT (31)));
29498 else
29499 emit_insn (SET (out_up, const0_rtx));
29500 }
29501 }
29502 else
29503 {
29504 /* We have a shift-by-register. */
29505 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29506
29507 /* This alternative requires the scratch registers. */
29508 gcc_assert (scratch1 && REG_P (scratch1));
29509 gcc_assert (scratch2 && REG_P (scratch2));
29510
29511 /* We will need the values "amount-32" and "32-amount" later.
29512 Swapping them around now allows the later code to be more general. */
29513 switch (code)
29514 {
29515 case ASHIFT:
29516 emit_insn (SUB_32 (scratch1, amount));
29517 emit_insn (RSB_32 (scratch2, amount));
29518 break;
29519 case ASHIFTRT:
29520 emit_insn (RSB_32 (scratch1, amount));
29521 /* Also set CC = amount > 32. */
29522 emit_insn (SUB_S_32 (scratch2, amount));
29523 break;
29524 case LSHIFTRT:
29525 emit_insn (RSB_32 (scratch1, amount));
29526 emit_insn (SUB_32 (scratch2, amount));
29527 break;
29528 default:
29529 gcc_unreachable ();
29530 }
29531
29532 /* Emit code like this:
29533
29534 arithmetic-left:
29535 out_down = in_down << amount;
29536 out_down = (in_up << (amount - 32)) | out_down;
29537 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29538 out_up = in_up << amount;
29539
29540 arithmetic-right:
29541 out_down = in_down >> amount;
29542 out_down = (in_up << (32 - amount)) | out_down;
29543 if (amount < 32)
29544 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29545 out_up = in_up << amount;
29546
29547 logical-right:
29548 out_down = in_down >> amount;
29549 out_down = (in_up << (32 - amount)) | out_down;
29550 if (amount < 32)
29551 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29552 out_up = in_up << amount;
29553
29554 The ARM and Thumb2 variants are the same but implemented slightly
29555 differently. If this were only called during expand we could just
29556 use the Thumb2 case and let combine do the right thing, but this
29557 can also be called from post-reload splitters. */
29558
29559 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29560
29561 if (!TARGET_THUMB2)
29562 {
29563 /* Emit code for ARM mode. */
29564 emit_insn (SET (out_down,
29565 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29566 if (code == ASHIFTRT)
29567 {
29568 rtx_code_label *done_label = gen_label_rtx ();
29569 emit_jump_insn (BRANCH (LT, done_label));
29570 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29571 out_down)));
29572 emit_label (done_label);
29573 }
29574 else
29575 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29576 out_down)));
29577 }
29578 else
29579 {
29580 /* Emit code for Thumb2 mode.
29581 Thumb2 can't do shift and or in one insn. */
29582 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29583 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29584
29585 if (code == ASHIFTRT)
29586 {
29587 rtx_code_label *done_label = gen_label_rtx ();
29588 emit_jump_insn (BRANCH (LT, done_label));
29589 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29590 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29591 emit_label (done_label);
29592 }
29593 else
29594 {
29595 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29596 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29597 }
29598 }
29599
29600 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29601 }
29602
29603 #undef SUB_32
29604 #undef RSB_32
29605 #undef SUB_S_32
29606 #undef SET
29607 #undef SHIFT
29608 #undef LSHIFT
29609 #undef REV_LSHIFT
29610 #undef ORR
29611 #undef BRANCH
29612 }
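/* Worked example (illustrative) of the constant-shift paths above, for a left
   shift of the register pair {in_high:in_low}:
     amount == 8 (< 32): out_high = (in_high << 8) | ((unsigned) in_low >> 24);
                         out_low = in_low << 8;
     amount == 40 (>= 32): out_high = in_low << 8;
                           out_low = 0;
   which matches what a full 64-bit shift by those amounts must produce. */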
29613
29614 /* Returns true if the pattern is a valid symbolic address, which is either a
29615 symbol_ref or (symbol_ref + addend).
29616
29617 According to the ARM ELF ABI, the initial addend of REL-type relocations
29618 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29619 literal field of the instruction as a 16-bit signed value in the range
29620 -32768 <= A < 32768. */
29621
29622 bool
29623 arm_valid_symbolic_address_p (rtx addr)
29624 {
29625 rtx xop0, xop1 = NULL_RTX;
29626 rtx tmp = addr;
29627
29628 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29629 return true;
29630
29631 /* (const (plus: symbol_ref const_int)) */
29632 if (GET_CODE (addr) == CONST)
29633 tmp = XEXP (addr, 0);
29634
29635 if (GET_CODE (tmp) == PLUS)
29636 {
29637 xop0 = XEXP (tmp, 0);
29638 xop1 = XEXP (tmp, 1);
29639
29640 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29641 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29642 }
29643
29644 return false;
29645 }
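/* For example, (symbol_ref "sym") is accepted, as is
   (const (plus (symbol_ref "sym") (const_int 32767))), whereas an addend of
   32768 falls outside the signed 16-bit range required by the REL-type
   relocations described above and is rejected. */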
29646
29647 /* Returns true if COMPARISON is a valid comparison operation, and
29648 puts the operands into a form that is valid for it. */
29649 bool
29650 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29651 {
29652 enum rtx_code code = GET_CODE (*comparison);
29653 int code_int;
29654 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29655 ? GET_MODE (*op2) : GET_MODE (*op1);
29656
29657 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29658
29659 if (code == UNEQ || code == LTGT)
29660 return false;
29661
29662 code_int = (int)code;
29663 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29664 PUT_CODE (*comparison, (enum rtx_code)code_int);
29665
29666 switch (mode)
29667 {
29668 case E_SImode:
29669 if (!arm_add_operand (*op1, mode))
29670 *op1 = force_reg (mode, *op1);
29671 if (!arm_add_operand (*op2, mode))
29672 *op2 = force_reg (mode, *op2);
29673 return true;
29674
29675 case E_DImode:
29676 if (!cmpdi_operand (*op1, mode))
29677 *op1 = force_reg (mode, *op1);
29678 if (!cmpdi_operand (*op2, mode))
29679 *op2 = force_reg (mode, *op2);
29680 return true;
29681
29682 case E_HFmode:
29683 if (!TARGET_VFP_FP16INST)
29684 break;
29685 /* FP16 comparisons are done in SF mode. */
29686 mode = SFmode;
29687 *op1 = convert_to_mode (mode, *op1, 1);
29688 *op2 = convert_to_mode (mode, *op2, 1);
29689 /* Fall through. */
29690 case E_SFmode:
29691 case E_DFmode:
29692 if (!vfp_compare_operand (*op1, mode))
29693 *op1 = force_reg (mode, *op1);
29694 if (!vfp_compare_operand (*op2, mode))
29695 *op2 = force_reg (mode, *op2);
29696 return true;
29697 default:
29698 break;
29699 }
29700
29701 return false;
29702
29703 }
29704
29705 /* Maximum number of instructions to set block of memory. */
29706 static int
29707 arm_block_set_max_insns (void)
29708 {
29709 if (optimize_function_for_size_p (cfun))
29710 return 4;
29711 else
29712 return current_tune->max_insns_inline_memset;
29713 }
29714
29715 /* Return TRUE if it's profitable to set block of memory for
29716 non-vectorized case. VAL is the value to set the memory
29717 with. LENGTH is the number of bytes to set. ALIGN is the
29718 alignment of the destination memory in bytes. UNALIGNED_P
29719 is TRUE if we can only set the memory with instructions
29720 meeting alignment requirements. USE_STRD_P is TRUE if we
29721 can use strd to set the memory. */
29722 static bool
29723 arm_block_set_non_vect_profit_p (rtx val,
29724 unsigned HOST_WIDE_INT length,
29725 unsigned HOST_WIDE_INT align,
29726 bool unaligned_p, bool use_strd_p)
29727 {
29728 int num = 0;
29729 /* For a leftover of 0-7 bytes, we can set the memory block using
29730 strb/strh/str with the minimum number of instructions. */
29731 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
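/* For instance, a 7-byte leftover takes str + strh + strb (3 insns),
   5 bytes take str + strb (2 insns) and 2 bytes take a single strh. */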
29732
29733 if (unaligned_p)
29734 {
29735 num = arm_const_inline_cost (SET, val);
29736 num += length / align + length % align;
29737 }
29738 else if (use_strd_p)
29739 {
29740 num = arm_const_double_inline_cost (val);
29741 num += (length >> 3) + leftover[length & 7];
29742 }
29743 else
29744 {
29745 num = arm_const_inline_cost (SET, val);
29746 num += (length >> 2) + leftover[length & 3];
29747 }
29748
29749 /* We may be able to combine last pair STRH/STRB into a single STR
29750 by shifting one byte back. */
29751 if (unaligned_access && length > 3 && (length & 3) == 3)
29752 num--;
29753
29754 return (num <= arm_block_set_max_insns ());
29755 }
29756
29757 /* Return TRUE if it's profitable to set block of memory for
29758 vectorized case. LENGTH is the number of bytes to set.
29759 ALIGN is the alignment of destination memory in bytes.
29760 MODE is the vector mode used to set the memory. */
29761 static bool
29762 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29763 unsigned HOST_WIDE_INT align,
29764 machine_mode mode)
29765 {
29766 int num;
29767 bool unaligned_p = ((align & 3) != 0);
29768 unsigned int nelt = GET_MODE_NUNITS (mode);
29769
29770 /* Instruction loading constant value. */
29771 num = 1;
29772 /* Instructions storing the memory. */
29773 num += (length + nelt - 1) / nelt;
29774 /* Instructions adjusting the address expression. We only need to
29775 adjust the address expression if it is 4-byte aligned and the
29776 leftover bytes can only be stored by a misaligned store instruction. */
29777 if (!unaligned_p && (length & 3) != 0)
29778 num++;
29779
29780 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29781 if (!unaligned_p && mode == V16QImode)
29782 num--;
29783
29784 return (num <= arm_block_set_max_insns ());
29785 }
29786
29787 /* Set a block of memory using vectorization instructions for the
29788 unaligned case. We fill the first LENGTH bytes of the memory
29789 area starting from DSTBASE with byte constant VALUE. ALIGN is
29790 the alignment requirement of memory. Return TRUE if succeeded. */
29791 static bool
29792 arm_block_set_unaligned_vect (rtx dstbase,
29793 unsigned HOST_WIDE_INT length,
29794 unsigned HOST_WIDE_INT value,
29795 unsigned HOST_WIDE_INT align)
29796 {
29797 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29798 rtx dst, mem;
29799 rtx val_elt, val_vec, reg;
29800 rtx rval[MAX_VECT_LEN];
29801 rtx (*gen_func) (rtx, rtx);
29802 machine_mode mode;
29803 unsigned HOST_WIDE_INT v = value;
29804 unsigned int offset = 0;
29805 gcc_assert ((align & 0x3) != 0);
29806 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29807 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29808 if (length >= nelt_v16)
29809 {
29810 mode = V16QImode;
29811 gen_func = gen_movmisalignv16qi;
29812 }
29813 else
29814 {
29815 mode = V8QImode;
29816 gen_func = gen_movmisalignv8qi;
29817 }
29818 nelt_mode = GET_MODE_NUNITS (mode);
29819 gcc_assert (length >= nelt_mode);
29820 /* Skip if it isn't profitable. */
29821 if (!arm_block_set_vect_profit_p (length, align, mode))
29822 return false;
29823
29824 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29825 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29826
29827 v = sext_hwi (v, BITS_PER_WORD);
29828 val_elt = GEN_INT (v);
29829 for (j = 0; j < nelt_mode; j++)
29830 rval[j] = val_elt;
29831
29832 reg = gen_reg_rtx (mode);
29833 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29834 /* Emit instruction loading the constant value. */
29835 emit_move_insn (reg, val_vec);
29836
29837 /* Handle nelt_mode bytes in a vector. */
29838 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29839 {
29840 emit_insn ((*gen_func) (mem, reg));
29841 if (i + 2 * nelt_mode <= length)
29842 {
29843 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29844 offset += nelt_mode;
29845 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29846 }
29847 }
29848
29849 /* If at least nelt_v8 bytes are left over, we must be in
29850 V16QImode. */
29851 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29852
29853 /* Handle (8, 16) bytes leftover. */
29854 if (i + nelt_v8 < length)
29855 {
29856 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29857 offset += length - i;
29858 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29859
29860 /* We are shifting bytes back, set the alignment accordingly. */
29861 if ((length & 1) != 0 && align >= 2)
29862 set_mem_align (mem, BITS_PER_UNIT);
29863
29864 emit_insn (gen_movmisalignv16qi (mem, reg));
29865 }
29866 /* Handle (0, 8] bytes leftover. */
29867 else if (i < length && i + nelt_v8 >= length)
29868 {
29869 if (mode == V16QImode)
29870 reg = gen_lowpart (V8QImode, reg);
29871
29872 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29873 + (nelt_mode - nelt_v8))));
29874 offset += (length - i) + (nelt_mode - nelt_v8);
29875 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29876
29877 /* We are shifting bytes back, set the alignment accordingly. */
29878 if ((length & 1) != 0 && align >= 2)
29879 set_mem_align (mem, BITS_PER_UNIT);
29880
29881 emit_insn (gen_movmisalignv8qi (mem, reg));
29882 }
29883
29884 return true;
29885 }
29886
29887 /* Set a block of memory using vectorization instructions for the
29888 aligned case. We fill the first LENGTH bytes of the memory area
29889 starting from DSTBASE with byte constant VALUE. ALIGN is the
29890 alignment requirement of memory. Return TRUE if succeeded. */
29891 static bool
29892 arm_block_set_aligned_vect (rtx dstbase,
29893 unsigned HOST_WIDE_INT length,
29894 unsigned HOST_WIDE_INT value,
29895 unsigned HOST_WIDE_INT align)
29896 {
29897 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29898 rtx dst, addr, mem;
29899 rtx val_elt, val_vec, reg;
29900 rtx rval[MAX_VECT_LEN];
29901 machine_mode mode;
29902 unsigned HOST_WIDE_INT v = value;
29903 unsigned int offset = 0;
29904
29905 gcc_assert ((align & 0x3) == 0);
29906 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29907 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29908 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29909 mode = V16QImode;
29910 else
29911 mode = V8QImode;
29912
29913 nelt_mode = GET_MODE_NUNITS (mode);
29914 gcc_assert (length >= nelt_mode);
29915 /* Skip if it isn't profitable. */
29916 if (!arm_block_set_vect_profit_p (length, align, mode))
29917 return false;
29918
29919 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29920
29921 v = sext_hwi (v, BITS_PER_WORD);
29922 val_elt = GEN_INT (v);
29923 for (j = 0; j < nelt_mode; j++)
29924 rval[j] = val_elt;
29925
29926 reg = gen_reg_rtx (mode);
29927 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29928 /* Emit instruction loading the constant value. */
29929 emit_move_insn (reg, val_vec);
29930
29931 i = 0;
29932 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29933 if (mode == V16QImode)
29934 {
29935 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29936 emit_insn (gen_movmisalignv16qi (mem, reg));
29937 i += nelt_mode;
29938 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29939 if (i + nelt_v8 < length && i + nelt_v16 > length)
29940 {
29941 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29942 offset += length - nelt_mode;
29943 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29944 /* We are shifting bytes back, set the alignment accordingly. */
29945 if ((length & 0x3) == 0)
29946 set_mem_align (mem, BITS_PER_UNIT * 4);
29947 else if ((length & 0x1) == 0)
29948 set_mem_align (mem, BITS_PER_UNIT * 2);
29949 else
29950 set_mem_align (mem, BITS_PER_UNIT);
29951
29952 emit_insn (gen_movmisalignv16qi (mem, reg));
29953 return true;
29954 }
29955 /* Fall through for bytes leftover. */
29956 mode = V8QImode;
29957 nelt_mode = GET_MODE_NUNITS (mode);
29958 reg = gen_lowpart (V8QImode, reg);
29959 }
29960
29961 /* Handle 8 bytes in a vector. */
29962 for (; (i + nelt_mode <= length); i += nelt_mode)
29963 {
29964 addr = plus_constant (Pmode, dst, i);
29965 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29966 emit_move_insn (mem, reg);
29967 }
29968
29969 /* Handle single word leftover by shifting 4 bytes back. We can
29970 use aligned access for this case. */
29971 if (i + UNITS_PER_WORD == length)
29972 {
29973 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29974 offset += i - UNITS_PER_WORD;
29975 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29976 /* We are shifting 4 bytes back, set the alignment accordingly. */
29977 if (align > UNITS_PER_WORD)
29978 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29979
29980 emit_move_insn (mem, reg);
29981 }
29982 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29983 We have to use unaligned access for this case. */
29984 else if (i < length)
29985 {
29986 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29987 offset += length - nelt_mode;
29988 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29989 /* We are shifting bytes back, set the alignment accordingly. */
29990 if ((length & 1) == 0)
29991 set_mem_align (mem, BITS_PER_UNIT * 2);
29992 else
29993 set_mem_align (mem, BITS_PER_UNIT);
29994
29995 emit_insn (gen_movmisalignv8qi (mem, reg));
29996 }
29997
29998 return true;
29999 }
30000
30001 /* Set a block of memory using plain strh/strb instructions, only
30002 using instructions allowed by ALIGN on the processor. We fill the
30003 first LENGTH bytes of the memory area starting from DSTBASE
30004 with byte constant VALUE. ALIGN is the alignment requirement
30005 of memory. */
30006 static bool
30007 arm_block_set_unaligned_non_vect (rtx dstbase,
30008 unsigned HOST_WIDE_INT length,
30009 unsigned HOST_WIDE_INT value,
30010 unsigned HOST_WIDE_INT align)
30011 {
30012 unsigned int i;
30013 rtx dst, addr, mem;
30014 rtx val_exp, val_reg, reg;
30015 machine_mode mode;
30016 HOST_WIDE_INT v = value;
30017
30018 gcc_assert (align == 1 || align == 2);
30019
30020 if (align == 2)
30021 v |= (value << BITS_PER_UNIT);
30022
30023 v = sext_hwi (v, BITS_PER_WORD);
30024 val_exp = GEN_INT (v);
30025 /* Skip if it isn't profitable. */
30026 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30027 align, true, false))
30028 return false;
30029
30030 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30031 mode = (align == 2 ? HImode : QImode);
30032 val_reg = force_reg (SImode, val_exp);
30033 reg = gen_lowpart (mode, val_reg);
30034
30035 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30036 {
30037 addr = plus_constant (Pmode, dst, i);
30038 mem = adjust_automodify_address (dstbase, mode, addr, i);
30039 emit_move_insn (mem, reg);
30040 }
30041
30042 /* Handle single byte leftover. */
30043 if (i + 1 == length)
30044 {
30045 reg = gen_lowpart (QImode, val_reg);
30046 addr = plus_constant (Pmode, dst, i);
30047 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30048 emit_move_insn (mem, reg);
30049 i++;
30050 }
30051
30052 gcc_assert (i == length);
30053 return true;
30054 }
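/* Illustrative example: with ALIGN == 2 and VALUE == 0xab, the constant built
   above becomes 0xabab, the loop stores it halfword by halfword with strh,
   and a trailing odd byte, if any, is stored with a single strb. */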
30055
30056 /* Set a block of memory using plain strd/str/strh/strb instructions,
30057 to permit unaligned copies on processors which support unaligned
30058 semantics for those instructions. We fill the first LENGTH bytes
30059 of the memory area starting from DSTBASE with byte constant VALUE.
30060 ALIGN is the alignment requirement of memory. */
30061 static bool
30062 arm_block_set_aligned_non_vect (rtx dstbase,
30063 unsigned HOST_WIDE_INT length,
30064 unsigned HOST_WIDE_INT value,
30065 unsigned HOST_WIDE_INT align)
30066 {
30067 unsigned int i;
30068 rtx dst, addr, mem;
30069 rtx val_exp, val_reg, reg;
30070 unsigned HOST_WIDE_INT v;
30071 bool use_strd_p;
30072
30073 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30074 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30075
30076 v = (value | (value << 8) | (value << 16) | (value << 24));
30077 if (length < UNITS_PER_WORD)
30078 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30079
30080 if (use_strd_p)
30081 v |= (v << BITS_PER_WORD);
30082 else
30083 v = sext_hwi (v, BITS_PER_WORD);
30084
30085 val_exp = GEN_INT (v);
30086 /* Skip if it isn't profitable. */
30087 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30088 align, false, use_strd_p))
30089 {
30090 if (!use_strd_p)
30091 return false;
30092
30093 /* Try without strd. */
30094 v = (v >> BITS_PER_WORD);
30095 v = sext_hwi (v, BITS_PER_WORD);
30096 val_exp = GEN_INT (v);
30097 use_strd_p = false;
30098 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30099 align, false, use_strd_p))
30100 return false;
30101 }
30102
30103 i = 0;
30104 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30105 /* Handle double words using strd if possible. */
30106 if (use_strd_p)
30107 {
30108 val_reg = force_reg (DImode, val_exp);
30109 reg = val_reg;
30110 for (; (i + 8 <= length); i += 8)
30111 {
30112 addr = plus_constant (Pmode, dst, i);
30113 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30114 emit_move_insn (mem, reg);
30115 }
30116 }
30117 else
30118 val_reg = force_reg (SImode, val_exp);
30119
30120 /* Handle words. */
30121 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30122 for (; (i + 4 <= length); i += 4)
30123 {
30124 addr = plus_constant (Pmode, dst, i);
30125 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30126 if ((align & 3) == 0)
30127 emit_move_insn (mem, reg);
30128 else
30129 emit_insn (gen_unaligned_storesi (mem, reg));
30130 }
30131
30132 /* Merge last pair of STRH and STRB into a STR if possible. */
30133 if (unaligned_access && i > 0 && (i + 3) == length)
30134 {
30135 addr = plus_constant (Pmode, dst, i - 1);
30136 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30137 /* We are shifting one byte back, set the alignment accordingly. */
30138 if ((align & 1) == 0)
30139 set_mem_align (mem, BITS_PER_UNIT);
30140
30141 /* Most likely this is an unaligned access, and we can't tell at
30142 compilation time. */
30143 emit_insn (gen_unaligned_storesi (mem, reg));
30144 return true;
30145 }
30146
30147 /* Handle half word leftover. */
30148 if (i + 2 <= length)
30149 {
30150 reg = gen_lowpart (HImode, val_reg);
30151 addr = plus_constant (Pmode, dst, i);
30152 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30153 if ((align & 1) == 0)
30154 emit_move_insn (mem, reg);
30155 else
30156 emit_insn (gen_unaligned_storehi (mem, reg));
30157
30158 i += 2;
30159 }
30160
30161 /* Handle single byte leftover. */
30162 if (i + 1 == length)
30163 {
30164 reg = gen_lowpart (QImode, val_reg);
30165 addr = plus_constant (Pmode, dst, i);
30166 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30167 emit_move_insn (mem, reg);
30168 }
30169
30170 return true;
30171 }
30172
30173 /* Set a block of memory using vectorization instructions for both
30174 aligned and unaligned cases. We fill the first LENGTH bytes of
30175 the memory area starting from DSTBASE with byte constant VALUE.
30176 ALIGN is the alignment requirement of memory. */
30177 static bool
30178 arm_block_set_vect (rtx dstbase,
30179 unsigned HOST_WIDE_INT length,
30180 unsigned HOST_WIDE_INT value,
30181 unsigned HOST_WIDE_INT align)
30182 {
30183 /* Check whether we need to use unaligned store instruction. */
30184 if (((align & 3) != 0 || (length & 3) != 0)
30185 /* Check whether unaligned store instruction is available. */
30186 && (!unaligned_access || BYTES_BIG_ENDIAN))
30187 return false;
30188
30189 if ((align & 3) == 0)
30190 return arm_block_set_aligned_vect (dstbase, length, value, align);
30191 else
30192 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30193 }
30194
30195 /* Expand a string store (memset) operation. First we try to do it using
30196 vectorization instructions, then fall back to ARM unaligned access and
30197 double-word stores if profitable. OPERANDS[0] is the destination,
30198 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30199 initialize the memory, OPERANDS[3] is the known alignment of the
30200 destination. */
30201 bool
30202 arm_gen_setmem (rtx *operands)
30203 {
30204 rtx dstbase = operands[0];
30205 unsigned HOST_WIDE_INT length;
30206 unsigned HOST_WIDE_INT value;
30207 unsigned HOST_WIDE_INT align;
30208
30209 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30210 return false;
30211
30212 length = UINTVAL (operands[1]);
30213 if (length > 64)
30214 return false;
30215
30216 value = (UINTVAL (operands[2]) & 0xFF);
30217 align = UINTVAL (operands[3]);
30218 if (TARGET_NEON && length >= 8
30219 && current_tune->string_ops_prefer_neon
30220 && arm_block_set_vect (dstbase, length, value, align))
30221 return true;
30222
30223 if (!unaligned_access && (align & 3) != 0)
30224 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30225
30226 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30227 }
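/* Usage note (illustrative): only blocks of at most 64 bytes are expanded
   inline here; for anything longer the expander returns false and the caller
   typically falls back to a library call to memset. Only the low 8 bits of
   OPERANDS[2] are used, so a value of 0x1234 stores the byte 0x34, matching
   memset semantics. */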
30228
30229
30230 static bool
30231 arm_macro_fusion_p (void)
30232 {
30233 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30234 }
30235
30236 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30237 for MOVW / MOVT macro fusion. */
30238
30239 static bool
30240 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30241 {
30242 /* We are trying to fuse
30243 movw imm / movt imm
30244 instructions as a group that gets scheduled together. */
30245
30246 rtx set_dest = SET_DEST (curr_set);
30247
30248 if (GET_MODE (set_dest) != SImode)
30249 return false;
30250
30251 /* We are trying to match:
30252 prev (movw) == (set (reg r0) (const_int imm16))
30253 curr (movt) == (set (zero_extract (reg r0)
30254 (const_int 16)
30255 (const_int 16))
30256 (const_int imm16_1))
30257 or
30258 prev (movw) == (set (reg r1)
30259 (high (symbol_ref ("SYM"))))
30260 curr (movt) == (set (reg r0)
30261 (lo_sum (reg r1)
30262 (symbol_ref ("SYM")))) */
30263
30264 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30265 {
30266 if (CONST_INT_P (SET_SRC (curr_set))
30267 && CONST_INT_P (SET_SRC (prev_set))
30268 && REG_P (XEXP (set_dest, 0))
30269 && REG_P (SET_DEST (prev_set))
30270 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30271 return true;
30272
30273 }
30274 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30275 && REG_P (SET_DEST (curr_set))
30276 && REG_P (SET_DEST (prev_set))
30277 && GET_CODE (SET_SRC (prev_set)) == HIGH
30278 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30279 return true;
30280
30281 return false;
30282 }
30283
30284 static bool
30285 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30286 {
30287 rtx prev_set = single_set (prev);
30288 rtx curr_set = single_set (curr);
30289
30290 if (!prev_set
30291 || !curr_set)
30292 return false;
30293
30294 if (any_condjump_p (curr))
30295 return false;
30296
30297 if (!arm_macro_fusion_p ())
30298 return false;
30299
30300 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30301 && aarch_crypto_can_dual_issue (prev, curr))
30302 return true;
30303
30304 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30305 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30306 return true;
30307
30308 return false;
30309 }
30310
30311 /* Return true iff the instruction fusion described by OP is enabled. */
30312 bool
30313 arm_fusion_enabled_p (tune_params::fuse_ops op)
30314 {
30315 return current_tune->fusible_ops & op;
30316 }
30317
30318 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30319 scheduled for speculative execution. Reject the long-running division
30320 and square-root instructions. */
30321
30322 static bool
30323 arm_sched_can_speculate_insn (rtx_insn *insn)
30324 {
30325 switch (get_attr_type (insn))
30326 {
30327 case TYPE_SDIV:
30328 case TYPE_UDIV:
30329 case TYPE_FDIVS:
30330 case TYPE_FDIVD:
30331 case TYPE_FSQRTS:
30332 case TYPE_FSQRTD:
30333 case TYPE_NEON_FP_SQRT_S:
30334 case TYPE_NEON_FP_SQRT_D:
30335 case TYPE_NEON_FP_SQRT_S_Q:
30336 case TYPE_NEON_FP_SQRT_D_Q:
30337 case TYPE_NEON_FP_DIV_S:
30338 case TYPE_NEON_FP_DIV_D:
30339 case TYPE_NEON_FP_DIV_S_Q:
30340 case TYPE_NEON_FP_DIV_D_Q:
30341 return false;
30342 default:
30343 return true;
30344 }
30345 }
30346
30347 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30348
30349 static unsigned HOST_WIDE_INT
30350 arm_asan_shadow_offset (void)
30351 {
30352 return HOST_WIDE_INT_1U << 29;
30353 }
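/* This places the ARM ASan shadow region at 0x20000000; with the usual 1/8
   shadow mapping a shadow address is computed (roughly) as
   (addr >> 3) + 0x20000000. */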
30354
30355
30356 /* This is a temporary fix for PR60655. Ideally we need
30357 to handle most of these cases in the generic part but
30358 currently we reject minus (..) (sym_ref). We try to
30359 ameliorate the case with minus (sym_ref1) (sym_ref2)
30360 where they are in the same section. */
30361
30362 static bool
30363 arm_const_not_ok_for_debug_p (rtx p)
30364 {
30365 tree decl_op0 = NULL;
30366 tree decl_op1 = NULL;
30367
30368 if (GET_CODE (p) == MINUS)
30369 {
30370 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30371 {
30372 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30373 if (decl_op1
30374 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30375 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30376 {
30377 if ((VAR_P (decl_op1)
30378 || TREE_CODE (decl_op1) == CONST_DECL)
30379 && (VAR_P (decl_op0)
30380 || TREE_CODE (decl_op0) == CONST_DECL))
30381 return (get_variable_section (decl_op1, false)
30382 != get_variable_section (decl_op0, false));
30383
30384 if (TREE_CODE (decl_op1) == LABEL_DECL
30385 && TREE_CODE (decl_op0) == LABEL_DECL)
30386 return (DECL_CONTEXT (decl_op1)
30387 != DECL_CONTEXT (decl_op0));
30388 }
30389
30390 return true;
30391 }
30392 }
30393
30394 return false;
30395 }
30396
30397 /* Return TRUE if X is a reference to a value in a constant pool. */
30398 extern bool
30399 arm_is_constant_pool_ref (rtx x)
30400 {
30401 return (MEM_P (x)
30402 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30403 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30404 }
30405
30406 /* Remember the last target of arm_set_current_function. */
30407 static GTY(()) tree arm_previous_fndecl;
30408
30409 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30410
30411 void
30412 save_restore_target_globals (tree new_tree)
30413 {
30414 /* If we have a previous state, use it. */
30415 if (TREE_TARGET_GLOBALS (new_tree))
30416 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30417 else if (new_tree == target_option_default_node)
30418 restore_target_globals (&default_target_globals);
30419 else
30420 {
30421 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30422 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30423 }
30424
30425 arm_option_params_internal ();
30426 }
30427
30428 /* Invalidate arm_previous_fndecl. */
30429
30430 void
30431 arm_reset_previous_fndecl (void)
30432 {
30433 arm_previous_fndecl = NULL_TREE;
30434 }
30435
30436 /* Establish appropriate back-end context for processing the function
30437 FNDECL. The argument might be NULL to indicate processing at top
30438 level, outside of any function scope. */
30439
30440 static void
30441 arm_set_current_function (tree fndecl)
30442 {
30443 if (!fndecl || fndecl == arm_previous_fndecl)
30444 return;
30445
30446 tree old_tree = (arm_previous_fndecl
30447 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30448 : NULL_TREE);
30449
30450 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30451
30452 /* If the current function has no attributes but the previous one did,
30453 use the default node. */
30454 if (! new_tree && old_tree)
30455 new_tree = target_option_default_node;
30456
30457 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30458 the default have been handled by save_restore_target_globals from
30459 arm_pragma_target_parse. */
30460 if (old_tree == new_tree)
30461 return;
30462
30463 arm_previous_fndecl = fndecl;
30464
30465 /* First set the target options. */
30466 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30467
30468 save_restore_target_globals (new_tree);
30469 }
30470
30471 /* Implement TARGET_OPTION_PRINT. */
30472
30473 static void
30474 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30475 {
30476 int flags = ptr->x_target_flags;
30477 const char *fpu_name;
30478
30479 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30480 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30481
30482 fprintf (file, "%*sselected isa %s\n", indent, "",
30483 TARGET_THUMB2_P (flags) ? "thumb2" :
30484 TARGET_THUMB_P (flags) ? "thumb1" :
30485 "arm");
30486
30487 if (ptr->x_arm_arch_string)
30488 fprintf (file, "%*sselected architecture %s\n", indent, "",
30489 ptr->x_arm_arch_string);
30490
30491 if (ptr->x_arm_cpu_string)
30492 fprintf (file, "%*sselected CPU %s\n", indent, "",
30493 ptr->x_arm_cpu_string);
30494
30495 if (ptr->x_arm_tune_string)
30496 fprintf (file, "%*sselected tune %s\n", indent, "",
30497 ptr->x_arm_tune_string);
30498
30499 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30500 }
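
/* Illustrative example (editorial, not from the original sources): with
options along the lines of -mthumb -march=armv7-a -mfpu=neon the hook above
would print something like

    selected isa thumb2
    selected architecture armv7-a
    selected fpu neon

with the exact lines depending on which fields of PTR are set.  */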
30501
30502 /* Hook to determine if one function can safely inline another. */
30503
30504 static bool
30505 arm_can_inline_p (tree caller, tree callee)
30506 {
30507 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30508 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30509 bool can_inline = true;
30510
30511 struct cl_target_option *caller_opts
30512 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30513 : target_option_default_node);
30514
30515 struct cl_target_option *callee_opts
30516 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30517 : target_option_default_node);
30518
30519 if (callee_opts == caller_opts)
30520 return true;
30521
30522 /* Callee's ISA features should be a subset of the caller's. */
30523 struct arm_build_target caller_target;
30524 struct arm_build_target callee_target;
30525 caller_target.isa = sbitmap_alloc (isa_num_bits);
30526 callee_target.isa = sbitmap_alloc (isa_num_bits);
30527
30528 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30529 false);
30530 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30531 false);
30532 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30533 can_inline = false;
30534
30535 sbitmap_free (caller_target.isa);
30536 sbitmap_free (callee_target.isa);
30537
30538 /* OK to inline between different modes.
30539 Functions with mode-specific instructions, e.g. using asm,
30540 must be explicitly protected with noinline. */
30541 return can_inline;
30542 }
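
/* Worked example (editorial): a callee whose target options give it only the
vfpv3-d16 FPU feature bits can be inlined into a caller configured for neon
(whose feature set includes those bits), but not the other way round, since
bitmap_subset_p would then fail and can_inline becomes false.  */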
30543
30544 /* Hook to fix function's alignment affected by target attribute. */
30545
30546 static void
30547 arm_relayout_function (tree fndecl)
30548 {
30549 if (DECL_USER_ALIGN (fndecl))
30550 return;
30551
30552 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30553
30554 if (!callee_tree)
30555 callee_tree = target_option_default_node;
30556
30557 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30558 SET_DECL_ALIGN
30559 (fndecl,
30560 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30561 }
30562
30563 /* Inner function to process the attribute((target(...))): take an argument
30564 and set the current options from it.  If we have a list, recursively
30565 process each entry in the list. */
30566
30567 static bool
30568 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30569 {
30570 if (TREE_CODE (args) == TREE_LIST)
30571 {
30572 bool ret = true;
30573
30574 for (; args; args = TREE_CHAIN (args))
30575 if (TREE_VALUE (args)
30576 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30577 ret = false;
30578 return ret;
30579 }
30580
30581 else if (TREE_CODE (args) != STRING_CST)
30582 {
30583 error ("attribute %<target%> argument not a string");
30584 return false;
30585 }
30586
30587 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30588 char *q;
30589
30590 while ((q = strtok (argstr, ",")) != NULL)
30591 {
30592 while (ISSPACE (*q)) ++q;
30593
30594 argstr = NULL;
30595 if (!strncmp (q, "thumb", 5))
30596 opts->x_target_flags |= MASK_THUMB;
30597
30598 else if (!strncmp (q, "arm", 3))
30599 opts->x_target_flags &= ~MASK_THUMB;
30600
30601 else if (!strncmp (q, "fpu=", 4))
30602 {
30603 int fpu_index;
30604 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30605 &fpu_index, CL_TARGET))
30606 {
30607 error ("invalid fpu for attribute(target(\"%s\"))", q);
30608 return false;
30609 }
30610 if (fpu_index == TARGET_FPU_auto)
30611 {
30612 /* This doesn't really make sense until we support
30613 general dynamic selection of the architecture and all
30614 sub-features. */
30615 sorry ("auto fpu selection not currently permitted here");
30616 return false;
30617 }
30618 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30619 }
30620 else
30621 {
30622 error ("attribute(target(\"%s\")) is unknown", q);
30623 return false;
30624 }
30625 }
30626
30627 return true;
30628 }
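
/* Illustrative usage (editorial): the strings parsed above come from
declarations such as

    void f (void) __attribute__ ((target ("thumb")));
    void g (void) __attribute__ ((target ("arm,fpu=vfpv3-d16")));

i.e. a comma-separated list whose elements are "thumb", "arm" or
"fpu=<name>", where <name> must be a value acceptable to -mfpu=.  */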
30629
30630 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30631
30632 tree
30633 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30634 struct gcc_options *opts_set)
30635 {
30636 struct cl_target_option cl_opts;
30637
30638 if (!arm_valid_target_attribute_rec (args, opts))
30639 return NULL_TREE;
30640
30641 cl_target_option_save (&cl_opts, opts);
30642 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30643 arm_option_check_internal (opts);
30644 /* Do any overrides, such as global options arch=xxx. */
30645 arm_option_override_internal (opts, opts_set);
30646
30647 return build_target_option_node (opts);
30648 }
30649
30650 static void
30651 add_attribute (const char * mode, tree *attributes)
30652 {
30653 size_t len = strlen (mode);
30654 tree value = build_string (len, mode);
30655
30656 TREE_TYPE (value) = build_array_type (char_type_node,
30657 build_index_type (size_int (len)));
30658
30659 *attributes = tree_cons (get_identifier ("target"),
30660 build_tree_list (NULL_TREE, value),
30661 *attributes);
30662 }
30663
30664 /* For testing.  Insert thumb or arm mode attributes alternately on functions. */
30665
30666 static void
30667 arm_insert_attributes (tree fndecl, tree * attributes)
30668 {
30669 const char *mode;
30670
30671 if (! TARGET_FLIP_THUMB)
30672 return;
30673
30674 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30675 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30676 return;
30677
30678 /* Nested definitions must inherit mode. */
30679 if (current_function_decl)
30680 {
30681 mode = TARGET_THUMB ? "thumb" : "arm";
30682 add_attribute (mode, attributes);
30683 return;
30684 }
30685
30686 /* If there is already a setting, don't change it. */
30687 if (lookup_attribute ("target", *attributes) != NULL)
30688 return;
30689
30690 mode = thumb_flipper ? "thumb" : "arm";
30691 add_attribute (mode, attributes);
30692
30693 thumb_flipper = !thumb_flipper;
30694 }
30695
30696 /* Hook to validate attribute((target("string"))). */
30697
30698 static bool
30699 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30700 tree args, int ARG_UNUSED (flags))
30701 {
30702 bool ret = true;
30703 struct gcc_options func_options;
30704 tree cur_tree, new_optimize;
30705 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30706
30707 /* Get the optimization options of the current function. */
30708 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30709
30710 /* If the function changed the optimization levels as well as setting target
30711 options, start with the optimizations specified. */
30712 if (!func_optimize)
30713 func_optimize = optimization_default_node;
30714
30715 /* Init func_options. */
30716 memset (&func_options, 0, sizeof (func_options));
30717 init_options_struct (&func_options, NULL);
30718 lang_hooks.init_options_struct (&func_options);
30719
30720 /* Initialize func_options to the defaults. */
30721 cl_optimization_restore (&func_options,
30722 TREE_OPTIMIZATION (func_optimize));
30723
30724 cl_target_option_restore (&func_options,
30725 TREE_TARGET_OPTION (target_option_default_node));
30726
30727 /* Set func_options flags with new target mode. */
30728 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30729 &global_options_set);
30730
30731 if (cur_tree == NULL_TREE)
30732 ret = false;
30733
30734 new_optimize = build_optimization_node (&func_options);
30735
30736 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30737
30738 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30739
30740 finalize_options_struct (&func_options);
30741
30742 return ret;
30743 }
30744
30745 /* Match an ISA feature bitmap to a named FPU. We always use the
30746 first entry that exactly matches the feature set, so that we
30747 effectively canonicalize the FPU name for the assembler. */
30748 static const char*
30749 arm_identify_fpu_from_isa (sbitmap isa)
30750 {
30751 auto_sbitmap fpubits (isa_num_bits);
30752 auto_sbitmap cand_fpubits (isa_num_bits);
30753
30754 bitmap_and (fpubits, isa, isa_all_fpubits);
30755
30756 /* If there are no ISA feature bits relating to the FPU, we must be
30757 doing soft-float. */
30758 if (bitmap_empty_p (fpubits))
30759 return "softvfp";
30760
30761 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30762 {
30763 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30764 if (bitmap_equal_p (fpubits, cand_fpubits))
30765 return all_fpus[i].name;
30766 }
30767 /* We must find an entry, or things have gone wrong. */
30768 gcc_unreachable ();
30769 }
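
/* For instance (editorial note): if ISA carries exactly the FPU feature bits
that arm-cpus.in lists for "vfpv3-d16", the loop above returns the string
"vfpv3-d16"; with no FPU bits set at all it returns "softvfp", which is
still a name the assembler accepts in a .fpu directive.  */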
30770
30771 void
30772 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30773 {
30774
30775 fprintf (stream, "\t.syntax unified\n");
30776
30777 if (TARGET_THUMB)
30778 {
30779 if (is_called_in_ARM_mode (decl)
30780 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30781 && cfun->is_thunk))
30782 fprintf (stream, "\t.code 32\n");
30783 else if (TARGET_THUMB1)
30784 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30785 else
30786 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30787 }
30788 else
30789 fprintf (stream, "\t.arm\n");
30790
30791 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30792 (TARGET_SOFT_FLOAT
30793 ? "softvfp"
30794 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30795
30796 if (TARGET_POKE_FUNCTION_NAME)
30797 arm_poke_function_name (stream, (const char *) name);
30798 }
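
/* Example output (editorial sketch, assuming a Thumb-2 function on a target
whose active FPU identifies as vfpv4):

    .syntax unified
    .thumb
    .thumb_func
    .fpu vfpv4

A soft-float target would emit ".fpu softvfp" instead, and a Thumb function
that is called in ARM mode gets ".code 32" rather than the Thumb directives.  */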
30799
30800 /* If MEM is in the form of [base+offset], extract the two parts
30801 of the address into BASE and OFFSET; otherwise return false
30802 after clearing BASE and OFFSET. */
30803
30804 static bool
30805 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30806 {
30807 rtx addr;
30808
30809 gcc_assert (MEM_P (mem));
30810
30811 addr = XEXP (mem, 0);
30812
30813 /* Strip off const from addresses like (const (addr)). */
30814 if (GET_CODE (addr) == CONST)
30815 addr = XEXP (addr, 0);
30816
30817 if (GET_CODE (addr) == REG)
30818 {
30819 *base = addr;
30820 *offset = const0_rtx;
30821 return true;
30822 }
30823
30824 if (GET_CODE (addr) == PLUS
30825 && GET_CODE (XEXP (addr, 0)) == REG
30826 && CONST_INT_P (XEXP (addr, 1)))
30827 {
30828 *base = XEXP (addr, 0);
30829 *offset = XEXP (addr, 1);
30830 return true;
30831 }
30832
30833 *base = NULL_RTX;
30834 *offset = NULL_RTX;
30835
30836 return false;
30837 }
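
/* Editorial examples of the accepted forms: for (mem (reg r1)) this sets
BASE = r1 and OFFSET = 0; for (mem (plus (reg r1) (const_int 8))) it sets
BASE = r1 and OFFSET = 8; any other address shape (for example a
pre/post-modify address) makes it clear both outputs and return false.  */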
30838
30839 /* If INSN is a load or store of an address in the form of [base+offset],
30840 extract the two parts into BASE and OFFSET.  IS_LOAD is set
30841 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
30842 otherwise return FALSE. */
30843
30844 static bool
30845 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30846 {
30847 rtx x, dest, src;
30848
30849 gcc_assert (INSN_P (insn));
30850 x = PATTERN (insn);
30851 if (GET_CODE (x) != SET)
30852 return false;
30853
30854 src = SET_SRC (x);
30855 dest = SET_DEST (x);
30856 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30857 {
30858 *is_load = false;
30859 extract_base_offset_in_addr (dest, base, offset);
30860 }
30861 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30862 {
30863 *is_load = true;
30864 extract_base_offset_in_addr (src, base, offset);
30865 }
30866 else
30867 return false;
30868
30869 return (*base != NULL_RTX && *offset != NULL_RTX);
30870 }
30871
30872 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30873
30874 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30875 and PRI are only calculated for these instructions.  For other instructions,
30876 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
30877 of instruction fusion can be supported by returning different priorities.
30878
30879 It's important that irrelevant instructions get the largest FUSION_PRI. */
30880
30881 static void
30882 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30883 int *fusion_pri, int *pri)
30884 {
30885 int tmp, off_val;
30886 bool is_load;
30887 rtx base, offset;
30888
30889 gcc_assert (INSN_P (insn));
30890
30891 tmp = max_pri - 1;
30892 if (!fusion_load_store (insn, &base, &offset, &is_load))
30893 {
30894 *pri = tmp;
30895 *fusion_pri = tmp;
30896 return;
30897 }
30898
30899 /* Load goes first. */
30900 if (is_load)
30901 *fusion_pri = tmp - 1;
30902 else
30903 *fusion_pri = tmp - 2;
30904
30905 tmp /= 2;
30906
30907 /* INSN with smaller base register goes first. */
30908 tmp -= ((REGNO (base) & 0xff) << 20);
30909
30910 /* INSN with smaller offset goes first. */
30911 off_val = (int)(INTVAL (offset));
30912 if (off_val >= 0)
30913 tmp -= (off_val & 0xfffff);
30914 else
30915 tmp += ((- off_val) & 0xfffff);
30916
30917 *pri = tmp;
30918 return;
30919 }
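
/* Worked example (editorial): for a load from [r2, #4] the code above starts
from tmp = max_pri - 1 and sets *fusion_pri = tmp - 1 (a store would get
tmp - 2, so loads sort first), then halves tmp and subtracts
(REGNO (r2) << 20) + 4 (ignoring the masking, which only matters for very
large register numbers or offsets).  The net effect is that loads/stores are
grouped by base register and ordered by increasing offset within a group.  */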
30920
30921
30922 /* Construct and return a PARALLEL RTX vector with elements numbering the
30923 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30924 the vector - from the perspective of the architecture. This does not
30925 line up with GCC's perspective on lane numbers, so we end up with
30926 different masks depending on our target endian-ness. The diagram
30927 below may help. We must draw the distinction when building masks
30928 which select one half of the vector. An instruction selecting
30929 architectural low-lanes for a big-endian target must be described using
30930 a mask selecting GCC high-lanes.
30931
30932 Big-Endian Little-Endian
30933
30934 GCC 0 1 2 3 3 2 1 0
30935 | x | x | x | x | | x | x | x | x |
30936 Architecture 3 2 1 0 3 2 1 0
30937
30938 Low Mask: { 2, 3 } { 0, 1 }
30939 High Mask: { 0, 1 } { 2, 3 }
30940 */
30941
30942 rtx
30943 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30944 {
30945 int nunits = GET_MODE_NUNITS (mode);
30946 rtvec v = rtvec_alloc (nunits / 2);
30947 int high_base = nunits / 2;
30948 int low_base = 0;
30949 int base;
30950 rtx t1;
30951 int i;
30952
30953 if (BYTES_BIG_ENDIAN)
30954 base = high ? low_base : high_base;
30955 else
30956 base = high ? high_base : low_base;
30957
30958 for (i = 0; i < nunits / 2; i++)
30959 RTVEC_ELT (v, i) = GEN_INT (base + i);
30960
30961 t1 = gen_rtx_PARALLEL (mode, v);
30962 return t1;
30963 }
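
/* Concrete example (editorial): for V4SImode and HIGH == true this returns
(parallel [(const_int 2) (const_int 3)]) on a little-endian target and
(parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching the
High Mask row of the table in the comment above.  */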
30964
30965 /* Check OP for validity as a PARALLEL RTX vector with elements
30966 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30967 from the perspective of the architecture. See the diagram above
30968 arm_simd_vect_par_cnst_half for more details. */
30969
30970 bool
30971 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30972 bool high)
30973 {
30974 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30975 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30976 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30977 int i = 0;
30978
30979 if (!VECTOR_MODE_P (mode))
30980 return false;
30981
30982 if (count_op != count_ideal)
30983 return false;
30984
30985 for (i = 0; i < count_ideal; i++)
30986 {
30987 rtx elt_op = XVECEXP (op, 0, i);
30988 rtx elt_ideal = XVECEXP (ideal, 0, i);
30989
30990 if (!CONST_INT_P (elt_op)
30991 || INTVAL (elt_ideal) != INTVAL (elt_op))
30992 return false;
30993 }
30994 return true;
30995 }
30996
30997 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30998 in Thumb1. */
30999 static bool
31000 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31001 const_tree)
31002 {
31003 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31004 if (vcall_offset && TARGET_THUMB1)
31005 return false;
31006
31007 /* Otherwise ok. */
31008 return true;
31009 }
31010
31011 /* Generate RTL for a conditional branch with rtx comparison CODE in
31012 mode CC_MODE. The destination of the unlikely conditional branch
31013 is LABEL_REF. */
31014
31015 void
31016 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31017 rtx label_ref)
31018 {
31019 rtx x;
31020 x = gen_rtx_fmt_ee (code, VOIDmode,
31021 gen_rtx_REG (cc_mode, CC_REGNUM),
31022 const0_rtx);
31023
31024 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31025 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31026 pc_rtx);
31027 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31028 }
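
/* Editorial sketch of the emitted RTL, assuming CODE == NE and
CC_MODE == CCmode:

    (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                            (label_ref LABEL_REF)
                            (pc)))

emit_unlikely_jump then attaches a very low branch probability to it.  */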
31029
31030 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31031
31032 For pure-code sections there is no letter code for this attribute, so
31033 output all the section flags numerically when this is needed. */
31034
31035 static bool
31036 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31037 {
31038
31039 if (flags & SECTION_ARM_PURECODE)
31040 {
31041 *num = 0x20000000;
31042
31043 if (!(flags & SECTION_DEBUG))
31044 *num |= 0x2;
31045 if (flags & SECTION_EXCLUDE)
31046 *num |= 0x80000000;
31047 if (flags & SECTION_WRITE)
31048 *num |= 0x1;
31049 if (flags & SECTION_CODE)
31050 *num |= 0x4;
31051 if (flags & SECTION_MERGE)
31052 *num |= 0x10;
31053 if (flags & SECTION_STRINGS)
31054 *num |= 0x20;
31055 if (flags & SECTION_TLS)
31056 *num |= 0x400;
31057 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31058 *num |= 0x200;
31059
31060 return true;
31061 }
31062
31063 return false;
31064 }
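
/* Worked example (editorial): a pure-code section that is executable
(SECTION_CODE), not writable and not a debug section ends up with
0x20000000 | 0x2 | 0x4 == 0x20000006, emitted as a numeric flags value in
place of the usual letter codes.  */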
31065
31066 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31067
31068 If pure-code is passed as an option, make sure all functions are in
31069 sections that have the SHF_ARM_PURECODE attribute. */
31070
31071 static section *
31072 arm_function_section (tree decl, enum node_frequency freq,
31073 bool startup, bool exit)
31074 {
31075 const char * section_name;
31076 section * sec;
31077
31078 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31079 return default_function_section (decl, freq, startup, exit);
31080
31081 if (!target_pure_code)
31082 return default_function_section (decl, freq, startup, exit);
31083
31084
31085 section_name = DECL_SECTION_NAME (decl);
31086
31087 /* If a function is not in a named section then it falls under the 'default'
31088 text section, also known as '.text'. We can preserve previous behavior as
31089 the default text section already has the SHF_ARM_PURECODE section
31090 attribute. */
31091 if (!section_name)
31092 {
31093 section *default_sec = default_function_section (decl, freq, startup,
31094 exit);
31095
31096 /* If default_sec is not null, then it must be a special section like for
31097 example .text.startup. We set the pure-code attribute and return the
31098 same section to preserve existing behavior. */
31099 if (default_sec)
31100 default_sec->common.flags |= SECTION_ARM_PURECODE;
31101 return default_sec;
31102 }
31103
31104 /* Otherwise look whether a section has already been created with
31105 'section_name'. */
31106 sec = get_named_section (decl, section_name, 0);
31107 if (!sec)
31108 /* If that is not the case passing NULL as the section's name to
31109 'get_named_section' will create a section with the declaration's
31110 section name. */
31111 sec = get_named_section (decl, NULL, 0);
31112
31113 /* Set the SHF_ARM_PURECODE attribute. */
31114 sec->common.flags |= SECTION_ARM_PURECODE;
31115
31116 return sec;
31117 }
31118
31119 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31120
31121 If DECL is a function declaration and pure-code is passed as an option
31122 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31123 section's name and RELOC indicates whether the declaration's initializer may
31124 contain runtime relocations. */
31125
31126 static unsigned int
31127 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31128 {
31129 unsigned int flags = default_section_type_flags (decl, name, reloc);
31130
31131 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31132 flags |= SECTION_ARM_PURECODE;
31133
31134 return flags;
31135 }
31136
31137 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31138
31139 static void
31140 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31141 rtx op0, rtx op1,
31142 rtx *quot_p, rtx *rem_p)
31143 {
31144 if (mode == SImode)
31145 gcc_assert (!TARGET_IDIV);
31146
31147 scalar_int_mode libval_mode
31148 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31149
31150 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31151 libval_mode,
31152 op0, GET_MODE (op0),
31153 op1, GET_MODE (op1));
31154
31155 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31156 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31157 GET_MODE_SIZE (mode));
31158
31159 gcc_assert (quotient);
31160 gcc_assert (remainder);
31161
31162 *quot_p = quotient;
31163 *rem_p = remainder;
31164 }
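
/* Editorial note: for MODE == SImode the computed LIBVAL_MODE is DImode, and
the quotient and remainder are extracted as the two SImode subregs at byte
offsets 0 and 4 of the returned value, matching an __aeabi_idivmod-style
convention of returning both results in one register pair.  */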
31165
31166 /* This function checks for the availability of the coprocessor builtin passed
31167 in BUILTIN for the current target. Returns true if it is available and
31168 false otherwise. If a BUILTIN is passed for which this function has not
31169 been implemented it will cause an internal compiler error. */
31170
31171 bool
31172 arm_coproc_builtin_available (enum unspecv builtin)
31173 {
31174 /* None of these builtins are available in Thumb mode if the target only
31175 supports Thumb-1. */
31176 if (TARGET_THUMB1)
31177 return false;
31178
31179 switch (builtin)
31180 {
31181 case VUNSPEC_CDP:
31182 case VUNSPEC_LDC:
31183 case VUNSPEC_LDCL:
31184 case VUNSPEC_STC:
31185 case VUNSPEC_STCL:
31186 case VUNSPEC_MCR:
31187 case VUNSPEC_MRC:
31188 if (arm_arch4)
31189 return true;
31190 break;
31191 case VUNSPEC_CDP2:
31192 case VUNSPEC_LDC2:
31193 case VUNSPEC_LDC2L:
31194 case VUNSPEC_STC2:
31195 case VUNSPEC_STC2L:
31196 case VUNSPEC_MCR2:
31197 case VUNSPEC_MRC2:
31198 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31199 ARMv8-{A,M}. */
31200 if (arm_arch5)
31201 return true;
31202 break;
31203 case VUNSPEC_MCRR:
31204 case VUNSPEC_MRRC:
31205 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31206 ARMv8-{A,M}. */
31207 if (arm_arch6 || arm_arch5te)
31208 return true;
31209 break;
31210 case VUNSPEC_MCRR2:
31211 case VUNSPEC_MRRC2:
31212 if (arm_arch6)
31213 return true;
31214 break;
31215 default:
31216 gcc_unreachable ();
31217 }
31218 return false;
31219 }
31220
31221 /* This function returns true if OP is a valid memory operand for the ldc and
31222 stc coprocessor instructions and false otherwise. */
31223
31224 bool
31225 arm_coproc_ldc_stc_legitimate_address (rtx op)
31226 {
31227 HOST_WIDE_INT range;
31228 /* Has to be a memory operand. */
31229 if (!MEM_P (op))
31230 return false;
31231
31232 op = XEXP (op, 0);
31233
31234 /* We accept registers. */
31235 if (REG_P (op))
31236 return true;
31237
31238 switch (GET_CODE (op))
31239 {
31240 case PLUS:
31241 {
31242 /* Or registers with an offset. */
31243 if (!REG_P (XEXP (op, 0)))
31244 return false;
31245
31246 op = XEXP (op, 1);
31247
31248 /* The offset must be an immediate though. */
31249 if (!CONST_INT_P (op))
31250 return false;
31251
31252 range = INTVAL (op);
31253
31254 /* Within the range of [-1020,1020]. */
31255 if (!IN_RANGE (range, -1020, 1020))
31256 return false;
31257
31258 /* And a multiple of 4. */
31259 return (range % 4) == 0;
31260 }
31261 case PRE_INC:
31262 case POST_INC:
31263 case PRE_DEC:
31264 case POST_DEC:
31265 return REG_P (XEXP (op, 0));
31266 default:
31267 gcc_unreachable ();
31268 }
31269 return false;
31270 }
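
/* Editorial examples: (mem (reg r0)) and (mem (plus (reg r0) (const_int 8)))
are accepted, as are the register pre/post increment and decrement forms;
(mem (plus (reg r0) (const_int 6))) is rejected because 6 is not a multiple
of 4, and an offset of 1024 is rejected for being outside [-1020, 1020].  */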
31271
31272 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31273
31274 In VFPv1, VFP registers could only be accessed in the mode they were
31275 set, so subregs would be invalid there. However, we don't support
31276 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31277
31278 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31279 VFP registers in little-endian order. We can't describe that accurately to
31280 GCC, so avoid taking subregs of such values.
31281
31282 The only exception is going from a 128-bit to a 64-bit type. In that
31283 case the data layout happens to be consistent for big-endian, so we
31284 explicitly allow that case. */
31285
31286 static bool
31287 arm_can_change_mode_class (machine_mode from, machine_mode to,
31288 reg_class_t rclass)
31289 {
31290 if (TARGET_BIG_END
31291 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31292 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31293 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31294 && reg_classes_intersect_p (VFP_REGS, rclass))
31295 return false;
31296 return true;
31297 }
31298
31299 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31300 strcpy from constants will be faster. */
31301
31302 static HOST_WIDE_INT
31303 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31304 {
31305 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31306 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31307 return MAX (align, BITS_PER_WORD * factor);
31308 return align;
31309 }
31310
31311 #if CHECKING_P
31312 namespace selftest {
31313
31314 /* Scan the static data tables generated by parsecpu.awk looking for
31315 potential issues with the data. We primarily check for
31316 inconsistencies in the option extensions at present (extensions
31317 that duplicate others but aren't marked as aliases). Furthermore,
31318 for correct canonicalization later options must never be a subset
31319 of an earlier option. Any extension should also only specify other
31320 feature bits and never an architecture bit. The architecture is inferred
31321 from the declaration of the extension. */
31322 static void
31323 arm_test_cpu_arch_data (void)
31324 {
31325 const arch_option *arch;
31326 const cpu_option *cpu;
31327 auto_sbitmap target_isa (isa_num_bits);
31328 auto_sbitmap isa1 (isa_num_bits);
31329 auto_sbitmap isa2 (isa_num_bits);
31330
31331 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31332 {
31333 const cpu_arch_extension *ext1, *ext2;
31334
31335 if (arch->common.extensions == NULL)
31336 continue;
31337
31338 arm_initialize_isa (target_isa, arch->common.isa_bits);
31339
31340 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31341 {
31342 if (ext1->alias)
31343 continue;
31344
31345 arm_initialize_isa (isa1, ext1->isa_bits);
31346 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31347 {
31348 if (ext2->alias || ext1->remove != ext2->remove)
31349 continue;
31350
31351 arm_initialize_isa (isa2, ext2->isa_bits);
31352 /* If the option is a subset of the parent option, it doesn't
31353 add anything and so isn't useful. */
31354 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31355
31356 /* If the extension specifies any architectural bits then
31357 disallow it. Extensions should only specify feature bits. */
31358 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31359 }
31360 }
31361 }
31362
31363 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31364 {
31365 const cpu_arch_extension *ext1, *ext2;
31366
31367 if (cpu->common.extensions == NULL)
31368 continue;
31369
31370 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31371
31372 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31373 {
31374 if (ext1->alias)
31375 continue;
31376
31377 arm_initialize_isa (isa1, ext1->isa_bits);
31378 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31379 {
31380 if (ext2->alias || ext1->remove != ext2->remove)
31381 continue;
31382
31383 arm_initialize_isa (isa2, ext2->isa_bits);
31384 /* If the option is a subset of the parent option, it doesn't
31385 add anything and so isn't useful. */
31386 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31387
31388 /* If the extension specifies any architectural bits then
31389 disallow it. Extensions should only specify feature bits. */
31390 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31391 }
31392 }
31393 }
31394 }
31395
31396 /* Scan the static data tables generated by parsecpu.awk looking for
31397 potential issues with the data. Here we check for consistency between the
31398 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31399 a feature bit that is not defined by any FPU flag. */
31400 static void
31401 arm_test_fpu_data (void)
31402 {
31403 auto_sbitmap isa_all_fpubits (isa_num_bits);
31404 auto_sbitmap fpubits (isa_num_bits);
31405 auto_sbitmap tmpset (isa_num_bits);
31406
31407 static const enum isa_feature fpu_bitlist[]
31408 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31409 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31410
31411 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31412 {
31413 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31414 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31415 bitmap_clear (isa_all_fpubits);
31416 bitmap_copy (isa_all_fpubits, tmpset);
31417 }
31418
31419 if (!bitmap_empty_p (isa_all_fpubits))
31420 {
31421 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31422 " group that are not defined by any FPU.\n"
31423 " Check your arm-cpus.in.\n");
31424 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31425 }
31426 }
31427
31428 static void
31429 arm_run_selftests (void)
31430 {
31431 arm_test_cpu_arch_data ();
31432 arm_test_fpu_data ();
31433 }
31434 } /* Namespace selftest. */
31435
31436 #undef TARGET_RUN_TARGET_SELFTESTS
31437 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31438 #endif /* CHECKING_P */
31439
31440 struct gcc_target targetm = TARGET_INITIALIZER;
31441
31442 #include "gt-arm.h"