1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
67 #include "gimple.h"
68
69 /* This file should be included last. */
70 #include "target-def.h"
71
72 /* Forward definitions of types. */
73 typedef struct minipool_node Mnode;
74 typedef struct minipool_fixup Mfix;
75
76 void (*arm_lang_output_object_attributes_hook)(void);
77
78 struct four_ints
79 {
80 int i[4];
81 };
82
83 /* Forward function declarations. */
84 static bool arm_const_not_ok_for_debug_p (rtx);
85 static int arm_needs_doubleword_align (machine_mode, const_tree);
86 static int arm_compute_static_chain_stack_bytes (void);
87 static arm_stack_offsets *arm_get_frame_offsets (void);
88 static void arm_compute_frame_layout (void);
89 static void arm_add_gc_roots (void);
90 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
91 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
92 static unsigned bit_count (unsigned long);
93 static unsigned bitmap_popcount (const sbitmap);
94 static int arm_address_register_rtx_p (rtx, int);
95 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
96 static bool is_called_in_ARM_mode (tree);
97 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
98 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
99 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
100 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
101 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
102 inline static int thumb1_index_register_rtx_p (rtx, int);
103 static int thumb_far_jump_used_p (void);
104 static bool thumb_force_lr_save (void);
105 static unsigned arm_size_return_regs (void);
106 static bool arm_assemble_integer (rtx, unsigned int, int);
107 static void arm_print_operand (FILE *, rtx, int);
108 static void arm_print_operand_address (FILE *, machine_mode, rtx);
109 static bool arm_print_operand_punct_valid_p (unsigned char code);
110 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
111 static arm_cc get_arm_condition_code (rtx);
112 static const char *output_multi_immediate (rtx *, const char *, const char *,
113 int, HOST_WIDE_INT);
114 static const char *shift_op (rtx, HOST_WIDE_INT *);
115 static struct machine_function *arm_init_machine_status (void);
116 static void thumb_exit (FILE *, int);
117 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
118 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_forward_ref (Mfix *);
120 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
121 static Mnode *add_minipool_backward_ref (Mfix *);
122 static void assign_minipool_offsets (Mfix *);
123 static void arm_print_value (FILE *, rtx);
124 static void dump_minipool (rtx_insn *);
125 static int arm_barrier_cost (rtx_insn *);
126 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
127 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
128 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
129 machine_mode, rtx);
130 static void arm_reorg (void);
131 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
132 static unsigned long arm_compute_save_reg0_reg12_mask (void);
133 static unsigned long arm_compute_save_core_reg_mask (void);
134 static unsigned long arm_isr_value (tree);
135 static unsigned long arm_compute_func_type (void);
136 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
137 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
138 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
139 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
140 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
141 #endif
142 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
143 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
144 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
145 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
146 static int arm_comp_type_attributes (const_tree, const_tree);
147 static void arm_set_default_type_attributes (tree);
148 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
149 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
150 static int optimal_immediate_sequence (enum rtx_code code,
151 unsigned HOST_WIDE_INT val,
152 struct four_ints *return_sequence);
153 static int optimal_immediate_sequence_1 (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence,
156 int i);
157 static int arm_get_strip_length (int);
158 static bool arm_function_ok_for_sibcall (tree, tree);
159 static machine_mode arm_promote_function_mode (const_tree,
160 machine_mode, int *,
161 const_tree, int);
162 static bool arm_return_in_memory (const_tree, const_tree);
163 static rtx arm_function_value (const_tree, const_tree, bool);
164 static rtx arm_libcall_value_1 (machine_mode);
165 static rtx arm_libcall_value (machine_mode, const_rtx);
166 static bool arm_function_value_regno_p (const unsigned int);
167 static void arm_internal_label (FILE *, const char *, unsigned long);
168 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
169 tree);
170 static bool arm_have_conditional_execution (void);
171 static bool arm_cannot_force_const_mem (machine_mode, rtx);
172 static bool arm_legitimate_constant_p (machine_mode, rtx);
173 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
174 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
175 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
176 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
177 static void emit_constant_insn (rtx cond, rtx pattern);
178 static rtx_insn *emit_set_insn (rtx, rtx);
179 static rtx emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
181 tree, bool);
182 static rtx arm_function_arg (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
185 const_tree, bool);
186 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
188 const_tree);
189 static rtx aapcs_libcall_value (machine_mode);
190 static int aapcs_select_return_coproc (const_tree, const_tree);
191
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
195 #endif
196 #ifndef ARM_PE
197 static void arm_encode_section_info (tree, rtx, int);
198 #endif
199
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree, tree *);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx_insn *);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 #endif
219 static void arm_asm_init_sections (void);
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static void arm_option_restore (struct gcc_options *,
237 struct cl_target_option *);
238 static void arm_override_options_after_change (void);
239 static void arm_option_print (FILE *, int, struct cl_target_option *);
240 static void arm_set_current_function (tree);
241 static bool arm_can_inline_p (tree, tree);
242 static void arm_relayout_function (tree);
243 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
244 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
245 static bool arm_sched_can_speculate_insn (rtx_insn *);
246 static bool arm_macro_fusion_p (void);
247 static bool arm_cannot_copy_insn_p (rtx_insn *);
248 static int arm_issue_rate (void);
249 static int arm_first_cycle_multipass_dfa_lookahead (void);
250 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
251 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
252 static bool arm_output_addr_const_extra (FILE *, rtx);
253 static bool arm_allocate_stack_slots_for_args (void);
254 static bool arm_warn_func_return (tree);
255 static tree arm_promoted_type (const_tree t);
256 static bool arm_scalar_mode_supported_p (machine_mode);
257 static bool arm_frame_pointer_required (void);
258 static bool arm_can_eliminate (const int, const int);
259 static void arm_asm_trampoline_template (FILE *);
260 static void arm_trampoline_init (rtx, tree, rtx);
261 static rtx arm_trampoline_adjust_address (rtx);
262 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
263 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
265 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
266 static bool arm_array_mode_supported_p (machine_mode,
267 unsigned HOST_WIDE_INT);
268 static machine_mode arm_preferred_simd_mode (machine_mode);
269 static bool arm_class_likely_spilled_p (reg_class_t);
270 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
271 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
272 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
273 const_tree type,
274 int misalignment,
275 bool is_packed);
276 static void arm_conditional_register_usage (void);
277 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
278 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
279 static unsigned int arm_autovectorize_vector_sizes (void);
280 static int arm_default_branch_cost (bool, bool);
281 static int arm_cortex_a5_branch_cost (bool, bool);
282 static int arm_cortex_m_branch_cost (bool, bool);
283 static int arm_cortex_m7_branch_cost (bool, bool);
284
285 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
286 const unsigned char *sel);
287
288 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
289
290 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
291 tree vectype,
292 int misalign ATTRIBUTE_UNUSED);
293 static unsigned arm_add_stmt_cost (void *data, int count,
294 enum vect_cost_for_stmt kind,
295 struct _stmt_vec_info *stmt_info,
296 int misalign,
297 enum vect_cost_model_location where);
298
299 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
300 bool op0_preserve_value);
301 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
302
303 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
304 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
305 const_tree);
306 static section *arm_function_section (tree, enum node_frequency, bool, bool);
307 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
308 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
309 int reloc);
310 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
311 static machine_mode arm_floatn_mode (int, bool);
312 \f
313 /* Table of machine attributes. */
314 static const struct attribute_spec arm_attribute_table[] =
315 {
316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
317 affects_type_identity } */
318 /* Function calls made to this symbol must be done indirectly, because
319 it may lie outside of the 26 bit addressing range of a normal function
320 call. */
321 { "long_call", 0, 0, false, true, true, NULL, false },
322 /* Whereas these functions are always known to reside within the 26 bit
323 addressing range. */
324 { "short_call", 0, 0, false, true, true, NULL, false },
325 /* Specify the procedure call conventions for a function. */
326 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
327 false },
328 /* Interrupt Service Routines have special prologue and epilogue requirements. */
329 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
334 false },
335 #ifdef ARM_PE
336 /* ARM/PE has three new attributes:
337 interfacearm - ?
338 dllexport - for exporting a function/variable that will live in a dll
339 dllimport - for importing a function/variable from a dll
340
341 Microsoft allows multiple declspecs in one __declspec, separating
342 them with spaces. We do NOT support this. Instead, use __declspec
343 multiple times.
344 */
345 { "dllimport", 0, 0, true, false, false, NULL, false },
346 { "dllexport", 0, 0, true, false, false, NULL, false },
347 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
348 false },
349 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
352 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
353 false },
354 #endif
355 /* ARMv8-M Security Extensions support. */
356 { "cmse_nonsecure_entry", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_entry, false },
358 { "cmse_nonsecure_call", 0, 0, true, false, false,
359 arm_handle_cmse_nonsecure_call, true },
360 { NULL, 0, 0, false, false, false, NULL, false }
361 };
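/* Illustrative usage sketch (not part of the original arm.c): how the
   attributes registered in the table above appear in user code.  The
   function names here are made up; the attribute spellings and arguments
   are the documented GCC ones.  */

/* Forced indirect call vs. call known to be within range.  */
extern void far_helper (void) __attribute__ ((long_call));
extern void near_helper (void) __attribute__ ((short_call));

/* Interrupt service routine; the optional argument is processed by
   arm_handle_isr_attribute.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

/* ARMv8-M Security Extensions entry point (requires -mcmse).  */
int secure_gateway (int x) __attribute__ ((cmse_nonsecure_entry));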
362 \f
363 /* Initialize the GCC target structure. */
364 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
365 #undef TARGET_MERGE_DECL_ATTRIBUTES
366 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
367 #endif
368
369 #undef TARGET_LEGITIMIZE_ADDRESS
370 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
374
375 #undef TARGET_INSERT_ATTRIBUTES
376 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
377
378 #undef TARGET_ASM_FILE_START
379 #define TARGET_ASM_FILE_START arm_file_start
380 #undef TARGET_ASM_FILE_END
381 #define TARGET_ASM_FILE_END arm_file_end
382
383 #undef TARGET_ASM_ALIGNED_SI_OP
384 #define TARGET_ASM_ALIGNED_SI_OP NULL
385 #undef TARGET_ASM_INTEGER
386 #define TARGET_ASM_INTEGER arm_assemble_integer
387
388 #undef TARGET_PRINT_OPERAND
389 #define TARGET_PRINT_OPERAND arm_print_operand
390 #undef TARGET_PRINT_OPERAND_ADDRESS
391 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
392 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
393 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394
395 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
396 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397
398 #undef TARGET_ASM_FUNCTION_PROLOGUE
399 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400
401 #undef TARGET_ASM_FUNCTION_EPILOGUE
402 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403
404 #undef TARGET_CAN_INLINE_P
405 #define TARGET_CAN_INLINE_P arm_can_inline_p
406
407 #undef TARGET_RELAYOUT_FUNCTION
408 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
409
410 #undef TARGET_OPTION_OVERRIDE
411 #define TARGET_OPTION_OVERRIDE arm_option_override
412
413 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
414 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
415
416 #undef TARGET_OPTION_RESTORE
417 #define TARGET_OPTION_RESTORE arm_option_restore
418
419 #undef TARGET_OPTION_PRINT
420 #define TARGET_OPTION_PRINT arm_option_print
421
422 #undef TARGET_COMP_TYPE_ATTRIBUTES
423 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
424
425 #undef TARGET_SCHED_CAN_SPECULATE_INSN
426 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
427
428 #undef TARGET_SCHED_MACRO_FUSION_P
429 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
430
431 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
432 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
433
434 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
435 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
436
437 #undef TARGET_SCHED_ADJUST_COST
438 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
439
440 #undef TARGET_SET_CURRENT_FUNCTION
441 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
442
443 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
444 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
445
446 #undef TARGET_SCHED_REORDER
447 #define TARGET_SCHED_REORDER arm_sched_reorder
448
449 #undef TARGET_REGISTER_MOVE_COST
450 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
451
452 #undef TARGET_MEMORY_MOVE_COST
453 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
454
455 #undef TARGET_ENCODE_SECTION_INFO
456 #ifdef ARM_PE
457 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
458 #else
459 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
460 #endif
461
462 #undef TARGET_STRIP_NAME_ENCODING
463 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
464
465 #undef TARGET_ASM_INTERNAL_LABEL
466 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
467
468 #undef TARGET_FLOATN_MODE
469 #define TARGET_FLOATN_MODE arm_floatn_mode
470
471 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
472 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
473
474 #undef TARGET_FUNCTION_VALUE
475 #define TARGET_FUNCTION_VALUE arm_function_value
476
477 #undef TARGET_LIBCALL_VALUE
478 #define TARGET_LIBCALL_VALUE arm_libcall_value
479
480 #undef TARGET_FUNCTION_VALUE_REGNO_P
481 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
482
483 #undef TARGET_ASM_OUTPUT_MI_THUNK
484 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
485 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
486 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
487
488 #undef TARGET_RTX_COSTS
489 #define TARGET_RTX_COSTS arm_rtx_costs
490 #undef TARGET_ADDRESS_COST
491 #define TARGET_ADDRESS_COST arm_address_cost
492
493 #undef TARGET_SHIFT_TRUNCATION_MASK
494 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
495 #undef TARGET_VECTOR_MODE_SUPPORTED_P
496 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
497 #undef TARGET_ARRAY_MODE_SUPPORTED_P
498 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
499 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
500 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
501 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
502 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
503 arm_autovectorize_vector_sizes
504
505 #undef TARGET_MACHINE_DEPENDENT_REORG
506 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
507
508 #undef TARGET_INIT_BUILTINS
509 #define TARGET_INIT_BUILTINS arm_init_builtins
510 #undef TARGET_EXPAND_BUILTIN
511 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
512 #undef TARGET_BUILTIN_DECL
513 #define TARGET_BUILTIN_DECL arm_builtin_decl
514
515 #undef TARGET_INIT_LIBFUNCS
516 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
517
518 #undef TARGET_PROMOTE_FUNCTION_MODE
519 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
520 #undef TARGET_PROMOTE_PROTOTYPES
521 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
522 #undef TARGET_PASS_BY_REFERENCE
523 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
524 #undef TARGET_ARG_PARTIAL_BYTES
525 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
526 #undef TARGET_FUNCTION_ARG
527 #define TARGET_FUNCTION_ARG arm_function_arg
528 #undef TARGET_FUNCTION_ARG_ADVANCE
529 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
530 #undef TARGET_FUNCTION_ARG_BOUNDARY
531 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
532
533 #undef TARGET_SETUP_INCOMING_VARARGS
534 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
535
536 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
537 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
538
539 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
540 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
541 #undef TARGET_TRAMPOLINE_INIT
542 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
543 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
544 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
545
546 #undef TARGET_WARN_FUNC_RETURN
547 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
548
549 #undef TARGET_DEFAULT_SHORT_ENUMS
550 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
551
552 #undef TARGET_ALIGN_ANON_BITFIELD
553 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
554
555 #undef TARGET_NARROW_VOLATILE_BITFIELD
556 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
557
558 #undef TARGET_CXX_GUARD_TYPE
559 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
560
561 #undef TARGET_CXX_GUARD_MASK_BIT
562 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
563
564 #undef TARGET_CXX_GET_COOKIE_SIZE
565 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
566
567 #undef TARGET_CXX_COOKIE_HAS_SIZE
568 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
569
570 #undef TARGET_CXX_CDTOR_RETURNS_THIS
571 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
572
573 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
574 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
575
576 #undef TARGET_CXX_USE_AEABI_ATEXIT
577 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
578
579 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
580 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
581 arm_cxx_determine_class_data_visibility
582
583 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
584 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
585
586 #undef TARGET_RETURN_IN_MSB
587 #define TARGET_RETURN_IN_MSB arm_return_in_msb
588
589 #undef TARGET_RETURN_IN_MEMORY
590 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
591
592 #undef TARGET_MUST_PASS_IN_STACK
593 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
594
595 #if ARM_UNWIND_INFO
596 #undef TARGET_ASM_UNWIND_EMIT
597 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
598
599 /* EABI unwinding tables use a different format for the typeinfo tables. */
600 #undef TARGET_ASM_TTYPE
601 #define TARGET_ASM_TTYPE arm_output_ttype
602
603 #undef TARGET_ARM_EABI_UNWINDER
604 #define TARGET_ARM_EABI_UNWINDER true
605
606 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
607 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
608
609 #endif /* ARM_UNWIND_INFO */
610
611 #undef TARGET_ASM_INIT_SECTIONS
612 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
613
614 #undef TARGET_DWARF_REGISTER_SPAN
615 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
616
617 #undef TARGET_CANNOT_COPY_INSN_P
618 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
619
620 #ifdef HAVE_AS_TLS
621 #undef TARGET_HAVE_TLS
622 #define TARGET_HAVE_TLS true
623 #endif
624
625 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
626 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
627
628 #undef TARGET_LEGITIMATE_CONSTANT_P
629 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
630
631 #undef TARGET_CANNOT_FORCE_CONST_MEM
632 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
633
634 #undef TARGET_MAX_ANCHOR_OFFSET
635 #define TARGET_MAX_ANCHOR_OFFSET 4095
636
637 /* The minimum is set such that the total size of the block
638 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
639 divisible by eight, ensuring natural spacing of anchors. */
640 #undef TARGET_MIN_ANCHOR_OFFSET
641 #define TARGET_MIN_ANCHOR_OFFSET -4088
642
643 #undef TARGET_SCHED_ISSUE_RATE
644 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
645
646 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
647 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
648 arm_first_cycle_multipass_dfa_lookahead
649
650 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
651 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
652 arm_first_cycle_multipass_dfa_lookahead_guard
653
654 #undef TARGET_MANGLE_TYPE
655 #define TARGET_MANGLE_TYPE arm_mangle_type
656
657 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
658 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
659
660 #undef TARGET_BUILD_BUILTIN_VA_LIST
661 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
662 #undef TARGET_EXPAND_BUILTIN_VA_START
663 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
664 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
665 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
666
667 #ifdef HAVE_AS_TLS
668 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
669 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
670 #endif
671
672 #undef TARGET_LEGITIMATE_ADDRESS_P
673 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
674
675 #undef TARGET_PREFERRED_RELOAD_CLASS
676 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
677
678 #undef TARGET_PROMOTED_TYPE
679 #define TARGET_PROMOTED_TYPE arm_promoted_type
680
681 #undef TARGET_SCALAR_MODE_SUPPORTED_P
682 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
683
684 #undef TARGET_COMPUTE_FRAME_LAYOUT
685 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
686
687 #undef TARGET_FRAME_POINTER_REQUIRED
688 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
689
690 #undef TARGET_CAN_ELIMINATE
691 #define TARGET_CAN_ELIMINATE arm_can_eliminate
692
693 #undef TARGET_CONDITIONAL_REGISTER_USAGE
694 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
695
696 #undef TARGET_CLASS_LIKELY_SPILLED_P
697 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
698
699 #undef TARGET_VECTORIZE_BUILTINS
700 #define TARGET_VECTORIZE_BUILTINS
701
702 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
703 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
704 arm_builtin_vectorized_function
705
706 #undef TARGET_VECTOR_ALIGNMENT
707 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
708
709 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
710 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
711 arm_vector_alignment_reachable
712
713 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
714 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
715 arm_builtin_support_vector_misalignment
716
717 #undef TARGET_PREFERRED_RENAME_CLASS
718 #define TARGET_PREFERRED_RENAME_CLASS \
719 arm_preferred_rename_class
720
721 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
722 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
723 arm_vectorize_vec_perm_const_ok
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
727 arm_builtin_vectorization_cost
728 #undef TARGET_VECTORIZE_ADD_STMT_COST
729 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
730
731 #undef TARGET_CANONICALIZE_COMPARISON
732 #define TARGET_CANONICALIZE_COMPARISON \
733 arm_canonicalize_comparison
734
735 #undef TARGET_ASAN_SHADOW_OFFSET
736 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
737
738 #undef MAX_INSN_PER_IT_BLOCK
739 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
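/* Illustrative sketch (not part of the original source) of what this
   limit means for Thumb-2 code generation.  With -mrestrict-it an IT
   block covers a single 16-bit instruction, e.g.

       it      eq
       moveq   r0, #1

   whereas the unrestricted encoding allows up to four conditional
   instructions under one IT, e.g.

       itttt   eq
       moveq   r0, #1
       addeq   r1, r1, #1
       ldreq   r2, [r3]
       streq   r2, [r4]
*/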
740
741 #undef TARGET_CAN_USE_DOLOOP_P
742 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
743
744 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
745 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
746
747 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
748 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
749
750 #undef TARGET_SCHED_FUSION_PRIORITY
751 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
752
753 #undef TARGET_ASM_FUNCTION_SECTION
754 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
755
756 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
757 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
758
759 #undef TARGET_SECTION_TYPE_FLAGS
760 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
761
762 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
763 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
764
765 #undef TARGET_C_EXCESS_PRECISION
766 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
767
768 /* Although the architecture reserves bits 0 and 1, only the former is
769 used for ARM/Thumb ISA selection in v7 and earlier versions. */
770 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
771 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
772
773 struct gcc_target targetm = TARGET_INITIALIZER;
774 \f
775 /* Obstack for minipool constant handling. */
776 static struct obstack minipool_obstack;
777 static char * minipool_startobj;
778
779 /* The maximum number of insns skipped which
780 will be conditionalised if possible. */
781 static int max_insns_skipped = 5;
782
783 extern FILE * asm_out_file;
784
785 /* True if we are currently building a constant table. */
786 int making_const_table;
787
788 /* The processor for which instructions should be scheduled. */
789 enum processor_type arm_tune = TARGET_CPU_arm_none;
790
791 /* The current tuning set. */
792 const struct tune_params *current_tune;
793
794 /* Which floating point hardware to schedule for. */
795 int arm_fpu_attr;
796
797 /* Used for Thumb call_via trampolines. */
798 rtx thumb_call_via_label[14];
799 static int thumb_call_reg_needed;
800
801 /* The bits in this mask specify which instruction scheduling options should
802 be used. */
803 unsigned int tune_flags = 0;
804
805 /* The highest ARM architecture version supported by the
806 target. */
807 enum base_architecture arm_base_arch = BASE_ARCH_0;
808
809 /* Active target architecture and tuning. */
810
811 struct arm_build_target arm_active_target;
812
813 /* The following are used in the arm.md file as equivalents to bits
814 in the above two flag variables. */
815
816 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
817 int arm_arch3m = 0;
818
819 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
820 int arm_arch4 = 0;
821
822 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
823 int arm_arch4t = 0;
824
825 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
826 int arm_arch5 = 0;
827
828 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
829 int arm_arch5e = 0;
830
831 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
832 int arm_arch5te = 0;
833
834 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
835 int arm_arch6 = 0;
836
837 /* Nonzero if this chip supports the ARM 6K extensions. */
838 int arm_arch6k = 0;
839
840 /* Nonzero if this chip supports the ARM 6KZ extensions. */
841 int arm_arch6kz = 0;
842
843 /* Nonzero if instructions present in ARMv6-M can be used. */
844 int arm_arch6m = 0;
845
846 /* Nonzero if this chip supports the ARM 7 extensions. */
847 int arm_arch7 = 0;
848
849 /* Nonzero if this chip supports the Large Physical Address Extension. */
850 int arm_arch_lpae = 0;
851
852 /* Nonzero if instructions not present in the 'M' profile can be used. */
853 int arm_arch_notm = 0;
854
855 /* Nonzero if instructions present in ARMv7E-M can be used. */
856 int arm_arch7em = 0;
857
858 /* Nonzero if instructions present in ARMv8 can be used. */
859 int arm_arch8 = 0;
860
861 /* Nonzero if this chip supports the ARMv8.1 extensions. */
862 int arm_arch8_1 = 0;
863
864 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
865 int arm_arch8_2 = 0;
866
867 /* Nonzero if this chip supports the FP16 instructions extension of ARM
868 Architecture 8.2. */
869 int arm_fp16_inst = 0;
870
871 /* Nonzero if this chip can benefit from load scheduling. */
872 int arm_ld_sched = 0;
873
874 /* Nonzero if this chip is a StrongARM. */
875 int arm_tune_strongarm = 0;
876
877 /* Nonzero if this chip supports Intel Wireless MMX technology. */
878 int arm_arch_iwmmxt = 0;
879
880 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
881 int arm_arch_iwmmxt2 = 0;
882
883 /* Nonzero if this chip is an XScale. */
884 int arm_arch_xscale = 0;
885
886 /* Nonzero if tuning for XScale. */
887 int arm_tune_xscale = 0;
888
889 /* Nonzero if we want to tune for stores that access the write-buffer.
890 This typically means an ARM6 or ARM7 with MMU or MPU. */
891 int arm_tune_wbuf = 0;
892
893 /* Nonzero if tuning for Cortex-A9. */
894 int arm_tune_cortex_a9 = 0;
895
896 /* Nonzero if we should define __THUMB_INTERWORK__ in the
897 preprocessor.
898 XXX This is a bit of a hack; it's intended to help work around
899 problems in GLD, which doesn't understand that armv5t code is
900 interworking clean. */
901 int arm_cpp_interwork = 0;
902
903 /* Nonzero if chip supports Thumb 1. */
904 int arm_arch_thumb1;
905
906 /* Nonzero if chip supports Thumb 2. */
907 int arm_arch_thumb2;
908
909 /* Nonzero if chip supports integer division instruction. */
910 int arm_arch_arm_hwdiv;
911 int arm_arch_thumb_hwdiv;
912
913 /* Nonzero if chip disallows volatile memory access in IT block. */
914 int arm_arch_no_volatile_ce;
915
916 /* Nonzero if we should use Neon to handle 64-bit operations rather
917 than core registers. */
918 int prefer_neon_for_64bits = 0;
919
920 /* Nonzero if we shouldn't use literal pools. */
921 bool arm_disable_literal_pool = false;
922
923 /* The register number to be used for the PIC offset register. */
924 unsigned arm_pic_register = INVALID_REGNUM;
925
926 enum arm_pcs arm_pcs_default;
927
928 /* For an explanation of these variables, see final_prescan_insn below. */
929 int arm_ccfsm_state;
930 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
931 enum arm_cond_code arm_current_cc;
932
933 rtx arm_target_insn;
934 int arm_target_label;
935 /* The number of conditionally executed insns, including the current insn. */
936 int arm_condexec_count = 0;
937 /* A bitmask specifying the patterns for the IT block.
938 Zero means do not output an IT block before this insn. */
939 int arm_condexec_mask = 0;
940 /* The number of bits used in arm_condexec_mask. */
941 int arm_condexec_masklen = 0;
942
943 /* Nonzero if chip supports the ARMv8 CRC instructions. */
944 int arm_arch_crc = 0;
945
946 /* Nonzero if chip supports the ARMv8-M security extensions. */
947 int arm_arch_cmse = 0;
948
949 /* Nonzero if the core has a very small, high-latency multiply unit. */
950 int arm_m_profile_small_mul = 0;
951
952 /* The condition codes of the ARM, and the inverse function. */
953 static const char * const arm_condition_codes[] =
954 {
955 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
956 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
957 };
958
959 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
960 int arm_regs_in_sequence[] =
961 {
962 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
963 };
964
965 #define ARM_LSL_NAME "lsl"
966 #define streq(string1, string2) (strcmp (string1, string2) == 0)
967
968 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
969 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
970 | (1 << PIC_OFFSET_TABLE_REGNUM)))
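/* Illustrative note (an assumption based on the usual ARM register
   numbering, where SP is r13 and PC is r15): those two registers already
   lie outside the low byte selected by 0xff, so THUMB2_WORK_REGS
   effectively yields the low registers r0-r7 minus the Thumb hard frame
   pointer and, when one is in use, the PIC offset table register.  */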
971 \f
972 /* Initialization code. */
973
974 struct processors
975 {
976 const char *const name;
977 enum processor_type core;
978 unsigned int tune_flags;
979 const char *arch;
980 enum base_architecture base_arch;
981 enum isa_feature isa_bits[isa_num_bits];
982 const struct tune_params *const tune;
983 };
984
985
986 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
987 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
988 { \
989 num_slots, \
990 l1_size, \
991 l1_line_size \
992 }
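/* Illustrative only, with hypothetical values: a tuning that does treat
   prefetching as beneficial would use something like
       ARM_PREFETCH_BENEFICIAL (4, 32, 32)
   -- number of prefetch slots, L1 cache size, L1 cache line size -- which
   expands to the initializer { 4, 32, 32 }, while
   ARM_PREFETCH_NOT_BENEFICIAL supplies { 0, -1, -1 }.  */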
993
994 /* arm generic vectorizer costs. */
995 static const
996 struct cpu_vec_costs arm_default_vec_cost = {
997 1, /* scalar_stmt_cost. */
998 1, /* scalar_load_cost. */
999 1, /* scalar_store_cost. */
1000 1, /* vec_stmt_cost. */
1001 1, /* vec_to_scalar_cost. */
1002 1, /* scalar_to_vec_cost. */
1003 1, /* vec_align_load_cost. */
1004 1, /* vec_unalign_load_cost. */
1005 1, /* vec_unalign_store_cost. */
1006 1, /* vec_store_cost. */
1007 3, /* cond_taken_branch_cost. */
1008 1, /* cond_not_taken_branch_cost. */
1009 };
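/* These unit costs are only meaningful relative to one another; they are
   consumed by the vectorizer cost hooks declared earlier
   (arm_builtin_vectorization_cost, arm_add_stmt_cost).  For example, a
   taken conditional branch (3) is modelled as three times as expensive
   as a simple scalar statement (1).  */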
1010
1011 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1012 #include "aarch-cost-tables.h"
1013
1014
1015
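/* For scale: COSTS_N_INSNS (N) (defined in rtl.h) represents N times the
   cost of one simple instruction, so the COSTS_N_INSNS (30) "idiv" entry
   below models integer division as roughly thirty instructions' worth of
   work on a core without hardware divide.  */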
1016 const struct cpu_cost_table cortexa9_extra_costs =
1017 {
1018 /* ALU */
1019 {
1020 0, /* arith. */
1021 0, /* logical. */
1022 0, /* shift. */
1023 COSTS_N_INSNS (1), /* shift_reg. */
1024 COSTS_N_INSNS (1), /* arith_shift. */
1025 COSTS_N_INSNS (2), /* arith_shift_reg. */
1026 0, /* log_shift. */
1027 COSTS_N_INSNS (1), /* log_shift_reg. */
1028 COSTS_N_INSNS (1), /* extend. */
1029 COSTS_N_INSNS (2), /* extend_arith. */
1030 COSTS_N_INSNS (1), /* bfi. */
1031 COSTS_N_INSNS (1), /* bfx. */
1032 0, /* clz. */
1033 0, /* rev. */
1034 0, /* non_exec. */
1035 true /* non_exec_costs_exec. */
1036 },
1037 {
1038 /* MULT SImode */
1039 {
1040 COSTS_N_INSNS (3), /* simple. */
1041 COSTS_N_INSNS (3), /* flag_setting. */
1042 COSTS_N_INSNS (2), /* extend. */
1043 COSTS_N_INSNS (3), /* add. */
1044 COSTS_N_INSNS (2), /* extend_add. */
1045 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1046 },
1047 /* MULT DImode */
1048 {
1049 0, /* simple (N/A). */
1050 0, /* flag_setting (N/A). */
1051 COSTS_N_INSNS (4), /* extend. */
1052 0, /* add (N/A). */
1053 COSTS_N_INSNS (4), /* extend_add. */
1054 0 /* idiv (N/A). */
1055 }
1056 },
1057 /* LD/ST */
1058 {
1059 COSTS_N_INSNS (2), /* load. */
1060 COSTS_N_INSNS (2), /* load_sign_extend. */
1061 COSTS_N_INSNS (2), /* ldrd. */
1062 COSTS_N_INSNS (2), /* ldm_1st. */
1063 1, /* ldm_regs_per_insn_1st. */
1064 2, /* ldm_regs_per_insn_subsequent. */
1065 COSTS_N_INSNS (5), /* loadf. */
1066 COSTS_N_INSNS (5), /* loadd. */
1067 COSTS_N_INSNS (1), /* load_unaligned. */
1068 COSTS_N_INSNS (2), /* store. */
1069 COSTS_N_INSNS (2), /* strd. */
1070 COSTS_N_INSNS (2), /* stm_1st. */
1071 1, /* stm_regs_per_insn_1st. */
1072 2, /* stm_regs_per_insn_subsequent. */
1073 COSTS_N_INSNS (1), /* storef. */
1074 COSTS_N_INSNS (1), /* stored. */
1075 COSTS_N_INSNS (1), /* store_unaligned. */
1076 COSTS_N_INSNS (1), /* loadv. */
1077 COSTS_N_INSNS (1) /* storev. */
1078 },
1079 {
1080 /* FP SFmode */
1081 {
1082 COSTS_N_INSNS (14), /* div. */
1083 COSTS_N_INSNS (4), /* mult. */
1084 COSTS_N_INSNS (7), /* mult_addsub. */
1085 COSTS_N_INSNS (30), /* fma. */
1086 COSTS_N_INSNS (3), /* addsub. */
1087 COSTS_N_INSNS (1), /* fpconst. */
1088 COSTS_N_INSNS (1), /* neg. */
1089 COSTS_N_INSNS (3), /* compare. */
1090 COSTS_N_INSNS (3), /* widen. */
1091 COSTS_N_INSNS (3), /* narrow. */
1092 COSTS_N_INSNS (3), /* toint. */
1093 COSTS_N_INSNS (3), /* fromint. */
1094 COSTS_N_INSNS (3) /* roundint. */
1095 },
1096 /* FP DFmode */
1097 {
1098 COSTS_N_INSNS (24), /* div. */
1099 COSTS_N_INSNS (5), /* mult. */
1100 COSTS_N_INSNS (8), /* mult_addsub. */
1101 COSTS_N_INSNS (30), /* fma. */
1102 COSTS_N_INSNS (3), /* addsub. */
1103 COSTS_N_INSNS (1), /* fpconst. */
1104 COSTS_N_INSNS (1), /* neg. */
1105 COSTS_N_INSNS (3), /* compare. */
1106 COSTS_N_INSNS (3), /* widen. */
1107 COSTS_N_INSNS (3), /* narrow. */
1108 COSTS_N_INSNS (3), /* toint. */
1109 COSTS_N_INSNS (3), /* fromint. */
1110 COSTS_N_INSNS (3) /* roundint. */
1111 }
1112 },
1113 /* Vector */
1114 {
1115 COSTS_N_INSNS (1) /* alu. */
1116 }
1117 };
1118
1119 const struct cpu_cost_table cortexa8_extra_costs =
1120 {
1121 /* ALU */
1122 {
1123 0, /* arith. */
1124 0, /* logical. */
1125 COSTS_N_INSNS (1), /* shift. */
1126 0, /* shift_reg. */
1127 COSTS_N_INSNS (1), /* arith_shift. */
1128 0, /* arith_shift_reg. */
1129 COSTS_N_INSNS (1), /* log_shift. */
1130 0, /* log_shift_reg. */
1131 0, /* extend. */
1132 0, /* extend_arith. */
1133 0, /* bfi. */
1134 0, /* bfx. */
1135 0, /* clz. */
1136 0, /* rev. */
1137 0, /* non_exec. */
1138 true /* non_exec_costs_exec. */
1139 },
1140 {
1141 /* MULT SImode */
1142 {
1143 COSTS_N_INSNS (1), /* simple. */
1144 COSTS_N_INSNS (1), /* flag_setting. */
1145 COSTS_N_INSNS (1), /* extend. */
1146 COSTS_N_INSNS (1), /* add. */
1147 COSTS_N_INSNS (1), /* extend_add. */
1148 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1149 },
1150 /* MULT DImode */
1151 {
1152 0, /* simple (N/A). */
1153 0, /* flag_setting (N/A). */
1154 COSTS_N_INSNS (2), /* extend. */
1155 0, /* add (N/A). */
1156 COSTS_N_INSNS (2), /* extend_add. */
1157 0 /* idiv (N/A). */
1158 }
1159 },
1160 /* LD/ST */
1161 {
1162 COSTS_N_INSNS (1), /* load. */
1163 COSTS_N_INSNS (1), /* load_sign_extend. */
1164 COSTS_N_INSNS (1), /* ldrd. */
1165 COSTS_N_INSNS (1), /* ldm_1st. */
1166 1, /* ldm_regs_per_insn_1st. */
1167 2, /* ldm_regs_per_insn_subsequent. */
1168 COSTS_N_INSNS (1), /* loadf. */
1169 COSTS_N_INSNS (1), /* loadd. */
1170 COSTS_N_INSNS (1), /* load_unaligned. */
1171 COSTS_N_INSNS (1), /* store. */
1172 COSTS_N_INSNS (1), /* strd. */
1173 COSTS_N_INSNS (1), /* stm_1st. */
1174 1, /* stm_regs_per_insn_1st. */
1175 2, /* stm_regs_per_insn_subsequent. */
1176 COSTS_N_INSNS (1), /* storef. */
1177 COSTS_N_INSNS (1), /* stored. */
1178 COSTS_N_INSNS (1), /* store_unaligned. */
1179 COSTS_N_INSNS (1), /* loadv. */
1180 COSTS_N_INSNS (1) /* storev. */
1181 },
1182 {
1183 /* FP SFmode */
1184 {
1185 COSTS_N_INSNS (36), /* div. */
1186 COSTS_N_INSNS (11), /* mult. */
1187 COSTS_N_INSNS (20), /* mult_addsub. */
1188 COSTS_N_INSNS (30), /* fma. */
1189 COSTS_N_INSNS (9), /* addsub. */
1190 COSTS_N_INSNS (3), /* fpconst. */
1191 COSTS_N_INSNS (3), /* neg. */
1192 COSTS_N_INSNS (6), /* compare. */
1193 COSTS_N_INSNS (4), /* widen. */
1194 COSTS_N_INSNS (4), /* narrow. */
1195 COSTS_N_INSNS (8), /* toint. */
1196 COSTS_N_INSNS (8), /* fromint. */
1197 COSTS_N_INSNS (8) /* roundint. */
1198 },
1199 /* FP DFmode */
1200 {
1201 COSTS_N_INSNS (64), /* div. */
1202 COSTS_N_INSNS (16), /* mult. */
1203 COSTS_N_INSNS (25), /* mult_addsub. */
1204 COSTS_N_INSNS (30), /* fma. */
1205 COSTS_N_INSNS (9), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (6), /* compare. */
1209 COSTS_N_INSNS (6), /* widen. */
1210 COSTS_N_INSNS (6), /* narrow. */
1211 COSTS_N_INSNS (8), /* toint. */
1212 COSTS_N_INSNS (8), /* fromint. */
1213 COSTS_N_INSNS (8) /* roundint. */
1214 }
1215 },
1216 /* Vector */
1217 {
1218 COSTS_N_INSNS (1) /* alu. */
1219 }
1220 };
1221
1222 const struct cpu_cost_table cortexa5_extra_costs =
1223 {
1224 /* ALU */
1225 {
1226 0, /* arith. */
1227 0, /* logical. */
1228 COSTS_N_INSNS (1), /* shift. */
1229 COSTS_N_INSNS (1), /* shift_reg. */
1230 COSTS_N_INSNS (1), /* arith_shift. */
1231 COSTS_N_INSNS (1), /* arith_shift_reg. */
1232 COSTS_N_INSNS (1), /* log_shift. */
1233 COSTS_N_INSNS (1), /* log_shift_reg. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* extend_arith. */
1236 COSTS_N_INSNS (1), /* bfi. */
1237 COSTS_N_INSNS (1), /* bfx. */
1238 COSTS_N_INSNS (1), /* clz. */
1239 COSTS_N_INSNS (1), /* rev. */
1240 0, /* non_exec. */
1241 true /* non_exec_costs_exec. */
1242 },
1243
1244 {
1245 /* MULT SImode */
1246 {
1247 0, /* simple. */
1248 COSTS_N_INSNS (1), /* flag_setting. */
1249 COSTS_N_INSNS (1), /* extend. */
1250 COSTS_N_INSNS (1), /* add. */
1251 COSTS_N_INSNS (1), /* extend_add. */
1252 COSTS_N_INSNS (7) /* idiv. */
1253 },
1254 /* MULT DImode */
1255 {
1256 0, /* simple (N/A). */
1257 0, /* flag_setting (N/A). */
1258 COSTS_N_INSNS (1), /* extend. */
1259 0, /* add. */
1260 COSTS_N_INSNS (2), /* extend_add. */
1261 0 /* idiv (N/A). */
1262 }
1263 },
1264 /* LD/ST */
1265 {
1266 COSTS_N_INSNS (1), /* load. */
1267 COSTS_N_INSNS (1), /* load_sign_extend. */
1268 COSTS_N_INSNS (6), /* ldrd. */
1269 COSTS_N_INSNS (1), /* ldm_1st. */
1270 1, /* ldm_regs_per_insn_1st. */
1271 2, /* ldm_regs_per_insn_subsequent. */
1272 COSTS_N_INSNS (2), /* loadf. */
1273 COSTS_N_INSNS (4), /* loadd. */
1274 COSTS_N_INSNS (1), /* load_unaligned. */
1275 COSTS_N_INSNS (1), /* store. */
1276 COSTS_N_INSNS (3), /* strd. */
1277 COSTS_N_INSNS (1), /* stm_1st. */
1278 1, /* stm_regs_per_insn_1st. */
1279 2, /* stm_regs_per_insn_subsequent. */
1280 COSTS_N_INSNS (2), /* storef. */
1281 COSTS_N_INSNS (2), /* stored. */
1282 COSTS_N_INSNS (1), /* store_unaligned. */
1283 COSTS_N_INSNS (1), /* loadv. */
1284 COSTS_N_INSNS (1) /* storev. */
1285 },
1286 {
1287 /* FP SFmode */
1288 {
1289 COSTS_N_INSNS (15), /* div. */
1290 COSTS_N_INSNS (3), /* mult. */
1291 COSTS_N_INSNS (7), /* mult_addsub. */
1292 COSTS_N_INSNS (7), /* fma. */
1293 COSTS_N_INSNS (3), /* addsub. */
1294 COSTS_N_INSNS (3), /* fpconst. */
1295 COSTS_N_INSNS (3), /* neg. */
1296 COSTS_N_INSNS (3), /* compare. */
1297 COSTS_N_INSNS (3), /* widen. */
1298 COSTS_N_INSNS (3), /* narrow. */
1299 COSTS_N_INSNS (3), /* toint. */
1300 COSTS_N_INSNS (3), /* fromint. */
1301 COSTS_N_INSNS (3) /* roundint. */
1302 },
1303 /* FP DFmode */
1304 {
1305 COSTS_N_INSNS (30), /* div. */
1306 COSTS_N_INSNS (6), /* mult. */
1307 COSTS_N_INSNS (10), /* mult_addsub. */
1308 COSTS_N_INSNS (7), /* fma. */
1309 COSTS_N_INSNS (3), /* addsub. */
1310 COSTS_N_INSNS (3), /* fpconst. */
1311 COSTS_N_INSNS (3), /* neg. */
1312 COSTS_N_INSNS (3), /* compare. */
1313 COSTS_N_INSNS (3), /* widen. */
1314 COSTS_N_INSNS (3), /* narrow. */
1315 COSTS_N_INSNS (3), /* toint. */
1316 COSTS_N_INSNS (3), /* fromint. */
1317 COSTS_N_INSNS (3) /* roundint. */
1318 }
1319 },
1320 /* Vector */
1321 {
1322 COSTS_N_INSNS (1) /* alu. */
1323 }
1324 };
1325
1326
1327 const struct cpu_cost_table cortexa7_extra_costs =
1328 {
1329 /* ALU */
1330 {
1331 0, /* arith. */
1332 0, /* logical. */
1333 COSTS_N_INSNS (1), /* shift. */
1334 COSTS_N_INSNS (1), /* shift_reg. */
1335 COSTS_N_INSNS (1), /* arith_shift. */
1336 COSTS_N_INSNS (1), /* arith_shift_reg. */
1337 COSTS_N_INSNS (1), /* log_shift. */
1338 COSTS_N_INSNS (1), /* log_shift_reg. */
1339 COSTS_N_INSNS (1), /* extend. */
1340 COSTS_N_INSNS (1), /* extend_arith. */
1341 COSTS_N_INSNS (1), /* bfi. */
1342 COSTS_N_INSNS (1), /* bfx. */
1343 COSTS_N_INSNS (1), /* clz. */
1344 COSTS_N_INSNS (1), /* rev. */
1345 0, /* non_exec. */
1346 true /* non_exec_costs_exec. */
1347 },
1348
1349 {
1350 /* MULT SImode */
1351 {
1352 0, /* simple. */
1353 COSTS_N_INSNS (1), /* flag_setting. */
1354 COSTS_N_INSNS (1), /* extend. */
1355 COSTS_N_INSNS (1), /* add. */
1356 COSTS_N_INSNS (1), /* extend_add. */
1357 COSTS_N_INSNS (7) /* idiv. */
1358 },
1359 /* MULT DImode */
1360 {
1361 0, /* simple (N/A). */
1362 0, /* flag_setting (N/A). */
1363 COSTS_N_INSNS (1), /* extend. */
1364 0, /* add. */
1365 COSTS_N_INSNS (2), /* extend_add. */
1366 0 /* idiv (N/A). */
1367 }
1368 },
1369 /* LD/ST */
1370 {
1371 COSTS_N_INSNS (1), /* load. */
1372 COSTS_N_INSNS (1), /* load_sign_extend. */
1373 COSTS_N_INSNS (3), /* ldrd. */
1374 COSTS_N_INSNS (1), /* ldm_1st. */
1375 1, /* ldm_regs_per_insn_1st. */
1376 2, /* ldm_regs_per_insn_subsequent. */
1377 COSTS_N_INSNS (2), /* loadf. */
1378 COSTS_N_INSNS (2), /* loadd. */
1379 COSTS_N_INSNS (1), /* load_unaligned. */
1380 COSTS_N_INSNS (1), /* store. */
1381 COSTS_N_INSNS (3), /* strd. */
1382 COSTS_N_INSNS (1), /* stm_1st. */
1383 1, /* stm_regs_per_insn_1st. */
1384 2, /* stm_regs_per_insn_subsequent. */
1385 COSTS_N_INSNS (2), /* storef. */
1386 COSTS_N_INSNS (2), /* stored. */
1387 COSTS_N_INSNS (1), /* store_unaligned. */
1388 COSTS_N_INSNS (1), /* loadv. */
1389 COSTS_N_INSNS (1) /* storev. */
1390 },
1391 {
1392 /* FP SFmode */
1393 {
1394 COSTS_N_INSNS (15), /* div. */
1395 COSTS_N_INSNS (3), /* mult. */
1396 COSTS_N_INSNS (7), /* mult_addsub. */
1397 COSTS_N_INSNS (7), /* fma. */
1398 COSTS_N_INSNS (3), /* addsub. */
1399 COSTS_N_INSNS (3), /* fpconst. */
1400 COSTS_N_INSNS (3), /* neg. */
1401 COSTS_N_INSNS (3), /* compare. */
1402 COSTS_N_INSNS (3), /* widen. */
1403 COSTS_N_INSNS (3), /* narrow. */
1404 COSTS_N_INSNS (3), /* toint. */
1405 COSTS_N_INSNS (3), /* fromint. */
1406 COSTS_N_INSNS (3) /* roundint. */
1407 },
1408 /* FP DFmode */
1409 {
1410 COSTS_N_INSNS (30), /* div. */
1411 COSTS_N_INSNS (6), /* mult. */
1412 COSTS_N_INSNS (10), /* mult_addsub. */
1413 COSTS_N_INSNS (7), /* fma. */
1414 COSTS_N_INSNS (3), /* addsub. */
1415 COSTS_N_INSNS (3), /* fpconst. */
1416 COSTS_N_INSNS (3), /* neg. */
1417 COSTS_N_INSNS (3), /* compare. */
1418 COSTS_N_INSNS (3), /* widen. */
1419 COSTS_N_INSNS (3), /* narrow. */
1420 COSTS_N_INSNS (3), /* toint. */
1421 COSTS_N_INSNS (3), /* fromint. */
1422 COSTS_N_INSNS (3) /* roundint. */
1423 }
1424 },
1425 /* Vector */
1426 {
1427 COSTS_N_INSNS (1) /* alu. */
1428 }
1429 };
1430
1431 const struct cpu_cost_table cortexa12_extra_costs =
1432 {
1433 /* ALU */
1434 {
1435 0, /* arith. */
1436 0, /* logical. */
1437 0, /* shift. */
1438 COSTS_N_INSNS (1), /* shift_reg. */
1439 COSTS_N_INSNS (1), /* arith_shift. */
1440 COSTS_N_INSNS (1), /* arith_shift_reg. */
1441 COSTS_N_INSNS (1), /* log_shift. */
1442 COSTS_N_INSNS (1), /* log_shift_reg. */
1443 0, /* extend. */
1444 COSTS_N_INSNS (1), /* extend_arith. */
1445 0, /* bfi. */
1446 COSTS_N_INSNS (1), /* bfx. */
1447 COSTS_N_INSNS (1), /* clz. */
1448 COSTS_N_INSNS (1), /* rev. */
1449 0, /* non_exec. */
1450 true /* non_exec_costs_exec. */
1451 },
1452 /* MULT SImode */
1453 {
1454 {
1455 COSTS_N_INSNS (2), /* simple. */
1456 COSTS_N_INSNS (3), /* flag_setting. */
1457 COSTS_N_INSNS (2), /* extend. */
1458 COSTS_N_INSNS (3), /* add. */
1459 COSTS_N_INSNS (2), /* extend_add. */
1460 COSTS_N_INSNS (18) /* idiv. */
1461 },
1462 /* MULT DImode */
1463 {
1464 0, /* simple (N/A). */
1465 0, /* flag_setting (N/A). */
1466 COSTS_N_INSNS (3), /* extend. */
1467 0, /* add (N/A). */
1468 COSTS_N_INSNS (3), /* extend_add. */
1469 0 /* idiv (N/A). */
1470 }
1471 },
1472 /* LD/ST */
1473 {
1474 COSTS_N_INSNS (3), /* load. */
1475 COSTS_N_INSNS (3), /* load_sign_extend. */
1476 COSTS_N_INSNS (3), /* ldrd. */
1477 COSTS_N_INSNS (3), /* ldm_1st. */
1478 1, /* ldm_regs_per_insn_1st. */
1479 2, /* ldm_regs_per_insn_subsequent. */
1480 COSTS_N_INSNS (3), /* loadf. */
1481 COSTS_N_INSNS (3), /* loadd. */
1482 0, /* load_unaligned. */
1483 0, /* store. */
1484 0, /* strd. */
1485 0, /* stm_1st. */
1486 1, /* stm_regs_per_insn_1st. */
1487 2, /* stm_regs_per_insn_subsequent. */
1488 COSTS_N_INSNS (2), /* storef. */
1489 COSTS_N_INSNS (2), /* stored. */
1490 0, /* store_unaligned. */
1491 COSTS_N_INSNS (1), /* loadv. */
1492 COSTS_N_INSNS (1) /* storev. */
1493 },
1494 {
1495 /* FP SFmode */
1496 {
1497 COSTS_N_INSNS (17), /* div. */
1498 COSTS_N_INSNS (4), /* mult. */
1499 COSTS_N_INSNS (8), /* mult_addsub. */
1500 COSTS_N_INSNS (8), /* fma. */
1501 COSTS_N_INSNS (4), /* addsub. */
1502 COSTS_N_INSNS (2), /* fpconst. */
1503 COSTS_N_INSNS (2), /* neg. */
1504 COSTS_N_INSNS (2), /* compare. */
1505 COSTS_N_INSNS (4), /* widen. */
1506 COSTS_N_INSNS (4), /* narrow. */
1507 COSTS_N_INSNS (4), /* toint. */
1508 COSTS_N_INSNS (4), /* fromint. */
1509 COSTS_N_INSNS (4) /* roundint. */
1510 },
1511 /* FP DFmode */
1512 {
1513 COSTS_N_INSNS (31), /* div. */
1514 COSTS_N_INSNS (4), /* mult. */
1515 COSTS_N_INSNS (8), /* mult_addsub. */
1516 COSTS_N_INSNS (8), /* fma. */
1517 COSTS_N_INSNS (4), /* addsub. */
1518 COSTS_N_INSNS (2), /* fpconst. */
1519 COSTS_N_INSNS (2), /* neg. */
1520 COSTS_N_INSNS (2), /* compare. */
1521 COSTS_N_INSNS (4), /* widen. */
1522 COSTS_N_INSNS (4), /* narrow. */
1523 COSTS_N_INSNS (4), /* toint. */
1524 COSTS_N_INSNS (4), /* fromint. */
1525 COSTS_N_INSNS (4) /* roundint. */
1526 }
1527 },
1528 /* Vector */
1529 {
1530 COSTS_N_INSNS (1) /* alu. */
1531 }
1532 };
1533
1534 const struct cpu_cost_table cortexa15_extra_costs =
1535 {
1536 /* ALU */
1537 {
1538 0, /* arith. */
1539 0, /* logical. */
1540 0, /* shift. */
1541 0, /* shift_reg. */
1542 COSTS_N_INSNS (1), /* arith_shift. */
1543 COSTS_N_INSNS (1), /* arith_shift_reg. */
1544 COSTS_N_INSNS (1), /* log_shift. */
1545 COSTS_N_INSNS (1), /* log_shift_reg. */
1546 0, /* extend. */
1547 COSTS_N_INSNS (1), /* extend_arith. */
1548 COSTS_N_INSNS (1), /* bfi. */
1549 0, /* bfx. */
1550 0, /* clz. */
1551 0, /* rev. */
1552 0, /* non_exec. */
1553 true /* non_exec_costs_exec. */
1554 },
1555 /* MULT SImode */
1556 {
1557 {
1558 COSTS_N_INSNS (2), /* simple. */
1559 COSTS_N_INSNS (3), /* flag_setting. */
1560 COSTS_N_INSNS (2), /* extend. */
1561 COSTS_N_INSNS (2), /* add. */
1562 COSTS_N_INSNS (2), /* extend_add. */
1563 COSTS_N_INSNS (18) /* idiv. */
1564 },
1565 /* MULT DImode */
1566 {
1567 0, /* simple (N/A). */
1568 0, /* flag_setting (N/A). */
1569 COSTS_N_INSNS (3), /* extend. */
1570 0, /* add (N/A). */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 0 /* idiv (N/A). */
1573 }
1574 },
1575 /* LD/ST */
1576 {
1577 COSTS_N_INSNS (3), /* load. */
1578 COSTS_N_INSNS (3), /* load_sign_extend. */
1579 COSTS_N_INSNS (3), /* ldrd. */
1580 COSTS_N_INSNS (4), /* ldm_1st. */
1581 1, /* ldm_regs_per_insn_1st. */
1582 2, /* ldm_regs_per_insn_subsequent. */
1583 COSTS_N_INSNS (4), /* loadf. */
1584 COSTS_N_INSNS (4), /* loadd. */
1585 0, /* load_unaligned. */
1586 0, /* store. */
1587 0, /* strd. */
1588 COSTS_N_INSNS (1), /* stm_1st. */
1589 1, /* stm_regs_per_insn_1st. */
1590 2, /* stm_regs_per_insn_subsequent. */
1591 0, /* storef. */
1592 0, /* stored. */
1593 0, /* store_unaligned. */
1594 COSTS_N_INSNS (1), /* loadv. */
1595 COSTS_N_INSNS (1) /* storev. */
1596 },
1597 {
1598 /* FP SFmode */
1599 {
1600 COSTS_N_INSNS (17), /* div. */
1601 COSTS_N_INSNS (4), /* mult. */
1602 COSTS_N_INSNS (8), /* mult_addsub. */
1603 COSTS_N_INSNS (8), /* fma. */
1604 COSTS_N_INSNS (4), /* addsub. */
1605 COSTS_N_INSNS (2), /* fpconst. */
1606 COSTS_N_INSNS (2), /* neg. */
1607 COSTS_N_INSNS (5), /* compare. */
1608 COSTS_N_INSNS (4), /* widen. */
1609 COSTS_N_INSNS (4), /* narrow. */
1610 COSTS_N_INSNS (4), /* toint. */
1611 COSTS_N_INSNS (4), /* fromint. */
1612 COSTS_N_INSNS (4) /* roundint. */
1613 },
1614 /* FP DFmode */
1615 {
1616 COSTS_N_INSNS (31), /* div. */
1617 COSTS_N_INSNS (4), /* mult. */
1618 COSTS_N_INSNS (8), /* mult_addsub. */
1619 COSTS_N_INSNS (8), /* fma. */
1620 COSTS_N_INSNS (4), /* addsub. */
1621 COSTS_N_INSNS (2), /* fpconst. */
1622 COSTS_N_INSNS (2), /* neg. */
1623 COSTS_N_INSNS (2), /* compare. */
1624 COSTS_N_INSNS (4), /* widen. */
1625 COSTS_N_INSNS (4), /* narrow. */
1626 COSTS_N_INSNS (4), /* toint. */
1627 COSTS_N_INSNS (4), /* fromint. */
1628 COSTS_N_INSNS (4) /* roundint. */
1629 }
1630 },
1631 /* Vector */
1632 {
1633 COSTS_N_INSNS (1) /* alu. */
1634 }
1635 };
1636
1637 const struct cpu_cost_table v7m_extra_costs =
1638 {
1639 /* ALU */
1640 {
1641 0, /* arith. */
1642 0, /* logical. */
1643 0, /* shift. */
1644 0, /* shift_reg. */
1645 0, /* arith_shift. */
1646 COSTS_N_INSNS (1), /* arith_shift_reg. */
1647 0, /* log_shift. */
1648 COSTS_N_INSNS (1), /* log_shift_reg. */
1649 0, /* extend. */
1650 COSTS_N_INSNS (1), /* extend_arith. */
1651 0, /* bfi. */
1652 0, /* bfx. */
1653 0, /* clz. */
1654 0, /* rev. */
1655 COSTS_N_INSNS (1), /* non_exec. */
1656 false /* non_exec_costs_exec. */
1657 },
1658 {
1659 /* MULT SImode */
1660 {
1661 COSTS_N_INSNS (1), /* simple. */
1662 COSTS_N_INSNS (1), /* flag_setting. */
1663 COSTS_N_INSNS (2), /* extend. */
1664 COSTS_N_INSNS (1), /* add. */
1665 COSTS_N_INSNS (3), /* extend_add. */
1666 COSTS_N_INSNS (8) /* idiv. */
1667 },
1668 /* MULT DImode */
1669 {
1670 0, /* simple (N/A). */
1671 0, /* flag_setting (N/A). */
1672 COSTS_N_INSNS (2), /* extend. */
1673 0, /* add (N/A). */
1674 COSTS_N_INSNS (3), /* extend_add. */
1675 0 /* idiv (N/A). */
1676 }
1677 },
1678 /* LD/ST */
1679 {
1680 COSTS_N_INSNS (2), /* load. */
1681 0, /* load_sign_extend. */
1682 COSTS_N_INSNS (3), /* ldrd. */
1683 COSTS_N_INSNS (2), /* ldm_1st. */
1684 1, /* ldm_regs_per_insn_1st. */
1685 1, /* ldm_regs_per_insn_subsequent. */
1686 COSTS_N_INSNS (2), /* loadf. */
1687 COSTS_N_INSNS (3), /* loadd. */
1688 COSTS_N_INSNS (1), /* load_unaligned. */
1689 COSTS_N_INSNS (2), /* store. */
1690 COSTS_N_INSNS (3), /* strd. */
1691 COSTS_N_INSNS (2), /* stm_1st. */
1692 1, /* stm_regs_per_insn_1st. */
1693 1, /* stm_regs_per_insn_subsequent. */
1694 COSTS_N_INSNS (2), /* storef. */
1695 COSTS_N_INSNS (3), /* stored. */
1696 COSTS_N_INSNS (1), /* store_unaligned. */
1697 COSTS_N_INSNS (1), /* loadv. */
1698 COSTS_N_INSNS (1) /* storev. */
1699 },
1700 {
1701 /* FP SFmode */
1702 {
1703 COSTS_N_INSNS (7), /* div. */
1704 COSTS_N_INSNS (2), /* mult. */
1705 COSTS_N_INSNS (5), /* mult_addsub. */
1706 COSTS_N_INSNS (3), /* fma. */
1707 COSTS_N_INSNS (1), /* addsub. */
1708 0, /* fpconst. */
1709 0, /* neg. */
1710 0, /* compare. */
1711 0, /* widen. */
1712 0, /* narrow. */
1713 0, /* toint. */
1714 0, /* fromint. */
1715 0 /* roundint. */
1716 },
1717 /* FP DFmode */
1718 {
1719 COSTS_N_INSNS (15), /* div. */
1720 COSTS_N_INSNS (5), /* mult. */
1721 COSTS_N_INSNS (7), /* mult_addsub. */
1722 COSTS_N_INSNS (7), /* fma. */
1723 COSTS_N_INSNS (3), /* addsub. */
1724 0, /* fpconst. */
1725 0, /* neg. */
1726 0, /* compare. */
1727 0, /* widen. */
1728 0, /* narrow. */
1729 0, /* toint. */
1730 0, /* fromint. */
1731 0 /* roundint. */
1732 }
1733 },
1734 /* Vector */
1735 {
1736 COSTS_N_INSNS (1) /* alu. */
1737 }
1738 };
1739
1740 const struct tune_params arm_slowmul_tune =
1741 {
1742 &generic_extra_costs, /* Insn extra costs. */
1743 NULL, /* Sched adj cost. */
1744 arm_default_branch_cost,
1745 &arm_default_vec_cost,
1746 3, /* Constant limit. */
1747 5, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL,
1751 tune_params::PREF_CONST_POOL_TRUE,
1752 tune_params::PREF_LDRD_FALSE,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER,
1756 tune_params::PREF_NEON_64_FALSE,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE,
1758 tune_params::FUSE_NOTHING,
1759 tune_params::SCHED_AUTOPREF_OFF
1760 };
1761
1762 const struct tune_params arm_fastmul_tune =
1763 {
1764 &generic_extra_costs, /* Insn extra costs. */
1765 NULL, /* Sched adj cost. */
1766 arm_default_branch_cost,
1767 &arm_default_vec_cost,
1768 1, /* Constant limit. */
1769 5, /* Max cond insns. */
1770 8, /* Memset max inline. */
1771 1, /* Issue rate. */
1772 ARM_PREFETCH_NOT_BENEFICIAL,
1773 tune_params::PREF_CONST_POOL_TRUE,
1774 tune_params::PREF_LDRD_FALSE,
1775 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1777 tune_params::DISPARAGE_FLAGS_NEITHER,
1778 tune_params::PREF_NEON_64_FALSE,
1779 tune_params::PREF_NEON_STRINGOPS_FALSE,
1780 tune_params::FUSE_NOTHING,
1781 tune_params::SCHED_AUTOPREF_OFF
1782 };
1783
1784 /* StrongARM has early execution of branches, so a sequence that is worth
1785 skipping is shorter. Set max_insns_skipped to a lower value. */
1786
1787 const struct tune_params arm_strongarm_tune =
1788 {
1789 &generic_extra_costs, /* Insn extra costs. */
1790 NULL, /* Sched adj cost. */
1791 arm_default_branch_cost,
1792 &arm_default_vec_cost,
1793 1, /* Constant limit. */
1794 3, /* Max cond insns. */
1795 8, /* Memset max inline. */
1796 1, /* Issue rate. */
1797 ARM_PREFETCH_NOT_BENEFICIAL,
1798 tune_params::PREF_CONST_POOL_TRUE,
1799 tune_params::PREF_LDRD_FALSE,
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1802 tune_params::DISPARAGE_FLAGS_NEITHER,
1803 tune_params::PREF_NEON_64_FALSE,
1804 tune_params::PREF_NEON_STRINGOPS_FALSE,
1805 tune_params::FUSE_NOTHING,
1806 tune_params::SCHED_AUTOPREF_OFF
1807 };
1808
1809 const struct tune_params arm_xscale_tune =
1810 {
1811 &generic_extra_costs, /* Insn extra costs. */
1812 xscale_sched_adjust_cost,
1813 arm_default_branch_cost,
1814 &arm_default_vec_cost,
1815 2, /* Constant limit. */
1816 3, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 1, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL,
1820 tune_params::PREF_CONST_POOL_TRUE,
1821 tune_params::PREF_LDRD_FALSE,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER,
1825 tune_params::PREF_NEON_64_FALSE,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE,
1827 tune_params::FUSE_NOTHING,
1828 tune_params::SCHED_AUTOPREF_OFF
1829 };
1830
1831 const struct tune_params arm_9e_tune =
1832 {
1833 &generic_extra_costs, /* Insn extra costs. */
1834 NULL, /* Sched adj cost. */
1835 arm_default_branch_cost,
1836 &arm_default_vec_cost,
1837 1, /* Constant limit. */
1838 5, /* Max cond insns. */
1839 8, /* Memset max inline. */
1840 1, /* Issue rate. */
1841 ARM_PREFETCH_NOT_BENEFICIAL,
1842 tune_params::PREF_CONST_POOL_TRUE,
1843 tune_params::PREF_LDRD_FALSE,
1844 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1846 tune_params::DISPARAGE_FLAGS_NEITHER,
1847 tune_params::PREF_NEON_64_FALSE,
1848 tune_params::PREF_NEON_STRINGOPS_FALSE,
1849 tune_params::FUSE_NOTHING,
1850 tune_params::SCHED_AUTOPREF_OFF
1851 };
1852
1853 const struct tune_params arm_marvell_pj4_tune =
1854 {
1855 &generic_extra_costs, /* Insn extra costs. */
1856 NULL, /* Sched adj cost. */
1857 arm_default_branch_cost,
1858 &arm_default_vec_cost,
1859 1, /* Constant limit. */
1860 5, /* Max cond insns. */
1861 8, /* Memset max inline. */
1862 2, /* Issue rate. */
1863 ARM_PREFETCH_NOT_BENEFICIAL,
1864 tune_params::PREF_CONST_POOL_TRUE,
1865 tune_params::PREF_LDRD_FALSE,
1866 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1868 tune_params::DISPARAGE_FLAGS_NEITHER,
1869 tune_params::PREF_NEON_64_FALSE,
1870 tune_params::PREF_NEON_STRINGOPS_FALSE,
1871 tune_params::FUSE_NOTHING,
1872 tune_params::SCHED_AUTOPREF_OFF
1873 };
1874
1875 const struct tune_params arm_v6t2_tune =
1876 {
1877 &generic_extra_costs, /* Insn extra costs. */
1878 NULL, /* Sched adj cost. */
1879 arm_default_branch_cost,
1880 &arm_default_vec_cost,
1881 1, /* Constant limit. */
1882 5, /* Max cond insns. */
1883 8, /* Memset max inline. */
1884 1, /* Issue rate. */
1885 ARM_PREFETCH_NOT_BENEFICIAL,
1886 tune_params::PREF_CONST_POOL_FALSE,
1887 tune_params::PREF_LDRD_FALSE,
1888 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1890 tune_params::DISPARAGE_FLAGS_NEITHER,
1891 tune_params::PREF_NEON_64_FALSE,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897
1898 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1899 const struct tune_params arm_cortex_tune =
1900 {
1901 &generic_extra_costs,
1902 NULL, /* Sched adj cost. */
1903 arm_default_branch_cost,
1904 &arm_default_vec_cost,
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 8, /* Memset max inline. */
1908 2, /* Issue rate. */
1909 ARM_PREFETCH_NOT_BENEFICIAL,
1910 tune_params::PREF_CONST_POOL_FALSE,
1911 tune_params::PREF_LDRD_FALSE,
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1913 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1914 tune_params::DISPARAGE_FLAGS_NEITHER,
1915 tune_params::PREF_NEON_64_FALSE,
1916 tune_params::PREF_NEON_STRINGOPS_FALSE,
1917 tune_params::FUSE_NOTHING,
1918 tune_params::SCHED_AUTOPREF_OFF
1919 };
1920
1921 const struct tune_params arm_cortex_a8_tune =
1922 {
1923 &cortexa8_extra_costs,
1924 NULL, /* Sched adj cost. */
1925 arm_default_branch_cost,
1926 &arm_default_vec_cost,
1927 1, /* Constant limit. */
1928 5, /* Max cond insns. */
1929 8, /* Memset max inline. */
1930 2, /* Issue rate. */
1931 ARM_PREFETCH_NOT_BENEFICIAL,
1932 tune_params::PREF_CONST_POOL_FALSE,
1933 tune_params::PREF_LDRD_FALSE,
1934 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1936 tune_params::DISPARAGE_FLAGS_NEITHER,
1937 tune_params::PREF_NEON_64_FALSE,
1938 tune_params::PREF_NEON_STRINGOPS_TRUE,
1939 tune_params::FUSE_NOTHING,
1940 tune_params::SCHED_AUTOPREF_OFF
1941 };
1942
1943 const struct tune_params arm_cortex_a7_tune =
1944 {
1945 &cortexa7_extra_costs,
1946 NULL, /* Sched adj cost. */
1947 arm_default_branch_cost,
1948 &arm_default_vec_cost,
1949 1, /* Constant limit. */
1950 5, /* Max cond insns. */
1951 8, /* Memset max inline. */
1952 2, /* Issue rate. */
1953 ARM_PREFETCH_NOT_BENEFICIAL,
1954 tune_params::PREF_CONST_POOL_FALSE,
1955 tune_params::PREF_LDRD_FALSE,
1956 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1958 tune_params::DISPARAGE_FLAGS_NEITHER,
1959 tune_params::PREF_NEON_64_FALSE,
1960 tune_params::PREF_NEON_STRINGOPS_TRUE,
1961 tune_params::FUSE_NOTHING,
1962 tune_params::SCHED_AUTOPREF_OFF
1963 };
1964
1965 const struct tune_params arm_cortex_a15_tune =
1966 {
1967 &cortexa15_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 arm_default_branch_cost,
1970 &arm_default_vec_cost,
1971 1, /* Constant limit. */
1972 2, /* Max cond insns. */
1973 8, /* Memset max inline. */
1974 3, /* Issue rate. */
1975 ARM_PREFETCH_NOT_BENEFICIAL,
1976 tune_params::PREF_CONST_POOL_FALSE,
1977 tune_params::PREF_LDRD_TRUE,
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1980 tune_params::DISPARAGE_FLAGS_ALL,
1981 tune_params::PREF_NEON_64_FALSE,
1982 tune_params::PREF_NEON_STRINGOPS_TRUE,
1983 tune_params::FUSE_NOTHING,
1984 tune_params::SCHED_AUTOPREF_FULL
1985 };
1986
1987 const struct tune_params arm_cortex_a35_tune =
1988 {
1989 &cortexa53_extra_costs,
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 1, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_FALSE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2006 tune_params::SCHED_AUTOPREF_OFF
2007 };
2008
2009 const struct tune_params arm_cortex_a53_tune =
2010 {
2011 &cortexa53_extra_costs,
2012 NULL, /* Sched adj cost. */
2013 arm_default_branch_cost,
2014 &arm_default_vec_cost,
2015 1, /* Constant limit. */
2016 5, /* Max cond insns. */
2017 8, /* Memset max inline. */
2018 2, /* Issue rate. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 tune_params::PREF_CONST_POOL_FALSE,
2021 tune_params::PREF_LDRD_FALSE,
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2024 tune_params::DISPARAGE_FLAGS_NEITHER,
2025 tune_params::PREF_NEON_64_FALSE,
2026 tune_params::PREF_NEON_STRINGOPS_TRUE,
2027 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2028 tune_params::SCHED_AUTOPREF_OFF
2029 };
2030
2031 const struct tune_params arm_cortex_a57_tune =
2032 {
2033 &cortexa57_extra_costs,
2034 NULL, /* Sched adj cost. */
2035 arm_default_branch_cost,
2036 &arm_default_vec_cost,
2037 1, /* Constant limit. */
2038 2, /* Max cond insns. */
2039 8, /* Memset max inline. */
2040 3, /* Issue rate. */
2041 ARM_PREFETCH_NOT_BENEFICIAL,
2042 tune_params::PREF_CONST_POOL_FALSE,
2043 tune_params::PREF_LDRD_TRUE,
2044 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2046 tune_params::DISPARAGE_FLAGS_ALL,
2047 tune_params::PREF_NEON_64_FALSE,
2048 tune_params::PREF_NEON_STRINGOPS_TRUE,
2049 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2050 tune_params::SCHED_AUTOPREF_FULL
2051 };
2052
2053 const struct tune_params arm_exynosm1_tune =
2054 {
2055 &exynosm1_extra_costs,
2056 NULL, /* Sched adj cost. */
2057 arm_default_branch_cost,
2058 &arm_default_vec_cost,
2059 1, /* Constant limit. */
2060 2, /* Max cond insns. */
2061 8, /* Memset max inline. */
2062 3, /* Issue rate. */
2063 ARM_PREFETCH_NOT_BENEFICIAL,
2064 tune_params::PREF_CONST_POOL_FALSE,
2065 tune_params::PREF_LDRD_TRUE,
2066 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2068 tune_params::DISPARAGE_FLAGS_ALL,
2069 tune_params::PREF_NEON_64_FALSE,
2070 tune_params::PREF_NEON_STRINGOPS_TRUE,
2071 tune_params::FUSE_NOTHING,
2072 tune_params::SCHED_AUTOPREF_OFF
2073 };
2074
2075 const struct tune_params arm_xgene1_tune =
2076 {
2077 &xgene1_extra_costs,
2078 NULL, /* Sched adj cost. */
2079 arm_default_branch_cost,
2080 &arm_default_vec_cost,
2081 1, /* Constant limit. */
2082 2, /* Max cond insns. */
2083 32, /* Memset max inline. */
2084 4, /* Issue rate. */
2085 ARM_PREFETCH_NOT_BENEFICIAL,
2086 tune_params::PREF_CONST_POOL_FALSE,
2087 tune_params::PREF_LDRD_TRUE,
2088 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2090 tune_params::DISPARAGE_FLAGS_ALL,
2091 tune_params::PREF_NEON_64_FALSE,
2092 tune_params::PREF_NEON_STRINGOPS_FALSE,
2093 tune_params::FUSE_NOTHING,
2094 tune_params::SCHED_AUTOPREF_OFF
2095 };
2096
2097 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2098 less appealing. Set max_insns_skipped to a low value. */
2099
2100 const struct tune_params arm_cortex_a5_tune =
2101 {
2102 &cortexa5_extra_costs,
2103 NULL, /* Sched adj cost. */
2104 arm_cortex_a5_branch_cost,
2105 &arm_default_vec_cost,
2106 1, /* Constant limit. */
2107 1, /* Max cond insns. */
2108 8, /* Memset max inline. */
2109 2, /* Issue rate. */
2110 ARM_PREFETCH_NOT_BENEFICIAL,
2111 tune_params::PREF_CONST_POOL_FALSE,
2112 tune_params::PREF_LDRD_FALSE,
2113 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2115 tune_params::DISPARAGE_FLAGS_NEITHER,
2116 tune_params::PREF_NEON_64_FALSE,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_OFF
2120 };
2121
2122 const struct tune_params arm_cortex_a9_tune =
2123 {
2124 &cortexa9_extra_costs,
2125 cortex_a9_sched_adjust_cost,
2126 arm_default_branch_cost,
2127 &arm_default_vec_cost,
2128 1, /* Constant limit. */
2129 5, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_BENEFICIAL(4,32,32),
2133 tune_params::PREF_CONST_POOL_FALSE,
2134 tune_params::PREF_LDRD_FALSE,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER,
2138 tune_params::PREF_NEON_64_FALSE,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE,
2140 tune_params::FUSE_NOTHING,
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 const struct tune_params arm_cortex_a12_tune =
2145 {
2146 &cortexa12_extra_costs,
2147 NULL, /* Sched adj cost. */
2148 arm_default_branch_cost,
2149 &arm_default_vec_cost, /* Vectorizer costs. */
2150 1, /* Constant limit. */
2151 2, /* Max cond insns. */
2152 8, /* Memset max inline. */
2153 2, /* Issue rate. */
2154 ARM_PREFETCH_NOT_BENEFICIAL,
2155 tune_params::PREF_CONST_POOL_FALSE,
2156 tune_params::PREF_LDRD_TRUE,
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2159 tune_params::DISPARAGE_FLAGS_ALL,
2160 tune_params::PREF_NEON_64_FALSE,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 const struct tune_params arm_cortex_a73_tune =
2167 {
2168 &cortexa57_extra_costs,
2169 NULL, /* Sched adj cost. */
2170 arm_default_branch_cost,
2171 &arm_default_vec_cost, /* Vectorizer costs. */
2172 1, /* Constant limit. */
2173 2, /* Max cond insns. */
2174 8, /* Memset max inline. */
2175 2, /* Issue rate. */
2176 ARM_PREFETCH_NOT_BENEFICIAL,
2177 tune_params::PREF_CONST_POOL_FALSE,
2178 tune_params::PREF_LDRD_TRUE,
2179 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2181 tune_params::DISPARAGE_FLAGS_ALL,
2182 tune_params::PREF_NEON_64_FALSE,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2185 tune_params::SCHED_AUTOPREF_FULL
2186 };
2187
2188 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2189 cycle to execute each. An LDR from the constant pool also takes two cycles
2190 to execute, but mildly increases pipelining opportunity (consecutive
2191 loads/stores can be pipelined together, saving one cycle), and may also
2192 improve icache utilisation. Hence we prefer the constant pool for such
2193 processors. */
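/* An illustrative sketch of the trade-off described above (the assembly and
   timings are assumptions for illustration, not taken from this file):

       movw  r0, #0x5678        @ 1 cycle, 4 bytes
       movt  r0, #0x1234        @ 1 cycle, 4 bytes; r0 = 0x12345678

   versus a literal-pool load:

       ldr   r0, .LC0           @ 2 cycles, 2-4 bytes of code
       ...
     .LC0:
       .word 0x12345678         @ plus 4 bytes of pool data

   Both forms take two cycles here, which is why the PREF_CONST_POOL_TRUE
   setting below rests on the pipelining and icache arguments above.  */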
2194
2195 const struct tune_params arm_v7m_tune =
2196 {
2197 &v7m_extra_costs,
2198 NULL, /* Sched adj cost. */
2199 arm_cortex_m_branch_cost,
2200 &arm_default_vec_cost,
2201 1, /* Constant limit. */
2202 2, /* Max cond insns. */
2203 8, /* Memset max inline. */
2204 1, /* Issue rate. */
2205 ARM_PREFETCH_NOT_BENEFICIAL,
2206 tune_params::PREF_CONST_POOL_TRUE,
2207 tune_params::PREF_LDRD_FALSE,
2208 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2209 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2210 tune_params::DISPARAGE_FLAGS_NEITHER,
2211 tune_params::PREF_NEON_64_FALSE,
2212 tune_params::PREF_NEON_STRINGOPS_FALSE,
2213 tune_params::FUSE_NOTHING,
2214 tune_params::SCHED_AUTOPREF_OFF
2215 };
2216
2217 /* Cortex-M7 tuning. */
2218
2219 const struct tune_params arm_cortex_m7_tune =
2220 {
2221 &v7m_extra_costs,
2222 NULL, /* Sched adj cost. */
2223 arm_cortex_m7_branch_cost,
2224 &arm_default_vec_cost,
2225 0, /* Constant limit. */
2226 1, /* Max cond insns. */
2227 8, /* Memset max inline. */
2228 2, /* Issue rate. */
2229 ARM_PREFETCH_NOT_BENEFICIAL,
2230 tune_params::PREF_CONST_POOL_TRUE,
2231 tune_params::PREF_LDRD_FALSE,
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2234 tune_params::DISPARAGE_FLAGS_NEITHER,
2235 tune_params::PREF_NEON_64_FALSE,
2236 tune_params::PREF_NEON_STRINGOPS_FALSE,
2237 tune_params::FUSE_NOTHING,
2238 tune_params::SCHED_AUTOPREF_OFF
2239 };
2240
2241 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2242 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2243 cortex-m23. */
2244 const struct tune_params arm_v6m_tune =
2245 {
2246 &generic_extra_costs, /* Insn extra costs. */
2247 NULL, /* Sched adj cost. */
2248 arm_default_branch_cost,
2249 &arm_default_vec_cost, /* Vectorizer costs. */
2250 1, /* Constant limit. */
2251 5, /* Max cond insns. */
2252 8, /* Memset max inline. */
2253 1, /* Issue rate. */
2254 ARM_PREFETCH_NOT_BENEFICIAL,
2255 tune_params::PREF_CONST_POOL_FALSE,
2256 tune_params::PREF_LDRD_FALSE,
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2259 tune_params::DISPARAGE_FLAGS_NEITHER,
2260 tune_params::PREF_NEON_64_FALSE,
2261 tune_params::PREF_NEON_STRINGOPS_FALSE,
2262 tune_params::FUSE_NOTHING,
2263 tune_params::SCHED_AUTOPREF_OFF
2264 };
2265
2266 const struct tune_params arm_fa726te_tune =
2267 {
2268 &generic_extra_costs, /* Insn extra costs. */
2269 fa726te_sched_adjust_cost,
2270 arm_default_branch_cost,
2271 &arm_default_vec_cost,
2272 1, /* Constant limit. */
2273 5, /* Max cond insns. */
2274 8, /* Memset max inline. */
2275 2, /* Issue rate. */
2276 ARM_PREFETCH_NOT_BENEFICIAL,
2277 tune_params::PREF_CONST_POOL_TRUE,
2278 tune_params::PREF_LDRD_FALSE,
2279 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2281 tune_params::DISPARAGE_FLAGS_NEITHER,
2282 tune_params::PREF_NEON_64_FALSE,
2283 tune_params::PREF_NEON_STRINGOPS_FALSE,
2284 tune_params::FUSE_NOTHING,
2285 tune_params::SCHED_AUTOPREF_OFF
2286 };
2287
2288 /* Auto-generated CPU, FPU and architecture tables. */
2289 #include "arm-cpu-data.h"
2290
2291 /* The name of the preprocessor macro to define for this architecture. PROFILE
2292 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2293 is thus chosen to be big enough to hold the longest architecture name. */
2294
2295 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
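/* For example, for an -march=armv7-a target the architecture's preprocessor
   name is "7A", so arm_option_override () rewrites the buffer above to
   "__ARM_ARCH_7A__".  */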
2296
2297 /* Supported TLS relocations. */
2298
2299 enum tls_reloc {
2300 TLS_GD32,
2301 TLS_LDM32,
2302 TLS_LDO32,
2303 TLS_IE32,
2304 TLS_LE32,
2305 TLS_DESCSEQ /* GNU scheme */
2306 };
2307
2308 /* The maximum number of insns to be used when loading a constant. */
2309 inline static int
2310 arm_constant_limit (bool size_p)
2311 {
2312 return size_p ? 1 : current_tune->constant_limit;
2313 }
2314
2315 /* Emit an insn that's a simple single-set. Both the operands must be known
2316 to be valid. */
2317 inline static rtx_insn *
2318 emit_set_insn (rtx x, rtx y)
2319 {
2320 return emit_insn (gen_rtx_SET (x, y));
2321 }
2322
2323 /* Return the number of bits set in VALUE. */
2324 static unsigned
2325 bit_count (unsigned long value)
2326 {
2327 unsigned long count = 0;
2328
2329 while (value)
2330 {
2331 count++;
2332 value &= value - 1; /* Clear the least-significant set bit. */
2333 }
2334
2335 return count;
2336 }
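/* Worked example of the loop above: for VALUE = 0b101100, "value &= value - 1"
   clears one set bit per iteration, 0b101100 -> 0b101000 -> 0b100000 -> 0,
   so bit_count returns 3.  */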
2337
2338 /* Return the number of bits set in BMAP. */
2339 static unsigned
2340 bitmap_popcount (const sbitmap bmap)
2341 {
2342 unsigned int count = 0;
2343 unsigned int n = 0;
2344 sbitmap_iterator sbi;
2345
2346 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2347 count++;
2348 return count;
2349 }
2350
2351 typedef struct
2352 {
2353 machine_mode mode;
2354 const char *name;
2355 } arm_fixed_mode_set;
2356
2357 /* A small helper for setting fixed-point libfuncs. */
2358
2359 static void
2360 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2361 const char *funcname, const char *modename,
2362 int num_suffix)
2363 {
2364 char buffer[50];
2365
2366 if (num_suffix == 0)
2367 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2368 else
2369 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2370
2371 set_optab_libfunc (optable, mode, buffer);
2372 }
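/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" for QQmode addition.  */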
2373
2374 static void
2375 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2376 machine_mode from, const char *funcname,
2377 const char *toname, const char *fromname)
2378 {
2379 char buffer[50];
2380 const char *maybe_suffix_2 = "";
2381
2382 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2383 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2384 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2385 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2386 maybe_suffix_2 = "2";
2387
2388 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2389 maybe_suffix_2);
2390
2391 set_conv_libfunc (optable, to, from, buffer);
2392 }
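/* For example, a conversion from SQmode to DQmode (both signed fract modes)
   picks up the "2" suffix and registers "__gnu_fractsqdq2", whereas a
   conversion from SQmode to SImode does not and registers "__gnu_fractsqsi".  */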
2393
2394 /* Set up library functions unique to ARM. */
2395
2396 static void
2397 arm_init_libfuncs (void)
2398 {
2399 /* For Linux, we have access to kernel support for atomic operations. */
2400 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2401 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2402
2403 /* There are no special library functions unless we are using the
2404 ARM BPABI. */
2405 if (!TARGET_BPABI)
2406 return;
2407
2408 /* The functions below are described in Section 4 of the "Run-Time
2409 ABI for the ARM architecture", Version 1.0. */
2410
2411 /* Double-precision floating-point arithmetic. Table 2. */
2412 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2413 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2414 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2415 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2416 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2417
2418 /* Double-precision comparisons. Table 3. */
2419 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2420 set_optab_libfunc (ne_optab, DFmode, NULL);
2421 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2422 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2423 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2424 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2425 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2426
2427 /* Single-precision floating-point arithmetic. Table 4. */
2428 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2429 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2430 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2431 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2432 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2433
2434 /* Single-precision comparisons. Table 5. */
2435 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2436 set_optab_libfunc (ne_optab, SFmode, NULL);
2437 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2438 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2439 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2440 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2441 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2442
2443 /* Floating-point to integer conversions. Table 6. */
2444 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2445 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2446 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2447 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2448 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2449 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2450 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2451 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2452
2453 /* Conversions between floating types. Table 7. */
2454 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2455 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2456
2457 /* Integer to floating-point conversions. Table 8. */
2458 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2459 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2460 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2461 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2462 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2463 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2464 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2465 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2466
2467 /* Long long. Table 9. */
2468 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2469 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2470 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2471 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2472 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2473 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2474 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2475 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2476
2477 /* Integer (32/32->32) division. \S 4.3.1. */
2478 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2479 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2480
2481 /* The divmod functions are designed so that they can be used for
2482 plain division, even though they return both the quotient and the
2483 remainder. The quotient is returned in the usual location (i.e.,
2484 r0 for SImode, {r0, r1} for DImode), just as would be expected
2485 for an ordinary division routine. Because the AAPCS calling
2486 conventions specify that all of { r0, r1, r2, r3 } are
2487 call-clobbered registers, there is no need to tell the compiler
2488 explicitly that those registers are clobbered by these
2489 routines. */
2490 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2491 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
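  /* Illustrative sketch of the convention described above (the prototype
     below follows the RTABI description and is shown for illustration only;
     it is not part of this file):

	 typedef struct { int quot; int rem; } idiv_return;
	 extern idiv_return __aeabi_idivmod (int numerator, int denominator);

     The quotient comes back in r0 and the remainder in r1 (r0-r1 and r2-r3
     for the 64-bit variants), so using the divmod routines through
     sdiv_optab and udiv_optab simply ignores the remainder half of the
     return value.  */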
2492
2493 /* For SImode division the ABI provides div-without-mod routines,
2494 which are faster. */
2495 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2496 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2497
2498 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2499 divmod libcalls instead. */
2500 set_optab_libfunc (smod_optab, DImode, NULL);
2501 set_optab_libfunc (umod_optab, DImode, NULL);
2502 set_optab_libfunc (smod_optab, SImode, NULL);
2503 set_optab_libfunc (umod_optab, SImode, NULL);
2504
2505 /* Half-precision float operations. The compiler handles all operations
2506 with NULL libfuncs by converting to SFmode. */
2507 switch (arm_fp16_format)
2508 {
2509 case ARM_FP16_FORMAT_IEEE:
2510 case ARM_FP16_FORMAT_ALTERNATIVE:
2511
2512 /* Conversions. */
2513 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2514 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2515 ? "__gnu_f2h_ieee"
2516 : "__gnu_f2h_alternative"));
2517 set_conv_libfunc (sext_optab, SFmode, HFmode,
2518 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2519 ? "__gnu_h2f_ieee"
2520 : "__gnu_h2f_alternative"));
2521
2522 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2523 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2524 ? "__gnu_d2h_ieee"
2525 : "__gnu_d2h_alternative"));
2526
2527 /* Arithmetic. */
2528 set_optab_libfunc (add_optab, HFmode, NULL);
2529 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2530 set_optab_libfunc (smul_optab, HFmode, NULL);
2531 set_optab_libfunc (neg_optab, HFmode, NULL);
2532 set_optab_libfunc (sub_optab, HFmode, NULL);
2533
2534 /* Comparisons. */
2535 set_optab_libfunc (eq_optab, HFmode, NULL);
2536 set_optab_libfunc (ne_optab, HFmode, NULL);
2537 set_optab_libfunc (lt_optab, HFmode, NULL);
2538 set_optab_libfunc (le_optab, HFmode, NULL);
2539 set_optab_libfunc (ge_optab, HFmode, NULL);
2540 set_optab_libfunc (gt_optab, HFmode, NULL);
2541 set_optab_libfunc (unord_optab, HFmode, NULL);
2542 break;
2543
2544 default:
2545 break;
2546 }
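  /* Illustrative sketch (not part of this file): with the arithmetic and
     comparison libfuncs cleared above, an __fp16 expression such as

	 __fp16 a, b, c;
	 c = a + b;

     is expanded roughly as

	 c = (__fp16) ((float) a + (float) b);

     so the only libcalls ever needed are the HFmode conversions registered
     just above.  */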
2547
2548 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2549 {
2550 const arm_fixed_mode_set fixed_arith_modes[] =
2551 {
2552 { QQmode, "qq" },
2553 { UQQmode, "uqq" },
2554 { HQmode, "hq" },
2555 { UHQmode, "uhq" },
2556 { SQmode, "sq" },
2557 { USQmode, "usq" },
2558 { DQmode, "dq" },
2559 { UDQmode, "udq" },
2560 { TQmode, "tq" },
2561 { UTQmode, "utq" },
2562 { HAmode, "ha" },
2563 { UHAmode, "uha" },
2564 { SAmode, "sa" },
2565 { USAmode, "usa" },
2566 { DAmode, "da" },
2567 { UDAmode, "uda" },
2568 { TAmode, "ta" },
2569 { UTAmode, "uta" }
2570 };
2571 const arm_fixed_mode_set fixed_conv_modes[] =
2572 {
2573 { QQmode, "qq" },
2574 { UQQmode, "uqq" },
2575 { HQmode, "hq" },
2576 { UHQmode, "uhq" },
2577 { SQmode, "sq" },
2578 { USQmode, "usq" },
2579 { DQmode, "dq" },
2580 { UDQmode, "udq" },
2581 { TQmode, "tq" },
2582 { UTQmode, "utq" },
2583 { HAmode, "ha" },
2584 { UHAmode, "uha" },
2585 { SAmode, "sa" },
2586 { USAmode, "usa" },
2587 { DAmode, "da" },
2588 { UDAmode, "uda" },
2589 { TAmode, "ta" },
2590 { UTAmode, "uta" },
2591 { QImode, "qi" },
2592 { HImode, "hi" },
2593 { SImode, "si" },
2594 { DImode, "di" },
2595 { TImode, "ti" },
2596 { SFmode, "sf" },
2597 { DFmode, "df" }
2598 };
2599 unsigned int i, j;
2600
2601 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2602 {
2603 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2604 "add", fixed_arith_modes[i].name, 3);
2605 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2606 "ssadd", fixed_arith_modes[i].name, 3);
2607 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2608 "usadd", fixed_arith_modes[i].name, 3);
2609 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2610 "sub", fixed_arith_modes[i].name, 3);
2611 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2612 "sssub", fixed_arith_modes[i].name, 3);
2613 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2614 "ussub", fixed_arith_modes[i].name, 3);
2615 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2616 "mul", fixed_arith_modes[i].name, 3);
2617 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2618 "ssmul", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2620 "usmul", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2622 "div", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2624 "udiv", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2626 "ssdiv", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2628 "usdiv", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2630 "neg", fixed_arith_modes[i].name, 2);
2631 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2632 "ssneg", fixed_arith_modes[i].name, 2);
2633 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2634 "usneg", fixed_arith_modes[i].name, 2);
2635 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2636 "ashl", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2638 "ashr", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2640 "lshr", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2642 "ssashl", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2644 "usashl", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2646 "cmp", fixed_arith_modes[i].name, 2);
2647 }
2648
2649 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2650 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2651 {
2652 if (i == j
2653 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2654 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2655 continue;
2656
2657 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2658 fixed_conv_modes[j].mode, "fract",
2659 fixed_conv_modes[i].name,
2660 fixed_conv_modes[j].name);
2661 arm_set_fixed_conv_libfunc (satfract_optab,
2662 fixed_conv_modes[i].mode,
2663 fixed_conv_modes[j].mode, "satfract",
2664 fixed_conv_modes[i].name,
2665 fixed_conv_modes[j].name);
2666 arm_set_fixed_conv_libfunc (fractuns_optab,
2667 fixed_conv_modes[i].mode,
2668 fixed_conv_modes[j].mode, "fractuns",
2669 fixed_conv_modes[i].name,
2670 fixed_conv_modes[j].name);
2671 arm_set_fixed_conv_libfunc (satfractuns_optab,
2672 fixed_conv_modes[i].mode,
2673 fixed_conv_modes[j].mode, "satfractuns",
2674 fixed_conv_modes[i].name,
2675 fixed_conv_modes[j].name);
2676 }
2677 }
2678
2679 if (TARGET_AAPCS_BASED)
2680 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2681 }
2682
2683 /* On AAPCS systems, this is the "struct __va_list". */
2684 static GTY(()) tree va_list_type;
2685
2686 /* Return the type to use as __builtin_va_list. */
2687 static tree
2688 arm_build_builtin_va_list (void)
2689 {
2690 tree va_list_name;
2691 tree ap_field;
2692
2693 if (!TARGET_AAPCS_BASED)
2694 return std_build_builtin_va_list ();
2695
2696 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2697 defined as:
2698
2699 struct __va_list
2700 {
2701 void *__ap;
2702 };
2703
2704 The C Library ABI further reinforces this definition in \S
2705 4.1.
2706
2707 We must follow this definition exactly. The structure tag
2708 name is visible in C++ mangled names, and thus forms a part
2709 of the ABI. The field name may be used by people who
2710 #include <stdarg.h>. */
2711 /* Create the type. */
2712 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2713 /* Give it the required name. */
2714 va_list_name = build_decl (BUILTINS_LOCATION,
2715 TYPE_DECL,
2716 get_identifier ("__va_list"),
2717 va_list_type);
2718 DECL_ARTIFICIAL (va_list_name) = 1;
2719 TYPE_NAME (va_list_type) = va_list_name;
2720 TYPE_STUB_DECL (va_list_type) = va_list_name;
2721 /* Create the __ap field. */
2722 ap_field = build_decl (BUILTINS_LOCATION,
2723 FIELD_DECL,
2724 get_identifier ("__ap"),
2725 ptr_type_node);
2726 DECL_ARTIFICIAL (ap_field) = 1;
2727 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2728 TYPE_FIELDS (va_list_type) = ap_field;
2729 /* Compute its layout. */
2730 layout_type (va_list_type);
2731
2732 return va_list_type;
2733 }
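/* In C terms, the type built above is simply (a sketch mirroring the AAPCS
   definition quoted in the comment):

       struct __va_list { void *__ap; };

   with __builtin_va_list being that structure type on AAPCS targets.  */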
2734
2735 /* Return an expression of type "void *" pointing to the next
2736 available argument in a variable-argument list. VALIST is the
2737 user-level va_list object, of type __builtin_va_list. */
2738 static tree
2739 arm_extract_valist_ptr (tree valist)
2740 {
2741 if (TREE_TYPE (valist) == error_mark_node)
2742 return error_mark_node;
2743
2744 /* On an AAPCS target, the pointer is stored within "struct
2745 __va_list". */
2746 if (TARGET_AAPCS_BASED)
2747 {
2748 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2749 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2750 valist, ap_field, NULL_TREE);
2751 }
2752
2753 return valist;
2754 }
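/* For example, given "va_list ap;" on an AAPCS target, the COMPONENT_REF
   built above corresponds to the C expression "ap.__ap".  */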
2755
2756 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2757 static void
2758 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2759 {
2760 valist = arm_extract_valist_ptr (valist);
2761 std_expand_builtin_va_start (valist, nextarg);
2762 }
2763
2764 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2765 static tree
2766 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2767 gimple_seq *post_p)
2768 {
2769 valist = arm_extract_valist_ptr (valist);
2770 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2771 }
2772
2773 /* Check any incompatible options that the user has specified. */
2774 static void
2775 arm_option_check_internal (struct gcc_options *opts)
2776 {
2777 int flags = opts->x_target_flags;
2778
2779 /* iWMMXt and NEON are incompatible. */
2780 if (TARGET_IWMMXT
2781 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2782 error ("iWMMXt and NEON are incompatible");
2783
2784 /* Make sure that the processor choice does not conflict with any of the
2785 other command line choices. */
2786 if (TARGET_ARM_P (flags)
2787 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2788 error ("target CPU does not support ARM mode");
2789
2790 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2791 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2792 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2793
2794 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2795 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2796
2797 /* If this target is normally configured to use APCS frames, warn if they
2798 are turned off and debugging is turned on. */
2799 if (TARGET_ARM_P (flags)
2800 && write_symbols != NO_DEBUG
2801 && !TARGET_APCS_FRAME
2802 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2803 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2804
2805 /* iWMMXt unsupported under Thumb mode. */
2806 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2807 error ("iWMMXt unsupported under Thumb mode");
2808
2809 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2810 error ("can not use -mtp=cp15 with 16-bit Thumb");
2811
2812 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2813 {
2814 error ("RTP PIC is incompatible with Thumb");
2815 flag_pic = 0;
2816 }
2817
2818 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2819 with MOVT. */
2820 if ((target_pure_code || target_slow_flash_data)
2821 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2822 {
2823 const char *flag = (target_pure_code ? "-mpure-code" :
2824 "-mslow-flash-data");
2825 error ("%s only supports non-pic code on M-profile targets with the "
2826 "MOVT instruction", flag);
2827 }
2828
2829 }
2830
2831 /* Recompute the global settings depending on target attribute options. */
2832
2833 static void
2834 arm_option_params_internal (void)
2835 {
2836 /* If we are not using the default (ARM mode) section anchor offset
2837 ranges, then set the correct ranges now. */
2838 if (TARGET_THUMB1)
2839 {
2840 /* Thumb-1 LDR instructions cannot have negative offsets.
2841 Permissible positive offset ranges are 5-bit (for byte loads),
2842 6-bit (for halfword loads), or 7-bit (for word loads).
2843 Empirical results suggest a 7-bit anchor range gives the best
2844 overall code size. */
2845 targetm.min_anchor_offset = 0;
2846 targetm.max_anchor_offset = 127;
2847 }
2848 else if (TARGET_THUMB2)
2849 {
2850 /* The minimum is set such that the total size of the block
2851 for a particular anchor is 248 + 1 + 4095 bytes, which is
2852 divisible by eight, ensuring natural spacing of anchors. */
2853 targetm.min_anchor_offset = -248;
2854 targetm.max_anchor_offset = 4095;
2855 }
2856 else
2857 {
2858 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2859 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2860 }
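  /* Worked example of the ranges above (an illustrative note, not taken from
     this file): Thumb-1 immediate offsets are 5 bits scaled by the access
     size, i.e. 0-31 bytes for LDRB, 0-62 for LDRH and 0-124 for LDR, so an
     anchor range of [0, 127] covers the word-load case.  For Thumb-2 the
     block size is 248 + 1 + 4095 = 4344 bytes, which is 8 * 543 and hence
     keeps anchors naturally spaced by eight.  */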
2861
2862 if (optimize_size)
2863 {
2864 /* If optimizing for size, bump the number of instructions that we
2865 are prepared to conditionally execute (even on a StrongARM). */
2866 max_insns_skipped = 6;
2867
2868 /* For THUMB2, we limit the conditional sequence to one IT block. */
2869 if (TARGET_THUMB2)
2870 max_insns_skipped = arm_restrict_it ? 1 : 4;
2871 }
2872 else
2873 /* When -mrestrict-it is in use, tone down the if-conversion. */
2874 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2875 ? 1 : current_tune->max_insns_skipped;
2876 }
2877
2878 /* True if -mflip-thumb should next add an attribute for the default
2879 mode, false if it should next add an attribute for the opposite mode. */
2880 static GTY(()) bool thumb_flipper;
2881
2882 /* Options after initial target override. */
2883 static GTY(()) tree init_optimize;
2884
2885 static void
2886 arm_override_options_after_change_1 (struct gcc_options *opts)
2887 {
2888 if (opts->x_align_functions <= 0)
2889 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2890 && opts->x_optimize_size ? 2 : 4;
2891 }
2892
2893 /* Implement targetm.override_options_after_change. */
2894
2895 static void
2896 arm_override_options_after_change (void)
2897 {
2898 arm_configure_build_target (&arm_active_target,
2899 TREE_TARGET_OPTION (target_option_default_node),
2900 &global_options_set, false);
2901
2902 arm_override_options_after_change_1 (&global_options);
2903 }
2904
2905 static void
2906 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2907 {
2908 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2909 false);
2910 }
2911
2912 /* Reset options between modes that the user has specified. */
2913 static void
2914 arm_option_override_internal (struct gcc_options *opts,
2915 struct gcc_options *opts_set)
2916 {
2917 arm_override_options_after_change_1 (opts);
2918
2919 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2920 {
2921 /* The default is to enable interworking, so this warning message would
2922 be confusing to users who have just compiled with, eg, -march=armv3. */
2923 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2924 opts->x_target_flags &= ~MASK_INTERWORK;
2925 }
2926
2927 if (TARGET_THUMB_P (opts->x_target_flags)
2928 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2929 {
2930 warning (0, "target CPU does not support THUMB instructions");
2931 opts->x_target_flags &= ~MASK_THUMB;
2932 }
2933
2934 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2935 {
2936 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2937 opts->x_target_flags &= ~MASK_APCS_FRAME;
2938 }
2939
2940 /* Callee super interworking implies thumb interworking. Adding
2941 this to the flags here simplifies the logic elsewhere. */
2942 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2943 opts->x_target_flags |= MASK_INTERWORK;
2944
2945 /* Need to remember initial values so combinations of options like
2946 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2947 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2948
2949 if (! opts_set->x_arm_restrict_it)
2950 opts->x_arm_restrict_it = arm_arch8;
2951
2952 /* ARM execution state and M profile don't have [restrict] IT. */
2953 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2954 opts->x_arm_restrict_it = 0;
2955
2956 /* Enable -munaligned-access by default for
2957 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2958 i.e. Thumb2 and ARM state only.
2959 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2960 - ARMv8 architecture-based processors.
2961
2962 Disable -munaligned-access by default for
2963 - all pre-ARMv6 architecture-based processors
2964 - ARMv6-M architecture-based processors
2965 - ARMv8-M Baseline processors. */
2966
2967 if (! opts_set->x_unaligned_access)
2968 {
2969 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2970 && arm_arch6 && (arm_arch_notm || arm_arch7));
2971 }
2972 else if (opts->x_unaligned_access == 1
2973 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2974 {
2975 warning (0, "target CPU does not support unaligned accesses");
2976 opts->x_unaligned_access = 0;
2977 }
2978
2979 /* Scheduling is on by default at -O2, so don't warn when disabling it for Thumb-1. */
2980 if (TARGET_THUMB1_P (opts->x_target_flags))
2981 opts->x_flag_schedule_insns = 0;
2982 else
2983 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2984
2985 /* Disable shrink-wrap when optimizing function for size, since it tends to
2986 generate additional returns. */
2987 if (optimize_function_for_size_p (cfun)
2988 && TARGET_THUMB2_P (opts->x_target_flags))
2989 opts->x_flag_shrink_wrap = false;
2990 else
2991 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2992
2993 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2994 - epilogue_insns - does not accurately model the corresponding insns
2995 emitted in the asm file. In particular, see the comment in thumb_exit
2996 'Find out how many of the (return) argument registers we can corrupt'.
2997 As a consequence, the epilogue may clobber registers without fipa-ra
2998 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2999 TODO: Accurately model clobbers for epilogue_insns and reenable
3000 fipa-ra. */
3001 if (TARGET_THUMB1_P (opts->x_target_flags))
3002 opts->x_flag_ipa_ra = 0;
3003 else
3004 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3005
3006 /* Thumb2 inline assembly code should always use unified syntax.
3007 This will apply to ARM and Thumb1 eventually. */
3008 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3009
3010 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3011 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3012 #endif
3013 }
3014
3015 /* Convert a static initializer array of feature bits to sbitmap
3016 representation. */
3017 static void
3018 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3019 {
3020 bitmap_clear (isa);
3021 while (*isa_bits != isa_nobit)
3022 bitmap_set_bit (isa, *(isa_bits++));
3023 }
3024
3025 static sbitmap isa_all_fpubits;
3026 static sbitmap isa_quirkbits;
3027
3028 /* Configure a build target TARGET from the user-specified options OPTS and
3029 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3030 architecture have been specified, but the two are not identical. */
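/* For example (an illustrative scenario, not taken from this file):
   -march=armv7-a -mcpu=cortex-a8 agree on their ISA bits, so the CPU choice
   is kept.  -march=armv7-a -mcpu=cortex-m4 do not, so with WARN_COMPATIBLE
   a "switch -mcpu=... conflicts with -march=..." warning is issued and, as
   noted below, -march wins for code generation while -mcpu still selects
   the default tuning.  */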
3031 void
3032 arm_configure_build_target (struct arm_build_target *target,
3033 struct cl_target_option *opts,
3034 struct gcc_options *opts_set,
3035 bool warn_compatible)
3036 {
3037 const struct processors *arm_selected_tune = NULL;
3038 const struct processors *arm_selected_arch = NULL;
3039 const struct processors *arm_selected_cpu = NULL;
3040 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3041
3042 bitmap_clear (target->isa);
3043 target->core_name = NULL;
3044 target->arch_name = NULL;
3045
3046 if (opts_set->x_arm_arch_option)
3047 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3048
3049 if (opts_set->x_arm_cpu_option)
3050 {
3051 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3052 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3053 }
3054
3055 if (opts_set->x_arm_tune_option)
3056 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3057
3058 if (arm_selected_arch)
3059 {
3060 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3061
3062 if (arm_selected_cpu)
3063 {
3064 auto_sbitmap cpu_isa (isa_num_bits);
3065
3066 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3067 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3068 /* Ignore any bits that are quirk bits. */
3069 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3070 /* Ignore (for now) any bits that might be set by -mfpu. */
3071 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3072
3073 if (!bitmap_empty_p (cpu_isa))
3074 {
3075 if (warn_compatible)
3076 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3077 arm_selected_cpu->name, arm_selected_arch->name);
3078 /* -march wins for code generation.
3079 -mcpu wins for default tuning. */
3080 if (!arm_selected_tune)
3081 arm_selected_tune = arm_selected_cpu;
3082
3083 arm_selected_cpu = arm_selected_arch;
3084 target->arch_name = arm_selected_arch->name;
3085 }
3086 else
3087 {
3088 /* Architecture and CPU are essentially the same.
3089 Prefer the CPU setting. */
3090 arm_selected_arch = NULL;
3091 target->core_name = arm_selected_cpu->name;
3092 }
3093 }
3094 else
3095 {
3096 /* Pick a CPU based on the architecture. */
3097 arm_selected_cpu = arm_selected_arch;
3098 target->arch_name = arm_selected_arch->name;
3099 /* Note: target->core_name is left unset in this path. */
3100 }
3101 }
3102 else if (arm_selected_cpu)
3103 {
3104 target->core_name = arm_selected_cpu->name;
3105 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3106 }
3107 /* If the user did not specify a processor, choose one for them. */
3108 else
3109 {
3110 const struct processors * sel;
3111 auto_sbitmap sought_isa (isa_num_bits);
3112 bitmap_clear (sought_isa);
3113 auto_sbitmap default_isa (isa_num_bits);
3114
3115 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3116 gcc_assert (arm_selected_cpu->name);
3117
3118 /* RWE: All of the selection logic below (to the end of this
3119 'if' clause) looks somewhat suspect. It appears to be mostly
3120 there to support forcing thumb support when the default CPU
3121 does not have thumb (somewhat dubious in terms of what the
3122 user might be expecting). I think it should be removed once
3123 support for the pre-thumb era cores is removed. */
3124 sel = arm_selected_cpu;
3125 arm_initialize_isa (default_isa, sel->isa_bits);
3126
3127 /* Now check to see if the user has specified any command line
3128 switches that require certain abilities from the cpu. */
3129
3130 if (TARGET_INTERWORK || TARGET_THUMB)
3131 {
3132 bitmap_set_bit (sought_isa, isa_bit_thumb);
3133 bitmap_set_bit (sought_isa, isa_bit_mode32);
3134
3135 /* There are no ARM processors that support both APCS-26 and
3136 interworking. Therefore we forcibly remove MODE26 from
3137 the isa features here (if it was set), so that the
3138 search below will always be able to find a compatible
3139 processor. */
3140 bitmap_clear_bit (default_isa, isa_bit_mode26);
3141 }
3142
3143 /* If there are such requirements and the default CPU does not
3144 satisfy them, we need to run over the complete list of
3145 cores looking for one that is satisfactory. */
3146 if (!bitmap_empty_p (sought_isa)
3147 && !bitmap_subset_p (sought_isa, default_isa))
3148 {
3149 auto_sbitmap candidate_isa (isa_num_bits);
3150 /* We're only interested in a CPU with at least the
3151 capabilities of the default CPU and the required
3152 additional features. */
3153 bitmap_ior (default_isa, default_isa, sought_isa);
3154
3155 /* Try to locate a CPU type that supports all of the abilities
3156 of the default CPU, plus the extra abilities requested by
3157 the user. */
3158 for (sel = all_cores; sel->name != NULL; sel++)
3159 {
3160 arm_initialize_isa (candidate_isa, sel->isa_bits);
3161 /* An exact match? */
3162 if (bitmap_equal_p (default_isa, candidate_isa))
3163 break;
3164 }
3165
3166 if (sel->name == NULL)
3167 {
3168 unsigned current_bit_count = isa_num_bits;
3169 const struct processors * best_fit = NULL;
3170
3171 /* Ideally we would like to issue an error message here
3172 saying that it was not possible to find a CPU compatible
3173 with the default CPU, but which also supports the command
3174 line options specified by the programmer, and so they
3175 ought to use the -mcpu=<name> command line option to
3176 override the default CPU type.
3177
3178 If we cannot find a CPU that has exactly the
3179 characteristics of the default CPU and the given
3180 command line options we scan the array again looking
3181 for a best match. The best match must have at least
3182 the capabilities of the perfect match. */
3183 for (sel = all_cores; sel->name != NULL; sel++)
3184 {
3185 arm_initialize_isa (candidate_isa, sel->isa_bits);
3186
3187 if (bitmap_subset_p (default_isa, candidate_isa))
3188 {
3189 unsigned count;
3190
3191 bitmap_and_compl (candidate_isa, candidate_isa,
3192 default_isa);
3193 count = bitmap_popcount (candidate_isa);
3194
3195 if (count < current_bit_count)
3196 {
3197 best_fit = sel;
3198 current_bit_count = count;
3199 }
3200 }
3201
3202 gcc_assert (best_fit);
3203 sel = best_fit;
3204 }
3205 }
3206 arm_selected_cpu = sel;
3207 }
3208
3209 /* Now we know the CPU, we can finally initialize the target
3210 structure. */
3211 target->core_name = arm_selected_cpu->name;
3212 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3213 }
3214
3215 gcc_assert (arm_selected_cpu);
3216
3217 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3218 {
3219 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3220 auto_sbitmap fpu_bits (isa_num_bits);
3221
3222 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3223 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3224 bitmap_ior (target->isa, target->isa, fpu_bits);
3225 }
3226 else if (target->core_name == NULL)
3227 /* To support this we need to be able to parse FPU feature options
3228 from the architecture string. */
3229 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3230
3231 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3232 if (!arm_selected_tune)
3233 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3234
3235 /* Finish initializing the target structure. */
3236 target->arch_pp_name = arm_selected_cpu->arch;
3237 target->base_arch = arm_selected_cpu->base_arch;
3238 target->arch_core = arm_selected_cpu->core;
3239
3240 target->tune_flags = arm_selected_tune->tune_flags;
3241 target->tune = arm_selected_tune->tune;
3242 target->tune_core = arm_selected_tune->core;
3243 }
3244
3245 /* Fix up any incompatible options that the user has specified. */
3246 static void
3247 arm_option_override (void)
3248 {
3249 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3250 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3251 cl_target_option opts;
3252
3253 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3254 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3255
3256 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3257 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3258
3259 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3260
3261 if (!global_options_set.x_arm_fpu_index)
3262 {
3263 const char *target_fpu_name;
3264 bool ok;
3265 int fpu_index;
3266
3267 #ifdef FPUTYPE_DEFAULT
3268 target_fpu_name = FPUTYPE_DEFAULT;
3269 #else
3270 target_fpu_name = "vfp";
3271 #endif
3272
3273 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3274 CL_TARGET);
3275 gcc_assert (ok);
3276 arm_fpu_index = (enum fpu_type) fpu_index;
3277 }
3278
3279 cl_target_option_save (&opts, &global_options);
3280 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3281 true);
3282
3283 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3284 SUBTARGET_OVERRIDE_OPTIONS;
3285 #endif
3286
3287 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3288 arm_base_arch = arm_active_target.base_arch;
3289
3290 arm_tune = arm_active_target.tune_core;
3291 tune_flags = arm_active_target.tune_flags;
3292 current_tune = arm_active_target.tune;
3293
3294 /* TBD: Dwarf info for apcs frame is not handled yet. */
3295 if (TARGET_APCS_FRAME)
3296 flag_shrink_wrap = false;
3297
3298 /* BPABI targets use linker tricks to allow interworking on cores
3299 without thumb support. */
3300 if (TARGET_INTERWORK
3301 && !TARGET_BPABI
3302 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3303 {
3304 warning (0, "target CPU does not support interworking");
3305 target_flags &= ~MASK_INTERWORK;
3306 }
3307
3308 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3309 {
3310 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3311 target_flags |= MASK_APCS_FRAME;
3312 }
3313
3314 if (TARGET_POKE_FUNCTION_NAME)
3315 target_flags |= MASK_APCS_FRAME;
3316
3317 if (TARGET_APCS_REENT && flag_pic)
3318 error ("-fpic and -mapcs-reent are incompatible");
3319
3320 if (TARGET_APCS_REENT)
3321 warning (0, "APCS reentrant code not supported. Ignored");
3322
3323 /* Initialize boolean versions of the architectural flags, for use
3324 in the arm.md file. */
3325 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3326 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3327 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3328 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3329 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3330 arm_arch5te = arm_arch5e
3331 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3332 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3333 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3334 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3335 arm_arch6m = arm_arch6 && !arm_arch_notm;
3336 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3337 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3338 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3339 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3340 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3341 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3342 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3343 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3344 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3345 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3346 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3347 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3348 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3349 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3350 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3351 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3352 if (arm_fp16_inst)
3353 {
3354 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3355 error ("selected fp16 options are incompatible");
3356 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3357 }
3358
3359
3360 /* Set up some tuning parameters. */
3361 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3362 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3363 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3364 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3365 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3366 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3367
3368 /* And finally, set up some quirks. */
3369 arm_arch_no_volatile_ce
3370 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3371 arm_arch6kz
3372 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3373
3374 /* V5 code we generate is completely interworking capable, so we turn off
3375 TARGET_INTERWORK here to avoid many tests later on. */
3376
3377 /* XXX However, we must pass the right pre-processor defines to CPP
3378 or GLD can get confused. This is a hack. */
3379 if (TARGET_INTERWORK)
3380 arm_cpp_interwork = 1;
3381
3382 if (arm_arch5)
3383 target_flags &= ~MASK_INTERWORK;
3384
3385 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3386 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3387
3388 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3389 error ("iwmmxt abi requires an iwmmxt capable cpu");
3390
3391 /* If soft-float is specified then don't use FPU. */
3392 if (TARGET_SOFT_FLOAT)
3393 arm_fpu_attr = FPU_NONE;
3394 else
3395 arm_fpu_attr = FPU_VFP;
3396
3397 if (TARGET_AAPCS_BASED)
3398 {
3399 if (TARGET_CALLER_INTERWORKING)
3400 error ("AAPCS does not support -mcaller-super-interworking");
3401 else
3402 if (TARGET_CALLEE_INTERWORKING)
3403 error ("AAPCS does not support -mcallee-super-interworking");
3404 }
3405
3406 /* __fp16 support currently assumes the core has ldrh. */
3407 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3408 sorry ("__fp16 and no ldrh");
3409
3410 if (TARGET_AAPCS_BASED)
3411 {
3412 if (arm_abi == ARM_ABI_IWMMXT)
3413 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3414 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3415 && TARGET_HARD_FLOAT)
3416 {
3417 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3418 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3419 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3420 }
3421 else
3422 arm_pcs_default = ARM_PCS_AAPCS;
3423 }
3424 else
3425 {
3426 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3427 sorry ("-mfloat-abi=hard and VFP");
3428
3429 if (arm_abi == ARM_ABI_APCS)
3430 arm_pcs_default = ARM_PCS_APCS;
3431 else
3432 arm_pcs_default = ARM_PCS_ATPCS;
3433 }
3434
3435 /* For arm2/3 there is no need to do any scheduling if we are doing
3436 software floating-point. */
3437 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3438 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3439
3440 /* Use the cp15 method if it is available. */
3441 if (target_thread_pointer == TP_AUTO)
3442 {
3443 if (arm_arch6k && !TARGET_THUMB1)
3444 target_thread_pointer = TP_CP15;
3445 else
3446 target_thread_pointer = TP_SOFT;
3447 }
3448
3449 /* Override the default structure alignment for AAPCS ABI. */
3450 if (!global_options_set.x_arm_structure_size_boundary)
3451 {
3452 if (TARGET_AAPCS_BASED)
3453 arm_structure_size_boundary = 8;
3454 }
3455 else
3456 {
3457 if (arm_structure_size_boundary != 8
3458 && arm_structure_size_boundary != 32
3459 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3460 {
3461 if (ARM_DOUBLEWORD_ALIGN)
3462 warning (0,
3463 "structure size boundary can only be set to 8, 32 or 64");
3464 else
3465 warning (0, "structure size boundary can only be set to 8 or 32");
3466 arm_structure_size_boundary
3467 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3468 }
3469 }
3470
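/* Illustrative example (editor's sketch, hypothetical user code): with the
   AAPCS default boundary of 8, a structure such as

       struct s { char c; };

   has sizeof (struct s) == 1, whereas under the old 32-bit APCS default the
   same structure is padded out to sizeof (struct s) == 4.  */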
3471 if (TARGET_VXWORKS_RTP)
3472 {
3473 if (!global_options_set.x_arm_pic_data_is_text_relative)
3474 arm_pic_data_is_text_relative = 0;
3475 }
3476 else if (flag_pic
3477 && !arm_pic_data_is_text_relative
3478 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3479 /* When text & data segments don't have a fixed displacement, the
3480 intended use is with a single, read only, pic base register.
3481 Unless the user explicitly requested not to do that, set
3482 it. */
3483 target_flags |= MASK_SINGLE_PIC_BASE;
3484
3485 /* If stack checking is disabled, we can use r10 as the PIC register,
3486 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3487 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3488 {
3489 if (TARGET_VXWORKS_RTP)
3490 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3491 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3492 }
3493
3494 if (flag_pic && TARGET_VXWORKS_RTP)
3495 arm_pic_register = 9;
3496
3497 if (arm_pic_register_string != NULL)
3498 {
3499 int pic_register = decode_reg_name (arm_pic_register_string);
3500
3501 if (!flag_pic)
3502 warning (0, "-mpic-register= is useless without -fpic");
3503
3504 /* Prevent the user from choosing an obviously stupid PIC register. */
3505 else if (pic_register < 0 || call_used_regs[pic_register]
3506 || pic_register == HARD_FRAME_POINTER_REGNUM
3507 || pic_register == STACK_POINTER_REGNUM
3508 || pic_register >= PC_REGNUM
3509 || (TARGET_VXWORKS_RTP
3510 && (unsigned int) pic_register != arm_pic_register))
3511 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3512 else
3513 arm_pic_register = pic_register;
3514 }
3515
3516 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3517 if (fix_cm3_ldrd == 2)
3518 {
3519 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3520 fix_cm3_ldrd = 1;
3521 else
3522 fix_cm3_ldrd = 0;
3523 }
3524
3525 /* Hot/Cold partitioning is not currently supported, since we can't
3526 handle literal pool placement in that case. */
3527 if (flag_reorder_blocks_and_partition)
3528 {
3529 inform (input_location,
3530 "-freorder-blocks-and-partition not supported on this architecture");
3531 flag_reorder_blocks_and_partition = 0;
3532 flag_reorder_blocks = 1;
3533 }
3534
3535 if (flag_pic)
3536 /* Hoisting PIC address calculations more aggressively provides a small,
3537 but measurable, size reduction for PIC code. Therefore, we decrease
3538 the bar for unrestricted expression hoisting to the cost of PIC address
3539 calculation, which is 2 instructions. */
3540 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3541 global_options.x_param_values,
3542 global_options_set.x_param_values);
3543
3544 /* ARM EABI defaults to strict volatile bitfields. */
3545 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3546 && abi_version_at_least(2))
3547 flag_strict_volatile_bitfields = 1;
3548
3549 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3550 have deemed it beneficial (signified by setting
3551 prefetch.num_slots to 1 or more). */
3552 if (flag_prefetch_loop_arrays < 0
3553 && HAVE_prefetch
3554 && optimize >= 3
3555 && current_tune->prefetch.num_slots > 0)
3556 flag_prefetch_loop_arrays = 1;
3557
3558 /* Set up parameters to be used in prefetching algorithm. Do not
3559 override the defaults unless we are tuning for a core we have
3560 researched values for. */
3561 if (current_tune->prefetch.num_slots > 0)
3562 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3563 current_tune->prefetch.num_slots,
3564 global_options.x_param_values,
3565 global_options_set.x_param_values);
3566 if (current_tune->prefetch.l1_cache_line_size >= 0)
3567 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3568 current_tune->prefetch.l1_cache_line_size,
3569 global_options.x_param_values,
3570 global_options_set.x_param_values);
3571 if (current_tune->prefetch.l1_cache_size >= 0)
3572 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3573 current_tune->prefetch.l1_cache_size,
3574 global_options.x_param_values,
3575 global_options_set.x_param_values);
3576
3577 /* Use Neon to perform 64-bit operations rather than core
3578 registers. */
3579 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3580 if (use_neon_for_64bits == 1)
3581 prefer_neon_for_64bits = true;
3582
3583 /* Use the alternative scheduling-pressure algorithm by default. */
3584 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3585 global_options.x_param_values,
3586 global_options_set.x_param_values);
3587
3588 /* Look through ready list and all of queue for instructions
3589 relevant for L2 auto-prefetcher. */
3590 int param_sched_autopref_queue_depth;
3591
3592 switch (current_tune->sched_autopref)
3593 {
3594 case tune_params::SCHED_AUTOPREF_OFF:
3595 param_sched_autopref_queue_depth = -1;
3596 break;
3597
3598 case tune_params::SCHED_AUTOPREF_RANK:
3599 param_sched_autopref_queue_depth = 0;
3600 break;
3601
3602 case tune_params::SCHED_AUTOPREF_FULL:
3603 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3604 break;
3605
3606 default:
3607 gcc_unreachable ();
3608 }
3609
3610 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3611 param_sched_autopref_queue_depth,
3612 global_options.x_param_values,
3613 global_options_set.x_param_values);
3614
3615 /* Currently, for slow flash data, we just disable literal pools. We also
3616 disable them for pure-code. */
3617 if (target_slow_flash_data || target_pure_code)
3618 arm_disable_literal_pool = true;
3619
3620 if (use_cmse && !arm_arch_cmse)
3621 error ("target CPU does not support ARMv8-M Security Extensions");
3622
3623 /* Disable scheduling fusion by default if it's not an armv7 processor
3624 or it doesn't prefer ldrd/strd. */
3625 if (flag_schedule_fusion == 2
3626 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3627 flag_schedule_fusion = 0;
3628
3629 /* Need to remember initial options before they are overridden. */
3630 init_optimize = build_optimization_node (&global_options);
3631
3632 arm_option_override_internal (&global_options, &global_options_set);
3633 arm_option_check_internal (&global_options);
3634 arm_option_params_internal ();
3635
3636 /* Create the default target_options structure. */
3637 target_option_default_node = target_option_current_node
3638 = build_target_option_node (&global_options);
3639
3640 /* Register global variables with the garbage collector. */
3641 arm_add_gc_roots ();
3642
3643 /* Init initial mode for testing. */
3644 thumb_flipper = TARGET_THUMB;
3645 }
3646
3647 static void
3648 arm_add_gc_roots (void)
3649 {
3650 gcc_obstack_init(&minipool_obstack);
3651 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3652 }
3653 \f
3654 /* A table of known ARM exception types.
3655 For use with the interrupt function attribute. */
3656
3657 typedef struct
3658 {
3659 const char *const arg;
3660 const unsigned long return_value;
3661 }
3662 isr_attribute_arg;
3663
3664 static const isr_attribute_arg isr_attribute_args [] =
3665 {
3666 { "IRQ", ARM_FT_ISR },
3667 { "irq", ARM_FT_ISR },
3668 { "FIQ", ARM_FT_FIQ },
3669 { "fiq", ARM_FT_FIQ },
3670 { "ABORT", ARM_FT_ISR },
3671 { "abort", ARM_FT_ISR },
3672 { "ABORT", ARM_FT_ISR },
3673 { "abort", ARM_FT_ISR },
3674 { "UNDEF", ARM_FT_EXCEPTION },
3675 { "undef", ARM_FT_EXCEPTION },
3676 { "SWI", ARM_FT_EXCEPTION },
3677 { "swi", ARM_FT_EXCEPTION },
3678 { NULL, ARM_FT_NORMAL }
3679 };
3680
3681 /* Returns the (interrupt) function type of the current
3682 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3683
3684 static unsigned long
3685 arm_isr_value (tree argument)
3686 {
3687 const isr_attribute_arg * ptr;
3688 const char * arg;
3689
3690 if (!arm_arch_notm)
3691 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3692
3693 /* No argument - default to IRQ. */
3694 if (argument == NULL_TREE)
3695 return ARM_FT_ISR;
3696
3697 /* Get the value of the argument. */
3698 if (TREE_VALUE (argument) == NULL_TREE
3699 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3700 return ARM_FT_UNKNOWN;
3701
3702 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3703
3704 /* Check it against the list of known arguments. */
3705 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3706 if (streq (arg, ptr->arg))
3707 return ptr->return_value;
3708
3709 /* An unrecognized interrupt type. */
3710 return ARM_FT_UNKNOWN;
3711 }
3712
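/* Illustrative example (hypothetical user code): the strings in
   isr_attribute_args come from declarations such as

       void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   arm_isr_value maps "FIQ" to ARM_FT_FIQ, which makes the prologue and
   epilogue preserve every register the handler touches (not just the ABI's
   callee-saved set) and return with an exception-return sequence instead of
   a plain function return.  */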
3713 /* Computes the type of the current function. */
3714
3715 static unsigned long
3716 arm_compute_func_type (void)
3717 {
3718 unsigned long type = ARM_FT_UNKNOWN;
3719 tree a;
3720 tree attr;
3721
3722 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3723
3724 /* Decide if the current function is volatile. Such functions
3725 never return, and many memory cycles can be saved by not storing
3726 register values that will never be needed again. This optimization
3727 was added to speed up context switching in a kernel application. */
3728 if (optimize > 0
3729 && (TREE_NOTHROW (current_function_decl)
3730 || !(flag_unwind_tables
3731 || (flag_exceptions
3732 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3733 && TREE_THIS_VOLATILE (current_function_decl))
3734 type |= ARM_FT_VOLATILE;
3735
3736 if (cfun->static_chain_decl != NULL)
3737 type |= ARM_FT_NESTED;
3738
3739 attr = DECL_ATTRIBUTES (current_function_decl);
3740
3741 a = lookup_attribute ("naked", attr);
3742 if (a != NULL_TREE)
3743 type |= ARM_FT_NAKED;
3744
3745 a = lookup_attribute ("isr", attr);
3746 if (a == NULL_TREE)
3747 a = lookup_attribute ("interrupt", attr);
3748
3749 if (a == NULL_TREE)
3750 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3751 else
3752 type |= arm_isr_value (TREE_VALUE (a));
3753
3754 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3755 type |= ARM_FT_CMSE_ENTRY;
3756
3757 return type;
3758 }
3759
3760 /* Returns the type of the current function. */
3761
3762 unsigned long
3763 arm_current_func_type (void)
3764 {
3765 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3766 cfun->machine->func_type = arm_compute_func_type ();
3767
3768 return cfun->machine->func_type;
3769 }
3770
3771 bool
3772 arm_allocate_stack_slots_for_args (void)
3773 {
3774 /* Naked functions should not allocate stack slots for arguments. */
3775 return !IS_NAKED (arm_current_func_type ());
3776 }
3777
3778 static bool
3779 arm_warn_func_return (tree decl)
3780 {
3781 /* Naked functions are implemented entirely in assembly, including the
3782 return sequence, so suppress warnings about this. */
3783 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3784 }
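/* Illustrative example (hypothetical user code): a naked function provides
   its own prologue, epilogue and return sequence, which is why no argument
   stack slots are allocated for it and why the missing-return warning is
   suppressed above.

       void __attribute__ ((naked))
       reset_handler (void)
       {
         __asm__ ("ldr  sp, =0x20001000\n\t"
                  "b    main");
       }

   The body is expected to contain basic asm only; the handler name and the
   initial stack value are made up for the example.  */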
3785
3786 \f
3787 /* Output assembler code for a block containing the constant parts
3788 of a trampoline, leaving space for the variable parts.
3789
3790 On the ARM, (if r8 is the static chain regnum, and remembering that
3791 referencing pc adds an offset of 8) the trampoline looks like:
3792 ldr r8, [pc, #0]
3793 ldr pc, [pc]
3794 .word static chain value
3795 .word function's address
3796 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3797
3798 static void
3799 arm_asm_trampoline_template (FILE *f)
3800 {
3801 fprintf (f, "\t.syntax unified\n");
3802
3803 if (TARGET_ARM)
3804 {
3805 fprintf (f, "\t.arm\n");
3806 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3807 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3808 }
3809 else if (TARGET_THUMB2)
3810 {
3811 fprintf (f, "\t.thumb\n");
3812 /* The Thumb-2 trampoline is similar to the arm implementation.
3813 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3814 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3815 STATIC_CHAIN_REGNUM, PC_REGNUM);
3816 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3817 }
3818 else
3819 {
3820 ASM_OUTPUT_ALIGN (f, 2);
3821 fprintf (f, "\t.code\t16\n");
3822 fprintf (f, ".Ltrampoline_start:\n");
3823 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3824 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3825 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3826 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3827 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3828 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3829 }
3830 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3831 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3832 }
3833
3834 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3835
3836 static void
3837 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3838 {
3839 rtx fnaddr, mem, a_tramp;
3840
3841 emit_block_move (m_tramp, assemble_trampoline_template (),
3842 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3843
3844 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3845 emit_move_insn (mem, chain_value);
3846
3847 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3848 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3849 emit_move_insn (mem, fnaddr);
3850
3851 a_tramp = XEXP (m_tramp, 0);
3852 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3853 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3854 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3855 }
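/* Illustrative example (hypothetical user code): a trampoline is only built
   when the address of a nested function (a GNU C extension) that uses its
   enclosing frame escapes, e.g.

       extern int apply (int (*fn) (int));

       int outer (int x)
       {
         int inner (int y) { return x + y; }
         return apply (inner);
       }

   Passing inner's address makes GCC materialize the template above on the
   stack, fill in the static chain value and inner's address, and call
   __clear_cache over the region so the freshly written code is executable.  */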
3856
3857 /* Thumb trampolines should be entered in thumb mode, so set
3858 the bottom bit of the address. */
3859
3860 static rtx
3861 arm_trampoline_adjust_address (rtx addr)
3862 {
3863 if (TARGET_THUMB)
3864 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3865 NULL, 0, OPTAB_LIB_WIDEN);
3866 return addr;
3867 }
3868 \f
3869 /* Return 1 if it is possible to return using a single instruction.
3870 If SIBLING is non-null, this is a test for a return before a sibling
3871 call. SIBLING is the call insn, so we can examine its register usage. */
3872
3873 int
3874 use_return_insn (int iscond, rtx sibling)
3875 {
3876 int regno;
3877 unsigned int func_type;
3878 unsigned long saved_int_regs;
3879 unsigned HOST_WIDE_INT stack_adjust;
3880 arm_stack_offsets *offsets;
3881
3882 /* Never use a return instruction before reload has run. */
3883 if (!reload_completed)
3884 return 0;
3885
3886 func_type = arm_current_func_type ();
3887
3888 /* Naked, volatile and stack alignment functions need special
3889 consideration. */
3890 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3891 return 0;
3892
3893 /* So do interrupt functions that use the frame pointer and Thumb
3894 interrupt functions. */
3895 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3896 return 0;
3897
3898 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3899 && !optimize_function_for_size_p (cfun))
3900 return 0;
3901
3902 offsets = arm_get_frame_offsets ();
3903 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3904
3905 /* As do variadic functions. */
3906 if (crtl->args.pretend_args_size
3907 || cfun->machine->uses_anonymous_args
3908 /* Or if the function calls __builtin_eh_return () */
3909 || crtl->calls_eh_return
3910 /* Or if the function calls alloca */
3911 || cfun->calls_alloca
3912 /* Or if there is a stack adjustment. However, if the stack pointer
3913 is saved on the stack, we can use a pre-incrementing stack load. */
3914 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3915 && stack_adjust == 4))
3916 /* Or if the static chain register was saved above the frame, under the
3917 assumption that the stack pointer isn't saved on the stack. */
3918 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3919 && arm_compute_static_chain_stack_bytes() != 0))
3920 return 0;
3921
3922 saved_int_regs = offsets->saved_regs_mask;
3923
3924 /* Unfortunately, the insn
3925
3926 ldmib sp, {..., sp, ...}
3927
3928 triggers a bug on most SA-110 based devices, such that the stack
3929 pointer won't be correctly restored if the instruction takes a
3930 page fault. We work around this problem by popping r3 along with
3931 the other registers, since that is never slower than executing
3932 another instruction.
3933
3934 We test for !arm_arch5 here, because code for any architecture
3935 less than this could potentially be run on one of the buggy
3936 chips. */
3937 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3938 {
3939 /* Validate that r3 is a call-clobbered register (always true in
3940 the default abi) ... */
3941 if (!call_used_regs[3])
3942 return 0;
3943
3944 /* ... that it isn't being used for a return value ... */
3945 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3946 return 0;
3947
3948 /* ... or for a tail-call argument ... */
3949 if (sibling)
3950 {
3951 gcc_assert (CALL_P (sibling));
3952
3953 if (find_regno_fusage (sibling, USE, 3))
3954 return 0;
3955 }
3956
3957 /* ... and that there are no call-saved registers in r0-r2
3958 (always true in the default ABI). */
3959 if (saved_int_regs & 0x7)
3960 return 0;
3961 }
3962
3963 /* Can't be done if interworking with Thumb, and any registers have been
3964 stacked. */
3965 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3966 return 0;
3967
3968 /* On StrongARM, conditional returns are expensive if they aren't
3969 taken and multiple registers have been stacked. */
3970 if (iscond && arm_tune_strongarm)
3971 {
3972 /* Conditional return when just the LR is stored is a simple
3973 conditional-load instruction, that's not expensive. */
3974 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3975 return 0;
3976
3977 if (flag_pic
3978 && arm_pic_register != INVALID_REGNUM
3979 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3980 return 0;
3981 }
3982
3983 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
3984 several instructions if anything needs to be popped. */
3985 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
3986 return 0;
3987
3988 /* If there are saved registers but the LR isn't saved, then we need
3989 two instructions for the return. */
3990 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3991 return 0;
3992
3993 /* Can't be done if any of the VFP regs are pushed,
3994 since this also requires an insn. */
3995 if (TARGET_HARD_FLOAT)
3996 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3997 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3998 return 0;
3999
4000 if (TARGET_REALLY_IWMMXT)
4001 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4002 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4003 return 0;
4004
4005 return 1;
4006 }
4007
4008 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4009 shrink-wrapping if possible. This is the case if we need to emit a
4010 prologue, which we can test by looking at the offsets. */
4011 bool
4012 use_simple_return_p (void)
4013 {
4014 arm_stack_offsets *offsets;
4015
4016 /* Note this function can be called before or after reload. */
4017 if (!reload_completed)
4018 arm_compute_frame_layout ();
4019
4020 offsets = arm_get_frame_offsets ();
4021 return offsets->outgoing_args != 0;
4022 }
4023
4024 /* Return TRUE if int I is a valid immediate ARM constant. */
4025
4026 int
4027 const_ok_for_arm (HOST_WIDE_INT i)
4028 {
4029 int lowbit;
4030
4031 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4032 be all zero, or all one. */
4033 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4034 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4035 != ((~(unsigned HOST_WIDE_INT) 0)
4036 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4037 return FALSE;
4038
4039 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4040
4041 /* Fast return for 0 and small values. We must do this for zero, since
4042 the code below can't handle that one case. */
4043 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4044 return TRUE;
4045
4046 /* Get the number of trailing zeros. */
4047 lowbit = ffs((int) i) - 1;
4048
4049 /* Only even shifts are allowed in ARM mode so round down to the
4050 nearest even number. */
4051 if (TARGET_ARM)
4052 lowbit &= ~1;
4053
4054 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4055 return TRUE;
4056
4057 if (TARGET_ARM)
4058 {
4059 /* Allow rotated constants in ARM mode. */
4060 if (lowbit <= 4
4061 && ((i & ~0xc000003f) == 0
4062 || (i & ~0xf000000f) == 0
4063 || (i & ~0xfc000003) == 0))
4064 return TRUE;
4065 }
4066 else if (TARGET_THUMB2)
4067 {
4068 HOST_WIDE_INT v;
4069
4070 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4071 v = i & 0xff;
4072 v |= v << 16;
4073 if (i == v || i == (v | (v << 8)))
4074 return TRUE;
4075
4076 /* Allow repeated pattern 0xXY00XY00. */
4077 v = i & 0xff00;
4078 v |= v << 16;
4079 if (i == v)
4080 return TRUE;
4081 }
4082 else if (TARGET_HAVE_MOVT)
4083 {
4084 /* Thumb-1 Targets with MOVT. */
4085 if (i > 0xffff)
4086 return FALSE;
4087 else
4088 return TRUE;
4089 }
4090
4091 return FALSE;
4092 }
4093
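/* Worked examples (editor's illustration) of the ARM-mode rule above, i.e.
   an 8-bit value rotated right by an even amount:

       0x000000ff   valid    (8-bit value, no rotation)
       0x000003fc   valid    (0xff << 2, even rotation)
       0xff000000   valid    (0xff rotated right by 8)
       0xf000000f   valid    (0xff rotated right by 4, wrapping around)
       0x00000101   invalid  (the set bits span 9 bit positions)
       0x01020304   invalid  (cannot fit in one rotated byte)

   Thumb-2 additionally accepts replicated patterns such as 0x00ff00ff or
   0xab00ab00, as handled above.  Constants that are not encodable must be
   synthesized by arm_split_constant / arm_gen_constant below, or loaded
   from the literal pool.  */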
4094 /* Return true if I is a valid constant for the operation CODE. */
4095 int
4096 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4097 {
4098 if (const_ok_for_arm (i))
4099 return 1;
4100
4101 switch (code)
4102 {
4103 case SET:
4104 /* See if we can use movw. */
4105 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4106 return 1;
4107 else
4108 /* Otherwise, try mvn. */
4109 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4110
4111 case PLUS:
4112 /* See if we can use addw or subw. */
4113 if (TARGET_THUMB2
4114 && ((i & 0xfffff000) == 0
4115 || ((-i) & 0xfffff000) == 0))
4116 return 1;
4117 /* Fall through. */
4118 case COMPARE:
4119 case EQ:
4120 case NE:
4121 case GT:
4122 case LE:
4123 case LT:
4124 case GE:
4125 case GEU:
4126 case LTU:
4127 case GTU:
4128 case LEU:
4129 case UNORDERED:
4130 case ORDERED:
4131 case UNEQ:
4132 case UNGE:
4133 case UNLT:
4134 case UNGT:
4135 case UNLE:
4136 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4137
4138 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4139 case XOR:
4140 return 0;
4141
4142 case IOR:
4143 if (TARGET_THUMB2)
4144 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4145 return 0;
4146
4147 case AND:
4148 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4149
4150 default:
4151 gcc_unreachable ();
4152 }
4153 }
4154
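/* Illustrative examples (editor's sketch) of the per-operation tweaks above,
   written as hypothetical user expressions together with the single
   instruction one would typically expect in ARM mode:

       x + 0xffffff01u      add of -255, negated to     sub  rD, rN, #255
       x & 0xffffff00u      AND, inverted to            bic  rD, rN, #255
       x = 0xffffff00u      SET, inverted to            mvn  rD, #255

   In each case the raw constant is not a valid immediate, but its negation
   or complement is, so one instruction still suffices.  */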
4155 /* Return true if I is a valid di mode constant for the operation CODE. */
4156 int
4157 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4158 {
4159 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4160 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4161 rtx hi = GEN_INT (hi_val);
4162 rtx lo = GEN_INT (lo_val);
4163
4164 if (TARGET_THUMB1)
4165 return 0;
4166
4167 switch (code)
4168 {
4169 case AND:
4170 case IOR:
4171 case XOR:
4172 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4173 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4174 case PLUS:
4175 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4176
4177 default:
4178 return 0;
4179 }
4180 }
4181
4182 /* Emit a sequence of insns to handle a large constant.
4183 CODE is the code of the operation required, it can be any of SET, PLUS,
4184 IOR, AND, XOR, MINUS;
4185 MODE is the mode in which the operation is being performed;
4186 VAL is the integer to operate on;
4187 SOURCE is the other operand (a register, or a null-pointer for SET);
4188 SUBTARGETS means it is safe to create scratch registers if that will
4189 either produce a simpler sequence, or we will want to cse the values.
4190 Return value is the number of insns emitted. */
4191
4192 /* ??? Tweak this for thumb2. */
4193 int
4194 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4195 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4196 {
4197 rtx cond;
4198
4199 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4200 cond = COND_EXEC_TEST (PATTERN (insn));
4201 else
4202 cond = NULL_RTX;
4203
4204 if (subtargets || code == SET
4205 || (REG_P (target) && REG_P (source)
4206 && REGNO (target) != REGNO (source)))
4207 {
4208 /* After arm_reorg has been called, we can't fix up expensive
4209 constants by pushing them into memory so we must synthesize
4210 them in-line, regardless of the cost. This is only likely to
4211 be more costly on chips that have load delay slots and we are
4212 compiling without running the scheduler (so no splitting
4213 occurred before the final instruction emission).
4214
4215 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4216 */
4217 if (!cfun->machine->after_arm_reorg
4218 && !cond
4219 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4220 1, 0)
4221 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4222 + (code != SET))))
4223 {
4224 if (code == SET)
4225 {
4226 /* Currently SET is the only monadic value for CODE, all
4227 the rest are dyadic. */
4228 if (TARGET_USE_MOVT)
4229 arm_emit_movpair (target, GEN_INT (val));
4230 else
4231 emit_set_insn (target, GEN_INT (val));
4232
4233 return 1;
4234 }
4235 else
4236 {
4237 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4238
4239 if (TARGET_USE_MOVT)
4240 arm_emit_movpair (temp, GEN_INT (val));
4241 else
4242 emit_set_insn (temp, GEN_INT (val));
4243
4244 /* For MINUS, the value is subtracted from, since we never
4245 have subtraction of a constant. */
4246 if (code == MINUS)
4247 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4248 else
4249 emit_set_insn (target,
4250 gen_rtx_fmt_ee (code, mode, source, temp));
4251 return 2;
4252 }
4253 }
4254 }
4255
4256 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4257 1);
4258 }
4259
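/* Illustrative example (editor's sketch): for a plain assignment such as

       unsigned int f (void) { return 0xe0000100u; }

   0xe0000100 is not a single valid immediate, so on a target with MOVW/MOVT
   one would typically expect

       movw    r0, #0x0100
       movt    r0, #0xe000

   while older ARM cores get a two-instruction synthesis along the lines of

       mov     r0, #0xe0000000
       orr     r0, r0, #0x100

   which is the shape the comment in optimal_immediate_sequence below aims
   for.  */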
4260 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4261 ARM/THUMB2 immediates and add up to VAL.
4262 The function return value gives the number of insns required. */
4263 static int
4264 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4265 struct four_ints *return_sequence)
4266 {
4267 int best_consecutive_zeros = 0;
4268 int i;
4269 int best_start = 0;
4270 int insns1, insns2;
4271 struct four_ints tmp_sequence;
4272
4273 /* If we aren't targeting ARM, the best place to start is always at
4274 the bottom, otherwise look more closely. */
4275 if (TARGET_ARM)
4276 {
4277 for (i = 0; i < 32; i += 2)
4278 {
4279 int consecutive_zeros = 0;
4280
4281 if (!(val & (3 << i)))
4282 {
4283 while ((i < 32) && !(val & (3 << i)))
4284 {
4285 consecutive_zeros += 2;
4286 i += 2;
4287 }
4288 if (consecutive_zeros > best_consecutive_zeros)
4289 {
4290 best_consecutive_zeros = consecutive_zeros;
4291 best_start = i - consecutive_zeros;
4292 }
4293 i -= 2;
4294 }
4295 }
4296 }
4297
4298 /* So long as it won't require any more insns to do so, it's
4299 desirable to emit a small constant (in bits 0...9) in the last
4300 insn. This way there is more chance that it can be combined with
4301 a later addressing insn to form a pre-indexed load or store
4302 operation. Consider:
4303
4304 *((volatile int *)0xe0000100) = 1;
4305 *((volatile int *)0xe0000110) = 2;
4306
4307 We want this to wind up as:
4308
4309 mov rA, #0xe0000000
4310 mov rB, #1
4311 str rB, [rA, #0x100]
4312 mov rB, #2
4313 str rB, [rA, #0x110]
4314
4315 rather than having to synthesize both large constants from scratch.
4316
4317 Therefore, we calculate how many insns would be required to emit
4318 the constant starting from `best_start', and also starting from
4319 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4320 yield a shorter sequence, we may as well use zero. */
4321 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4322 if (best_start != 0
4323 && ((HOST_WIDE_INT_1U << best_start) < val))
4324 {
4325 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4326 if (insns2 <= insns1)
4327 {
4328 *return_sequence = tmp_sequence;
4329 insns1 = insns2;
4330 }
4331 }
4332
4333 return insns1;
4334 }
4335
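/* Editor's sketch (simplified, ARM mode only, ignoring the best-start search
   above and the wrap-around / replicated-constant handling in
   optimal_immediate_sequence_1 below): the heart of the split is a greedy
   walk that peels off one 8-bit, evenly rotated chunk at a time.

       int count_chunks (unsigned int val)
       {
         int n = 0;
         while (val)
           {
             int low = __builtin_ctz (val) & ~1;   // even rotations only
             val &= ~(0xffu << low);               // clear one 8-bit chunk
             n++;
           }
         return n;
       }

   For example 0x00450078 peels into 0x450000 and 0x78, i.e. a MOV plus an
   ORR for a SET, and the caller (arm_gen_constant, below) also evaluates the
   negated and inverted forms before committing to a sequence.  */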
4336 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4337 static int
4338 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4339 struct four_ints *return_sequence, int i)
4340 {
4341 int remainder = val & 0xffffffff;
4342 int insns = 0;
4343
4344 /* Try and find a way of doing the job in either two or three
4345 instructions.
4346
4347 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4348 location. We start at position I. This may be the MSB, or
4349 optimal_immediate_sequence may have positioned it at the largest block
4350 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4351 wrapping around to the top of the word when we drop off the bottom.
4352 In the worst case this code should produce no more than four insns.
4353
4354 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4355 constants, shifted to any arbitrary location. We should always start
4356 at the MSB. */
4357 do
4358 {
4359 int end;
4360 unsigned int b1, b2, b3, b4;
4361 unsigned HOST_WIDE_INT result;
4362 int loc;
4363
4364 gcc_assert (insns < 4);
4365
4366 if (i <= 0)
4367 i += 32;
4368
4369 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4370 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4371 {
4372 loc = i;
4373 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4374 /* We can use addw/subw for the last 12 bits. */
4375 result = remainder;
4376 else
4377 {
4378 /* Use an 8-bit shifted/rotated immediate. */
4379 end = i - 8;
4380 if (end < 0)
4381 end += 32;
4382 result = remainder & ((0x0ff << end)
4383 | ((i < end) ? (0xff >> (32 - end))
4384 : 0));
4385 i -= 8;
4386 }
4387 }
4388 else
4389 {
4390 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4391 arbitrary shifts. */
4392 i -= TARGET_ARM ? 2 : 1;
4393 continue;
4394 }
4395
4396 /* Next, see if we can do a better job with a thumb2 replicated
4397 constant.
4398
4399 We do it this way around to catch the cases like 0x01F001E0 where
4400 two 8-bit immediates would work, but a replicated constant would
4401 make it worse.
4402
4403 TODO: 16-bit constants that don't clear all the bits, but still win.
4404 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4405 if (TARGET_THUMB2)
4406 {
4407 b1 = (remainder & 0xff000000) >> 24;
4408 b2 = (remainder & 0x00ff0000) >> 16;
4409 b3 = (remainder & 0x0000ff00) >> 8;
4410 b4 = remainder & 0xff;
4411
4412 if (loc > 24)
4413 {
4414 /* The 8-bit immediate already found clears b1 (and maybe b2),
4415 but must leave b3 and b4 alone. */
4416
4417 /* First try to find a 32-bit replicated constant that clears
4418 almost everything. We can assume that we can't do it in one,
4419 or else we wouldn't be here. */
4420 unsigned int tmp = b1 & b2 & b3 & b4;
4421 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4422 + (tmp << 24);
4423 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4424 + (tmp == b3) + (tmp == b4);
4425 if (tmp
4426 && (matching_bytes >= 3
4427 || (matching_bytes == 2
4428 && const_ok_for_op (remainder & ~tmp2, code))))
4429 {
4430 /* At least 3 of the bytes match, and the fourth has at
4431 least as many bits set, or two of the bytes match
4432 and it will only require one more insn to finish. */
4433 result = tmp2;
4434 i = tmp != b1 ? 32
4435 : tmp != b2 ? 24
4436 : tmp != b3 ? 16
4437 : 8;
4438 }
4439
4440 /* Second, try to find a 16-bit replicated constant that can
4441 leave three of the bytes clear. If b2 or b4 is already
4442 zero, then we can. If the 8-bit from above would not
4443 clear b2 anyway, then we still win. */
4444 else if (b1 == b3 && (!b2 || !b4
4445 || (remainder & 0x00ff0000 & ~result)))
4446 {
4447 result = remainder & 0xff00ff00;
4448 i = 24;
4449 }
4450 }
4451 else if (loc > 16)
4452 {
4453 /* The 8-bit immediate already found clears b2 (and maybe b3)
4454 and we don't get here unless b1 is already clear, but it will
4455 leave b4 unchanged. */
4456
4457 /* If we can clear b2 and b4 at once, then we win, since the
4458 8-bits couldn't possibly reach that far. */
4459 if (b2 == b4)
4460 {
4461 result = remainder & 0x00ff00ff;
4462 i = 16;
4463 }
4464 }
4465 }
4466
4467 return_sequence->i[insns++] = result;
4468 remainder &= ~result;
4469
4470 if (code == SET || code == MINUS)
4471 code = PLUS;
4472 }
4473 while (remainder);
4474
4475 return insns;
4476 }
4477
4478 /* Emit an instruction with the indicated PATTERN. If COND is
4479 non-NULL, conditionalize the execution of the instruction on COND
4480 being true. */
4481
4482 static void
4483 emit_constant_insn (rtx cond, rtx pattern)
4484 {
4485 if (cond)
4486 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4487 emit_insn (pattern);
4488 }
4489
4490 /* As above, but extra parameter GENERATE which, if clear, suppresses
4491 RTL generation. */
4492
4493 static int
4494 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4495 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4496 int subtargets, int generate)
4497 {
4498 int can_invert = 0;
4499 int can_negate = 0;
4500 int final_invert = 0;
4501 int i;
4502 int set_sign_bit_copies = 0;
4503 int clear_sign_bit_copies = 0;
4504 int clear_zero_bit_copies = 0;
4505 int set_zero_bit_copies = 0;
4506 int insns = 0, neg_insns, inv_insns;
4507 unsigned HOST_WIDE_INT temp1, temp2;
4508 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4509 struct four_ints *immediates;
4510 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4511
4512 /* Find out which operations are safe for a given CODE. Also do a quick
4513 check for degenerate cases; these can occur when DImode operations
4514 are split. */
4515 switch (code)
4516 {
4517 case SET:
4518 can_invert = 1;
4519 break;
4520
4521 case PLUS:
4522 can_negate = 1;
4523 break;
4524
4525 case IOR:
4526 if (remainder == 0xffffffff)
4527 {
4528 if (generate)
4529 emit_constant_insn (cond,
4530 gen_rtx_SET (target,
4531 GEN_INT (ARM_SIGN_EXTEND (val))));
4532 return 1;
4533 }
4534
4535 if (remainder == 0)
4536 {
4537 if (reload_completed && rtx_equal_p (target, source))
4538 return 0;
4539
4540 if (generate)
4541 emit_constant_insn (cond, gen_rtx_SET (target, source));
4542 return 1;
4543 }
4544 break;
4545
4546 case AND:
4547 if (remainder == 0)
4548 {
4549 if (generate)
4550 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4551 return 1;
4552 }
4553 if (remainder == 0xffffffff)
4554 {
4555 if (reload_completed && rtx_equal_p (target, source))
4556 return 0;
4557 if (generate)
4558 emit_constant_insn (cond, gen_rtx_SET (target, source));
4559 return 1;
4560 }
4561 can_invert = 1;
4562 break;
4563
4564 case XOR:
4565 if (remainder == 0)
4566 {
4567 if (reload_completed && rtx_equal_p (target, source))
4568 return 0;
4569 if (generate)
4570 emit_constant_insn (cond, gen_rtx_SET (target, source));
4571 return 1;
4572 }
4573
4574 if (remainder == 0xffffffff)
4575 {
4576 if (generate)
4577 emit_constant_insn (cond,
4578 gen_rtx_SET (target,
4579 gen_rtx_NOT (mode, source)));
4580 return 1;
4581 }
4582 final_invert = 1;
4583 break;
4584
4585 case MINUS:
4586 /* We treat MINUS as (val - source), since (source - val) is always
4587 passed as (source + (-val)). */
4588 if (remainder == 0)
4589 {
4590 if (generate)
4591 emit_constant_insn (cond,
4592 gen_rtx_SET (target,
4593 gen_rtx_NEG (mode, source)));
4594 return 1;
4595 }
4596 if (const_ok_for_arm (val))
4597 {
4598 if (generate)
4599 emit_constant_insn (cond,
4600 gen_rtx_SET (target,
4601 gen_rtx_MINUS (mode, GEN_INT (val),
4602 source)));
4603 return 1;
4604 }
4605
4606 break;
4607
4608 default:
4609 gcc_unreachable ();
4610 }
4611
4612 /* If we can do it in one insn get out quickly. */
4613 if (const_ok_for_op (val, code))
4614 {
4615 if (generate)
4616 emit_constant_insn (cond,
4617 gen_rtx_SET (target,
4618 (source
4619 ? gen_rtx_fmt_ee (code, mode, source,
4620 GEN_INT (val))
4621 : GEN_INT (val))));
4622 return 1;
4623 }
4624
4625 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4626 insn. */
4627 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4628 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4629 {
4630 if (generate)
4631 {
4632 if (mode == SImode && i == 16)
4633 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4634 smaller insn. */
4635 emit_constant_insn (cond,
4636 gen_zero_extendhisi2
4637 (target, gen_lowpart (HImode, source)));
4638 else
4639 /* Extz only supports SImode, but we can coerce the operands
4640 into that mode. */
4641 emit_constant_insn (cond,
4642 gen_extzv_t2 (gen_lowpart (SImode, target),
4643 gen_lowpart (SImode, source),
4644 GEN_INT (i), const0_rtx));
4645 }
4646
4647 return 1;
4648 }
4649
4650 /* Calculate a few attributes that may be useful for specific
4651 optimizations. */
4652 /* Count number of leading zeros. */
4653 for (i = 31; i >= 0; i--)
4654 {
4655 if ((remainder & (1 << i)) == 0)
4656 clear_sign_bit_copies++;
4657 else
4658 break;
4659 }
4660
4661 /* Count number of leading 1's. */
4662 for (i = 31; i >= 0; i--)
4663 {
4664 if ((remainder & (1 << i)) != 0)
4665 set_sign_bit_copies++;
4666 else
4667 break;
4668 }
4669
4670 /* Count number of trailing zero's. */
4671 for (i = 0; i <= 31; i++)
4672 {
4673 if ((remainder & (1 << i)) == 0)
4674 clear_zero_bit_copies++;
4675 else
4676 break;
4677 }
4678
4679 /* Count number of trailing 1's. */
4680 for (i = 0; i <= 31; i++)
4681 {
4682 if ((remainder & (1 << i)) != 0)
4683 set_zero_bit_copies++;
4684 else
4685 break;
4686 }
4687
4688 switch (code)
4689 {
4690 case SET:
4691 /* See if we can do this by sign_extending a constant that is known
4692 to be negative. This is a good way of doing it, since the shift
4693 may well merge into a subsequent insn. */
4694 if (set_sign_bit_copies > 1)
4695 {
4696 if (const_ok_for_arm
4697 (temp1 = ARM_SIGN_EXTEND (remainder
4698 << (set_sign_bit_copies - 1))))
4699 {
4700 if (generate)
4701 {
4702 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4703 emit_constant_insn (cond,
4704 gen_rtx_SET (new_src, GEN_INT (temp1)));
4705 emit_constant_insn (cond,
4706 gen_ashrsi3 (target, new_src,
4707 GEN_INT (set_sign_bit_copies - 1)));
4708 }
4709 return 2;
4710 }
4711 /* For an inverted constant, we will need to set the low bits,
4712 these will be shifted out of harm's way. */
4713 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4714 if (const_ok_for_arm (~temp1))
4715 {
4716 if (generate)
4717 {
4718 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4719 emit_constant_insn (cond,
4720 gen_rtx_SET (new_src, GEN_INT (temp1)));
4721 emit_constant_insn (cond,
4722 gen_ashrsi3 (target, new_src,
4723 GEN_INT (set_sign_bit_copies - 1)));
4724 }
4725 return 2;
4726 }
4727 }
4728
4729 /* See if we can calculate the value as the difference between two
4730 valid immediates. */
4731 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4732 {
4733 int topshift = clear_sign_bit_copies & ~1;
4734
4735 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4736 & (0xff000000 >> topshift));
4737
4738 /* If temp1 is zero, then that means the 9 most significant
4739 bits of remainder were 1 and we've caused it to overflow.
4740 When topshift is 0 we don't need to do anything since we
4741 can borrow from 'bit 32'. */
4742 if (temp1 == 0 && topshift != 0)
4743 temp1 = 0x80000000 >> (topshift - 1);
4744
4745 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4746
4747 if (const_ok_for_arm (temp2))
4748 {
4749 if (generate)
4750 {
4751 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4752 emit_constant_insn (cond,
4753 gen_rtx_SET (new_src, GEN_INT (temp1)));
4754 emit_constant_insn (cond,
4755 gen_addsi3 (target, new_src,
4756 GEN_INT (-temp2)));
4757 }
4758
4759 return 2;
4760 }
4761 }
4762
4763 /* See if we can generate this by setting the bottom (or the top)
4764 16 bits, and then shifting these into the other half of the
4765 word. We only look for the simplest cases, to do more would cost
4766 too much. Be careful, however, not to generate this when the
4767 alternative would take fewer insns. */
4768 if (val & 0xffff0000)
4769 {
4770 temp1 = remainder & 0xffff0000;
4771 temp2 = remainder & 0x0000ffff;
4772
4773 /* Overlaps outside this range are best done using other methods. */
4774 for (i = 9; i < 24; i++)
4775 {
4776 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4777 && !const_ok_for_arm (temp2))
4778 {
4779 rtx new_src = (subtargets
4780 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4781 : target);
4782 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4783 source, subtargets, generate);
4784 source = new_src;
4785 if (generate)
4786 emit_constant_insn
4787 (cond,
4788 gen_rtx_SET
4789 (target,
4790 gen_rtx_IOR (mode,
4791 gen_rtx_ASHIFT (mode, source,
4792 GEN_INT (i)),
4793 source)));
4794 return insns + 1;
4795 }
4796 }
4797
4798 /* Don't duplicate cases already considered. */
4799 for (i = 17; i < 24; i++)
4800 {
4801 if (((temp1 | (temp1 >> i)) == remainder)
4802 && !const_ok_for_arm (temp1))
4803 {
4804 rtx new_src = (subtargets
4805 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4806 : target);
4807 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4808 source, subtargets, generate);
4809 source = new_src;
4810 if (generate)
4811 emit_constant_insn
4812 (cond,
4813 gen_rtx_SET (target,
4814 gen_rtx_IOR
4815 (mode,
4816 gen_rtx_LSHIFTRT (mode, source,
4817 GEN_INT (i)),
4818 source)));
4819 return insns + 1;
4820 }
4821 }
4822 }
4823 break;
4824
4825 case IOR:
4826 case XOR:
4827 /* If we have IOR or XOR, and the constant can be loaded in a
4828 single instruction, and we can find a temporary to put it in,
4829 then this can be done in two instructions instead of 3-4. */
4830 if (subtargets
4831 /* TARGET can't be NULL if SUBTARGETS is 0 */
4832 || (reload_completed && !reg_mentioned_p (target, source)))
4833 {
4834 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4835 {
4836 if (generate)
4837 {
4838 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4839
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (sub, GEN_INT (val)));
4842 emit_constant_insn (cond,
4843 gen_rtx_SET (target,
4844 gen_rtx_fmt_ee (code, mode,
4845 source, sub)));
4846 }
4847 return 2;
4848 }
4849 }
4850
4851 if (code == XOR)
4852 break;
4853
4854 /* Convert.
4855 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4856 and the remainder 0s, e.g. 0xfff00000)
4857 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4858
4859 This can be done in 2 instructions by using shifts with mov or mvn.
4860 e.g. for
4861 x = x | 0xfff00000;
4862 we generate.
4863 mvn r0, r0, asl #12
4864 mvn r0, r0, lsr #12 */
4865 if (set_sign_bit_copies > 8
4866 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4867 {
4868 if (generate)
4869 {
4870 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4871 rtx shift = GEN_INT (set_sign_bit_copies);
4872
4873 emit_constant_insn
4874 (cond,
4875 gen_rtx_SET (sub,
4876 gen_rtx_NOT (mode,
4877 gen_rtx_ASHIFT (mode,
4878 source,
4879 shift))));
4880 emit_constant_insn
4881 (cond,
4882 gen_rtx_SET (target,
4883 gen_rtx_NOT (mode,
4884 gen_rtx_LSHIFTRT (mode, sub,
4885 shift))));
4886 }
4887 return 2;
4888 }
4889
4890 /* Convert
4891 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4892 to
4893 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4894
4895 E.g. for r0 = r0 | 0xfff
4896 mvn r0, r0, lsr #12
4897 mvn r0, r0, asl #12
4898
4899 */
4900 if (set_zero_bit_copies > 8
4901 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4902 {
4903 if (generate)
4904 {
4905 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4906 rtx shift = GEN_INT (set_zero_bit_copies);
4907
4908 emit_constant_insn
4909 (cond,
4910 gen_rtx_SET (sub,
4911 gen_rtx_NOT (mode,
4912 gen_rtx_LSHIFTRT (mode,
4913 source,
4914 shift))));
4915 emit_constant_insn
4916 (cond,
4917 gen_rtx_SET (target,
4918 gen_rtx_NOT (mode,
4919 gen_rtx_ASHIFT (mode, sub,
4920 shift))));
4921 }
4922 return 2;
4923 }
4924
4925 /* This will never be reached for Thumb2 because orn is a valid
4926 instruction. This is for Thumb1 and the ARM 32 bit cases.
4927
4928 x = y | constant (such that ~constant is a valid constant)
4929 Transform this to
4930 x = ~(~y & ~constant).
4931 */
4932 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4933 {
4934 if (generate)
4935 {
4936 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4937 emit_constant_insn (cond,
4938 gen_rtx_SET (sub,
4939 gen_rtx_NOT (mode, source)));
4940 source = sub;
4941 if (subtargets)
4942 sub = gen_reg_rtx (mode);
4943 emit_constant_insn (cond,
4944 gen_rtx_SET (sub,
4945 gen_rtx_AND (mode, source,
4946 GEN_INT (temp1))));
4947 emit_constant_insn (cond,
4948 gen_rtx_SET (target,
4949 gen_rtx_NOT (mode, sub)));
4950 }
4951 return 3;
4952 }
4953 break;
4954
4955 case AND:
4956 /* See if two shifts will do 2 or more insn's worth of work. */
4957 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4958 {
4959 HOST_WIDE_INT shift_mask = ((0xffffffff
4960 << (32 - clear_sign_bit_copies))
4961 & 0xffffffff);
4962
4963 if ((remainder | shift_mask) != 0xffffffff)
4964 {
4965 HOST_WIDE_INT new_val
4966 = ARM_SIGN_EXTEND (remainder | shift_mask);
4967
4968 if (generate)
4969 {
4970 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4971 insns = arm_gen_constant (AND, SImode, cond, new_val,
4972 new_src, source, subtargets, 1);
4973 source = new_src;
4974 }
4975 else
4976 {
4977 rtx targ = subtargets ? NULL_RTX : target;
4978 insns = arm_gen_constant (AND, mode, cond, new_val,
4979 targ, source, subtargets, 0);
4980 }
4981 }
4982
4983 if (generate)
4984 {
4985 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4986 rtx shift = GEN_INT (clear_sign_bit_copies);
4987
4988 emit_insn (gen_ashlsi3 (new_src, source, shift));
4989 emit_insn (gen_lshrsi3 (target, new_src, shift));
4990 }
4991
4992 return insns + 2;
4993 }
4994
4995 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4996 {
4997 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4998
4999 if ((remainder | shift_mask) != 0xffffffff)
5000 {
5001 HOST_WIDE_INT new_val
5002 = ARM_SIGN_EXTEND (remainder | shift_mask);
5003 if (generate)
5004 {
5005 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5006
5007 insns = arm_gen_constant (AND, mode, cond, new_val,
5008 new_src, source, subtargets, 1);
5009 source = new_src;
5010 }
5011 else
5012 {
5013 rtx targ = subtargets ? NULL_RTX : target;
5014
5015 insns = arm_gen_constant (AND, mode, cond, new_val,
5016 targ, source, subtargets, 0);
5017 }
5018 }
5019
5020 if (generate)
5021 {
5022 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5023 rtx shift = GEN_INT (clear_zero_bit_copies);
5024
5025 emit_insn (gen_lshrsi3 (new_src, source, shift));
5026 emit_insn (gen_ashlsi3 (target, new_src, shift));
5027 }
5028
5029 return insns + 2;
5030 }
5031
5032 break;
5033
5034 default:
5035 break;
5036 }
5037
5038 /* Calculate what the instruction sequences would be if we generated it
5039 normally, negated, or inverted. */
5040 if (code == AND)
5041 /* AND cannot be split into multiple insns, so invert and use BIC. */
5042 insns = 99;
5043 else
5044 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5045
5046 if (can_negate)
5047 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5048 &neg_immediates);
5049 else
5050 neg_insns = 99;
5051
5052 if (can_invert || final_invert)
5053 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5054 &inv_immediates);
5055 else
5056 inv_insns = 99;
5057
5058 immediates = &pos_immediates;
5059
5060 /* Is the negated immediate sequence more efficient? */
5061 if (neg_insns < insns && neg_insns <= inv_insns)
5062 {
5063 insns = neg_insns;
5064 immediates = &neg_immediates;
5065 }
5066 else
5067 can_negate = 0;
5068
5069 /* Is the inverted immediate sequence more efficient?
5070 We must allow for an extra NOT instruction for XOR operations, although
5071 there is some chance that the final 'mvn' will get optimized later. */
5072 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5073 {
5074 insns = inv_insns;
5075 immediates = &inv_immediates;
5076 }
5077 else
5078 {
5079 can_invert = 0;
5080 final_invert = 0;
5081 }
5082
5083 /* Now output the chosen sequence as instructions. */
5084 if (generate)
5085 {
5086 for (i = 0; i < insns; i++)
5087 {
5088 rtx new_src, temp1_rtx;
5089
5090 temp1 = immediates->i[i];
5091
5092 if (code == SET || code == MINUS)
5093 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5094 else if ((final_invert || i < (insns - 1)) && subtargets)
5095 new_src = gen_reg_rtx (mode);
5096 else
5097 new_src = target;
5098
5099 if (can_invert)
5100 temp1 = ~temp1;
5101 else if (can_negate)
5102 temp1 = -temp1;
5103
5104 temp1 = trunc_int_for_mode (temp1, mode);
5105 temp1_rtx = GEN_INT (temp1);
5106
5107 if (code == SET)
5108 ;
5109 else if (code == MINUS)
5110 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5111 else
5112 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5113
5114 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5115 source = new_src;
5116
5117 if (code == SET)
5118 {
5119 can_negate = can_invert;
5120 can_invert = 0;
5121 code = PLUS;
5122 }
5123 else if (code == MINUS)
5124 code = PLUS;
5125 }
5126 }
5127
5128 if (final_invert)
5129 {
5130 if (generate)
5131 emit_constant_insn (cond, gen_rtx_SET (target,
5132 gen_rtx_NOT (mode, source)));
5133 insns++;
5134 }
5135
5136 return insns;
5137 }
5138
5139 /* Canonicalize a comparison so that we are more likely to recognize it.
5140 This can be done for a few constant compares, where we can make the
5141 immediate value easier to load. */
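/* For instance (a sketch): (x > 511) needs the constant 511, which is
   not a valid ARM immediate, but it is equivalent to (x >= 512), and
   512 is encodable; the GT/LE and GTU/LEU cases below apply exactly
   this kind of +/-1 adjustment.  */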
5142
5143 static void
5144 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5145 bool op0_preserve_value)
5146 {
5147 machine_mode mode;
5148 unsigned HOST_WIDE_INT i, maxval;
5149
5150 mode = GET_MODE (*op0);
5151 if (mode == VOIDmode)
5152 mode = GET_MODE (*op1);
5153
5154 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5155
5156 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5157 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5158 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5159 for GTU/LEU in Thumb mode. */
5160 if (mode == DImode)
5161 {
5162
5163 if (*code == GT || *code == LE
5164 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5165 {
5166 /* Missing comparison. First try to use an available
5167 comparison. */
5168 if (CONST_INT_P (*op1))
5169 {
5170 i = INTVAL (*op1);
5171 switch (*code)
5172 {
5173 case GT:
5174 case LE:
5175 if (i != maxval
5176 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5177 {
5178 *op1 = GEN_INT (i + 1);
5179 *code = *code == GT ? GE : LT;
5180 return;
5181 }
5182 break;
5183 case GTU:
5184 case LEU:
5185 if (i != ~((unsigned HOST_WIDE_INT) 0)
5186 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5187 {
5188 *op1 = GEN_INT (i + 1);
5189 *code = *code == GTU ? GEU : LTU;
5190 return;
5191 }
5192 break;
5193 default:
5194 gcc_unreachable ();
5195 }
5196 }
5197
5198 /* If that did not work, reverse the condition. */
5199 if (!op0_preserve_value)
5200 {
5201 std::swap (*op0, *op1);
5202 *code = (int)swap_condition ((enum rtx_code)*code);
5203 }
5204 }
5205 return;
5206 }
5207
5208 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5209 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5210 to facilitate possible combining with a cmp into 'ands'. */
5211 if (mode == SImode
5212 && GET_CODE (*op0) == ZERO_EXTEND
5213 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5214 && GET_MODE (XEXP (*op0, 0)) == QImode
5215 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5216 && subreg_lowpart_p (XEXP (*op0, 0))
5217 && *op1 == const0_rtx)
5218 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5219 GEN_INT (255));
5220
5221 /* Comparisons smaller than DImode. Only adjust comparisons against
5222 an out-of-range constant. */
5223 if (!CONST_INT_P (*op1)
5224 || const_ok_for_arm (INTVAL (*op1))
5225 || const_ok_for_arm (- INTVAL (*op1)))
5226 return;
5227
5228 i = INTVAL (*op1);
5229
5230 switch (*code)
5231 {
5232 case EQ:
5233 case NE:
5234 return;
5235
5236 case GT:
5237 case LE:
5238 if (i != maxval
5239 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5240 {
5241 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5242 *code = *code == GT ? GE : LT;
5243 return;
5244 }
5245 break;
5246
5247 case GE:
5248 case LT:
5249 if (i != ~maxval
5250 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5251 {
5252 *op1 = GEN_INT (i - 1);
5253 *code = *code == GE ? GT : LE;
5254 return;
5255 }
5256 break;
5257
5258 case GTU:
5259 case LEU:
5260 if (i != ~((unsigned HOST_WIDE_INT) 0)
5261 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5262 {
5263 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5264 *code = *code == GTU ? GEU : LTU;
5265 return;
5266 }
5267 break;
5268
5269 case GEU:
5270 case LTU:
5271 if (i != 0
5272 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5273 {
5274 *op1 = GEN_INT (i - 1);
5275 *code = *code == GEU ? GTU : LEU;
5276 return;
5277 }
5278 break;
5279
5280 default:
5281 gcc_unreachable ();
5282 }
5283 }
5284
5285
5286 /* Define how to find the value returned by a function. */
5287
5288 static rtx
5289 arm_function_value(const_tree type, const_tree func,
5290 bool outgoing ATTRIBUTE_UNUSED)
5291 {
5292 machine_mode mode;
5293 int unsignedp ATTRIBUTE_UNUSED;
5294 rtx r ATTRIBUTE_UNUSED;
5295
5296 mode = TYPE_MODE (type);
5297
5298 if (TARGET_AAPCS_BASED)
5299 return aapcs_allocate_return_reg (mode, type, func);
5300
5301 /* Promote integer types. */
5302 if (INTEGRAL_TYPE_P (type))
5303 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5304
5305 /* Promote small structs returned in a register to full-word size
5306 for big-endian AAPCS. */
5307 if (arm_return_in_msb (type))
5308 {
5309 HOST_WIDE_INT size = int_size_in_bytes (type);
5310 if (size % UNITS_PER_WORD != 0)
5311 {
5312 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5313 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5314 }
5315 }
5316
5317 return arm_libcall_value_1 (mode);
5318 }
5319
5320 /* libcall hashtable helpers. */
5321
5322 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5323 {
5324 static inline hashval_t hash (const rtx_def *);
5325 static inline bool equal (const rtx_def *, const rtx_def *);
5326 static inline void remove (rtx_def *);
5327 };
5328
5329 inline bool
5330 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5331 {
5332 return rtx_equal_p (p1, p2);
5333 }
5334
5335 inline hashval_t
5336 libcall_hasher::hash (const rtx_def *p1)
5337 {
5338 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5339 }
5340
5341 typedef hash_table<libcall_hasher> libcall_table_type;
5342
5343 static void
5344 add_libcall (libcall_table_type *htab, rtx libcall)
5345 {
5346 *htab->find_slot (libcall, INSERT) = libcall;
5347 }
5348
5349 static bool
5350 arm_libcall_uses_aapcs_base (const_rtx libcall)
5351 {
5352 static bool init_done = false;
5353 static libcall_table_type *libcall_htab = NULL;
5354
5355 if (!init_done)
5356 {
5357 init_done = true;
5358
5359 libcall_htab = new libcall_table_type (31);
5360 add_libcall (libcall_htab,
5361 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5362 add_libcall (libcall_htab,
5363 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5364 add_libcall (libcall_htab,
5365 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5366 add_libcall (libcall_htab,
5367 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5368
5369 add_libcall (libcall_htab,
5370 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5371 add_libcall (libcall_htab,
5372 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5373 add_libcall (libcall_htab,
5374 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5375 add_libcall (libcall_htab,
5376 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5377
5378 add_libcall (libcall_htab,
5379 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5380 add_libcall (libcall_htab,
5381 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5382 add_libcall (libcall_htab,
5383 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5384 add_libcall (libcall_htab,
5385 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5386 add_libcall (libcall_htab,
5387 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5388 add_libcall (libcall_htab,
5389 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5390 add_libcall (libcall_htab,
5391 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5392 add_libcall (libcall_htab,
5393 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5394
5395 /* Values from double-precision helper functions are returned in core
5396 registers if the selected core only supports single-precision
5397 arithmetic, even if we are using the hard-float ABI. The same is
5398 true for single-precision helpers, but we will never be using the
5399 hard-float ABI on a CPU which doesn't support single-precision
5400 operations in hardware. */
5401 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5402 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5403 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5404 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5405 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5406 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5407 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5412 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5413 SFmode));
5414 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5415 DFmode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5418 }
5419
5420 return libcall && libcall_htab->find (libcall) != NULL;
5421 }
5422
5423 static rtx
5424 arm_libcall_value_1 (machine_mode mode)
5425 {
5426 if (TARGET_AAPCS_BASED)
5427 return aapcs_libcall_value (mode);
5428 else if (TARGET_IWMMXT_ABI
5429 && arm_vector_mode_supported_p (mode))
5430 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5431 else
5432 return gen_rtx_REG (mode, ARG_REGISTER (1));
5433 }
5434
5435 /* Define how to find the value returned by a library function
5436 assuming the value has mode MODE. */
5437
5438 static rtx
5439 arm_libcall_value (machine_mode mode, const_rtx libcall)
5440 {
5441 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5442 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5443 {
5444 /* The following libcalls return their result in integer registers,
5445 even though they return a floating point value. */
5446 if (arm_libcall_uses_aapcs_base (libcall))
5447 return gen_rtx_REG (mode, ARG_REGISTER (1));
5448
5449 }
5450
5451 return arm_libcall_value_1 (mode);
5452 }
5453
5454 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5455
5456 static bool
5457 arm_function_value_regno_p (const unsigned int regno)
5458 {
5459 if (regno == ARG_REGISTER (1)
5460 || (TARGET_32BIT
5461 && TARGET_AAPCS_BASED
5462 && TARGET_HARD_FLOAT
5463 && regno == FIRST_VFP_REGNUM)
5464 || (TARGET_IWMMXT_ABI
5465 && regno == FIRST_IWMMXT_REGNUM))
5466 return true;
5467
5468 return false;
5469 }
5470
5471 /* Determine the amount of memory needed to store the possible return
5472 registers of an untyped call. */
5473 int
5474 arm_apply_result_size (void)
5475 {
5476 int size = 16;
5477
5478 if (TARGET_32BIT)
5479 {
5480 if (TARGET_HARD_FLOAT_ABI)
5481 size += 32;
5482 if (TARGET_IWMMXT_ABI)
5483 size += 8;
5484 }
5485
5486 return size;
5487 }
5488
5489 /* Decide whether TYPE should be returned in memory (true)
5490 or in a register (false). FNTYPE is the type of the function making
5491 the call. */
5492 static bool
5493 arm_return_in_memory (const_tree type, const_tree fntype)
5494 {
5495 HOST_WIDE_INT size;
5496
5497 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5498
5499 if (TARGET_AAPCS_BASED)
5500 {
5501 /* Simple, non-aggregate types (i.e. not including vectors and
5502 complex) are always returned in a register (or registers).
5503 We don't care about which register here, so we can short-cut
5504 some of the detail. */
5505 if (!AGGREGATE_TYPE_P (type)
5506 && TREE_CODE (type) != VECTOR_TYPE
5507 && TREE_CODE (type) != COMPLEX_TYPE)
5508 return false;
5509
5510 /* Any return value that is no larger than one word can be
5511 returned in r0. */
5512 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5513 return false;
5514
5515 /* Check any available co-processors to see if they accept the
5516 type as a register candidate (VFP, for example, can return
5517 some aggregates in consecutive registers). These aren't
5518 available if the call is variadic. */
5519 if (aapcs_select_return_coproc (type, fntype) >= 0)
5520 return false;
5521
5522 /* Vector values should be returned using ARM registers, not
5523 memory (unless they're over 16 bytes, which will break since
5524 we only have four call-clobbered registers to play with). */
5525 if (TREE_CODE (type) == VECTOR_TYPE)
5526 return (size < 0 || size > (4 * UNITS_PER_WORD));
5527
5528 /* The rest go in memory. */
5529 return true;
5530 }
5531
5532 if (TREE_CODE (type) == VECTOR_TYPE)
5533 return (size < 0 || size > (4 * UNITS_PER_WORD));
5534
5535 if (!AGGREGATE_TYPE_P (type)
5536 && TREE_CODE (type) != VECTOR_TYPE)
5537 /* All simple types are returned in registers. */
5538 return false;
5539
5540 if (arm_abi != ARM_ABI_APCS)
5541 {
5542 /* ATPCS and later return aggregate types in memory only if they are
5543 larger than a word (or are variable size). */
5544 return (size < 0 || size > UNITS_PER_WORD);
5545 }
5546
5547 /* For the arm-wince targets we choose to be compatible with Microsoft's
5548 ARM and Thumb compilers, which always return aggregates in memory. */
5549 #ifndef ARM_WINCE
5550 /* All structures/unions bigger than one word are returned in memory.
5551 Also catch the case where int_size_in_bytes returns -1. In this case
5552 the aggregate is either huge or of variable size, and in either case
5553 we will want to return it via memory and not in a register. */
5554 if (size < 0 || size > UNITS_PER_WORD)
5555 return true;
5556
5557 if (TREE_CODE (type) == RECORD_TYPE)
5558 {
5559 tree field;
5560
5561 /* For a struct the APCS says that we only return in a register
5562 if the type is 'integer like' and every addressable element
5563 has an offset of zero. For practical purposes this means
5564 that the structure can have at most one non bit-field element
5565 and that this element must be the first one in the structure. */
5566
5567 /* Find the first field, ignoring non FIELD_DECL things which will
5568 have been created by C++. */
5569 for (field = TYPE_FIELDS (type);
5570 field && TREE_CODE (field) != FIELD_DECL;
5571 field = DECL_CHAIN (field))
5572 continue;
5573
5574 if (field == NULL)
5575 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5576
5577 /* Check that the first field is valid for returning in a register. */
5578
5579 /* ... Floats are not allowed */
5580 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5581 return true;
5582
5583 /* ... Aggregates that are not themselves valid for returning in
5584 a register are not allowed. */
5585 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5586 return true;
5587
5588 /* Now check the remaining fields, if any. Only bitfields are allowed,
5589 since they are not addressable. */
5590 for (field = DECL_CHAIN (field);
5591 field;
5592 field = DECL_CHAIN (field))
5593 {
5594 if (TREE_CODE (field) != FIELD_DECL)
5595 continue;
5596
5597 if (!DECL_BIT_FIELD_TYPE (field))
5598 return true;
5599 }
5600
5601 return false;
5602 }
5603
5604 if (TREE_CODE (type) == UNION_TYPE)
5605 {
5606 tree field;
5607
5608 /* Unions can be returned in registers if every element is
5609 integral, or can be returned in an integer register. */
5610 for (field = TYPE_FIELDS (type);
5611 field;
5612 field = DECL_CHAIN (field))
5613 {
5614 if (TREE_CODE (field) != FIELD_DECL)
5615 continue;
5616
5617 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5618 return true;
5619
5620 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5621 return true;
5622 }
5623
5624 return false;
5625 }
5626 #endif /* not ARM_WINCE */
5627
5628 /* Return all other types in memory. */
5629 return true;
5630 }
5631
5632 const struct pcs_attribute_arg
5633 {
5634 const char *arg;
5635 enum arm_pcs value;
5636 } pcs_attribute_args[] =
5637 {
5638 {"aapcs", ARM_PCS_AAPCS},
5639 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5640 #if 0
5641 /* We could recognize these, but changes would be needed elsewhere
5642 * to implement them. */
5643 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5644 {"atpcs", ARM_PCS_ATPCS},
5645 {"apcs", ARM_PCS_APCS},
5646 #endif
5647 {NULL, ARM_PCS_UNKNOWN}
5648 };
5649
5650 static enum arm_pcs
5651 arm_pcs_from_attribute (tree attr)
5652 {
5653 const struct pcs_attribute_arg *ptr;
5654 const char *arg;
5655
5656 /* Get the value of the argument. */
5657 if (TREE_VALUE (attr) == NULL_TREE
5658 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5659 return ARM_PCS_UNKNOWN;
5660
5661 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5662
5663 /* Check it against the list of known arguments. */
5664 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5665 if (streq (arg, ptr->arg))
5666 return ptr->value;
5667
5668 /* An unrecognized PCS variant. */
5669 return ARM_PCS_UNKNOWN;
5670 }
5671
5672 /* Get the PCS variant to use for this call. TYPE is the function's type
5673 specification, DECL is the specific declaration. DECL may be null if
5674 the call could be indirect or if this is a library call. */
5675 static enum arm_pcs
5676 arm_get_pcs_model (const_tree type, const_tree decl)
5677 {
5678 bool user_convention = false;
5679 enum arm_pcs user_pcs = arm_pcs_default;
5680 tree attr;
5681
5682 gcc_assert (type);
5683
5684 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5685 if (attr)
5686 {
5687 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5688 user_convention = true;
5689 }
5690
5691 if (TARGET_AAPCS_BASED)
5692 {
5693 /* Detect varargs functions. These always use the base rules
5694 (no argument is ever a candidate for a co-processor
5695 register). */
5696 bool base_rules = stdarg_p (type);
5697
5698 if (user_convention)
5699 {
5700 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5701 sorry ("non-AAPCS derived PCS variant");
5702 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5703 error ("variadic functions must use the base AAPCS variant");
5704 }
5705
5706 if (base_rules)
5707 return ARM_PCS_AAPCS;
5708 else if (user_convention)
5709 return user_pcs;
5710 else if (decl && flag_unit_at_a_time)
5711 {
5712 /* Local functions never leak outside this compilation unit,
5713 so we are free to use whatever conventions are
5714 appropriate. */
5715 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5716 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5717 if (i && i->local)
5718 return ARM_PCS_AAPCS_LOCAL;
5719 }
5720 }
5721 else if (user_convention && user_pcs != arm_pcs_default)
5722 sorry ("PCS variant");
5723
5724 /* For everything else we use the target's default. */
5725 return arm_pcs_default;
5726 }
5727
5728
5729 static void
5730 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5731 const_tree fntype ATTRIBUTE_UNUSED,
5732 rtx libcall ATTRIBUTE_UNUSED,
5733 const_tree fndecl ATTRIBUTE_UNUSED)
5734 {
5735 /* Record the unallocated VFP registers. */
5736 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5737 pcum->aapcs_vfp_reg_alloc = 0;
5738 }
5739
5740 /* Walk down the type tree of TYPE counting consecutive base elements.
5741 If *MODEP is VOIDmode, then set it to the first valid floating point
5742 type. If a non-floating point type is found, or if a floating point
5743 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5744 otherwise return the count in the sub-tree. */
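/* Illustrative examples: struct { double re, im; } c[2] gives
   *MODEP == DFmode and a count of 4 (a homogeneous floating-point
   aggregate), whereas struct { float f; double d; } returns -1
   because its element modes differ.  */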
5745 static int
5746 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5747 {
5748 machine_mode mode;
5749 HOST_WIDE_INT size;
5750
5751 switch (TREE_CODE (type))
5752 {
5753 case REAL_TYPE:
5754 mode = TYPE_MODE (type);
5755 if (mode != DFmode && mode != SFmode && mode != HFmode)
5756 return -1;
5757
5758 if (*modep == VOIDmode)
5759 *modep = mode;
5760
5761 if (*modep == mode)
5762 return 1;
5763
5764 break;
5765
5766 case COMPLEX_TYPE:
5767 mode = TYPE_MODE (TREE_TYPE (type));
5768 if (mode != DFmode && mode != SFmode)
5769 return -1;
5770
5771 if (*modep == VOIDmode)
5772 *modep = mode;
5773
5774 if (*modep == mode)
5775 return 2;
5776
5777 break;
5778
5779 case VECTOR_TYPE:
5780 /* Use V2SImode and V4SImode as representatives of all 64-bit
5781 and 128-bit vector types, whether or not those modes are
5782 supported with the present options. */
5783 size = int_size_in_bytes (type);
5784 switch (size)
5785 {
5786 case 8:
5787 mode = V2SImode;
5788 break;
5789 case 16:
5790 mode = V4SImode;
5791 break;
5792 default:
5793 return -1;
5794 }
5795
5796 if (*modep == VOIDmode)
5797 *modep = mode;
5798
5799 /* Vector modes are considered to be opaque: two vectors are
5800 equivalent for the purposes of being homogeneous aggregates
5801 if they are the same size. */
5802 if (*modep == mode)
5803 return 1;
5804
5805 break;
5806
5807 case ARRAY_TYPE:
5808 {
5809 int count;
5810 tree index = TYPE_DOMAIN (type);
5811
5812 /* Can't handle incomplete types nor sizes that are not
5813 fixed. */
5814 if (!COMPLETE_TYPE_P (type)
5815 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5816 return -1;
5817
5818 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5819 if (count == -1
5820 || !index
5821 || !TYPE_MAX_VALUE (index)
5822 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5823 || !TYPE_MIN_VALUE (index)
5824 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5825 || count < 0)
5826 return -1;
5827
5828 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5829 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5830
5831 /* There must be no padding. */
5832 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5833 return -1;
5834
5835 return count;
5836 }
5837
5838 case RECORD_TYPE:
5839 {
5840 int count = 0;
5841 int sub_count;
5842 tree field;
5843
5844 /* Can't handle incomplete types nor sizes that are not
5845 fixed. */
5846 if (!COMPLETE_TYPE_P (type)
5847 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5848 return -1;
5849
5850 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5851 {
5852 if (TREE_CODE (field) != FIELD_DECL)
5853 continue;
5854
5855 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5856 if (sub_count < 0)
5857 return -1;
5858 count += sub_count;
5859 }
5860
5861 /* There must be no padding. */
5862 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5863 return -1;
5864
5865 return count;
5866 }
5867
5868 case UNION_TYPE:
5869 case QUAL_UNION_TYPE:
5870 {
5871 /* These aren't very interesting except in a degenerate case. */
5872 int count = 0;
5873 int sub_count;
5874 tree field;
5875
5876 /* Can't handle incomplete types nor sizes that are not
5877 fixed. */
5878 if (!COMPLETE_TYPE_P (type)
5879 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5880 return -1;
5881
5882 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5883 {
5884 if (TREE_CODE (field) != FIELD_DECL)
5885 continue;
5886
5887 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5888 if (sub_count < 0)
5889 return -1;
5890 count = count > sub_count ? count : sub_count;
5891 }
5892
5893 /* There must be no padding. */
5894 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5895 return -1;
5896
5897 return count;
5898 }
5899
5900 default:
5901 break;
5902 }
5903
5904 return -1;
5905 }
5906
5907 /* Return true if PCS_VARIANT should use VFP registers. */
5908 static bool
5909 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5910 {
5911 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5912 {
5913 static bool seen_thumb1_vfp = false;
5914
5915 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5916 {
5917 sorry ("Thumb-1 hard-float VFP ABI");
5918 /* sorry() is not immediately fatal, so only display this once. */
5919 seen_thumb1_vfp = true;
5920 }
5921
5922 return true;
5923 }
5924
5925 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5926 return false;
5927
5928 return (TARGET_32BIT && TARGET_HARD_FLOAT
5929 && (TARGET_VFP_DOUBLE || !is_double));
5930 }
5931
5932 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5933 suitable for passing or returning in VFP registers for the PCS
5934 variant selected. If it is, then *BASE_MODE is updated to contain
5935 a machine mode describing each element of the argument's type and
5936 *COUNT to hold the number of such elements. */
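/* Sketch of typical results: a double gives *BASE_MODE == DFmode with
   *COUNT == 1, a _Complex double gives DFmode with *COUNT == 2, and a
   homogeneous aggregate of four floats gives SFmode with *COUNT == 4.  */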
5937 static bool
5938 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5939 machine_mode mode, const_tree type,
5940 machine_mode *base_mode, int *count)
5941 {
5942 machine_mode new_mode = VOIDmode;
5943
5944 /* If we have the type information, prefer that to working things
5945 out from the mode. */
5946 if (type)
5947 {
5948 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5949
5950 if (ag_count > 0 && ag_count <= 4)
5951 *count = ag_count;
5952 else
5953 return false;
5954 }
5955 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5956 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5957 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5958 {
5959 *count = 1;
5960 new_mode = mode;
5961 }
5962 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5963 {
5964 *count = 2;
5965 new_mode = (mode == DCmode ? DFmode : SFmode);
5966 }
5967 else
5968 return false;
5969
5970
5971 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5972 return false;
5973
5974 *base_mode = new_mode;
5975 return true;
5976 }
5977
5978 static bool
5979 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5980 machine_mode mode, const_tree type)
5981 {
5982 int count ATTRIBUTE_UNUSED;
5983 machine_mode ag_mode ATTRIBUTE_UNUSED;
5984
5985 if (!use_vfp_abi (pcs_variant, false))
5986 return false;
5987 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5988 &ag_mode, &count);
5989 }
5990
5991 static bool
5992 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5993 const_tree type)
5994 {
5995 if (!use_vfp_abi (pcum->pcs_variant, false))
5996 return false;
5997
5998 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5999 &pcum->aapcs_vfp_rmode,
6000 &pcum->aapcs_vfp_rcount);
6001 }
6002
6003 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6004 for the behaviour of this function. */
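/* Sketch of the allocation below: for two doubles (aapcs_vfp_rmode ==
   DFmode, aapcs_vfp_rcount == 2) the shift is 2 and the mask is 0xf,
   so the loop searches for four consecutive free single-precision
   slots starting at an even register (s0, s2, s4, ...), i.e. an
   adjacent d-register pair.  */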
6005
6006 static bool
6007 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6008 const_tree type ATTRIBUTE_UNUSED)
6009 {
6010 int rmode_size
6011 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6012 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6013 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6014 int regno;
6015
6016 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6017 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6018 {
6019 pcum->aapcs_vfp_reg_alloc = mask << regno;
6020 if (mode == BLKmode
6021 || (mode == TImode && ! TARGET_NEON)
6022 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6023 {
6024 int i;
6025 int rcount = pcum->aapcs_vfp_rcount;
6026 int rshift = shift;
6027 machine_mode rmode = pcum->aapcs_vfp_rmode;
6028 rtx par;
6029 if (!TARGET_NEON)
6030 {
6031 /* Avoid using unsupported vector modes. */
6032 if (rmode == V2SImode)
6033 rmode = DImode;
6034 else if (rmode == V4SImode)
6035 {
6036 rmode = DImode;
6037 rcount *= 2;
6038 rshift /= 2;
6039 }
6040 }
6041 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6042 for (i = 0; i < rcount; i++)
6043 {
6044 rtx tmp = gen_rtx_REG (rmode,
6045 FIRST_VFP_REGNUM + regno + i * rshift);
6046 tmp = gen_rtx_EXPR_LIST
6047 (VOIDmode, tmp,
6048 GEN_INT (i * GET_MODE_SIZE (rmode)));
6049 XVECEXP (par, 0, i) = tmp;
6050 }
6051
6052 pcum->aapcs_reg = par;
6053 }
6054 else
6055 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6056 return true;
6057 }
6058 return false;
6059 }
6060
6061 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6062 comment there for the behaviour of this function. */
6063
6064 static rtx
6065 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6066 machine_mode mode,
6067 const_tree type ATTRIBUTE_UNUSED)
6068 {
6069 if (!use_vfp_abi (pcs_variant, false))
6070 return NULL;
6071
6072 if (mode == BLKmode
6073 || (GET_MODE_CLASS (mode) == MODE_INT
6074 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6075 && !TARGET_NEON))
6076 {
6077 int count;
6078 machine_mode ag_mode;
6079 int i;
6080 rtx par;
6081 int shift;
6082
6083 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6084 &ag_mode, &count);
6085
6086 if (!TARGET_NEON)
6087 {
6088 if (ag_mode == V2SImode)
6089 ag_mode = DImode;
6090 else if (ag_mode == V4SImode)
6091 {
6092 ag_mode = DImode;
6093 count *= 2;
6094 }
6095 }
6096 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6097 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6098 for (i = 0; i < count; i++)
6099 {
6100 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6101 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6102 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6103 XVECEXP (par, 0, i) = tmp;
6104 }
6105
6106 return par;
6107 }
6108
6109 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6110 }
6111
6112 static void
6113 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6114 machine_mode mode ATTRIBUTE_UNUSED,
6115 const_tree type ATTRIBUTE_UNUSED)
6116 {
6117 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6118 pcum->aapcs_vfp_reg_alloc = 0;
6119 return;
6120 }
6121
6122 #define AAPCS_CP(X) \
6123 { \
6124 aapcs_ ## X ## _cum_init, \
6125 aapcs_ ## X ## _is_call_candidate, \
6126 aapcs_ ## X ## _allocate, \
6127 aapcs_ ## X ## _is_return_candidate, \
6128 aapcs_ ## X ## _allocate_return_reg, \
6129 aapcs_ ## X ## _advance \
6130 }
6131
6132 /* Table of co-processors that can be used to pass arguments in
6133 registers. Ideally no argument should be a candidate for more than
6134 one co-processor table entry, but the table is processed in order
6135 and stops after the first match. If that entry then fails to put
6136 the argument into a co-processor register, the argument will go on
6137 the stack. */
6138 static struct
6139 {
6140 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6141 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6142
6143 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6144 BLKmode) is a candidate for this co-processor's registers; this
6145 function should ignore any position-dependent state in
6146 CUMULATIVE_ARGS and only use call-type dependent information. */
6147 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6148
6149 /* Return true if the argument does get a co-processor register; it
6150 should set aapcs_reg to an RTX of the register allocated as is
6151 required for a return from FUNCTION_ARG. */
6152 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6153
6154 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6155 be returned in this co-processor's registers. */
6156 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6157
6158 /* Allocate and return an RTX element to hold the return type of a call. This
6159 routine must not fail and will only be called if is_return_candidate
6160 returned true with the same parameters. */
6161 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6162
6163 /* Finish processing this argument and prepare to start processing
6164 the next one. */
6165 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6166 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6167 {
6168 AAPCS_CP(vfp)
6169 };
6170
6171 #undef AAPCS_CP
6172
6173 static int
6174 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6175 const_tree type)
6176 {
6177 int i;
6178
6179 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6180 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6181 return i;
6182
6183 return -1;
6184 }
6185
6186 static int
6187 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6188 {
6189 /* We aren't passed a decl, so we can't check that a call is local.
6190 However, it isn't clear that that would be a win anyway, since it
6191 might limit some tail-calling opportunities. */
6192 enum arm_pcs pcs_variant;
6193
6194 if (fntype)
6195 {
6196 const_tree fndecl = NULL_TREE;
6197
6198 if (TREE_CODE (fntype) == FUNCTION_DECL)
6199 {
6200 fndecl = fntype;
6201 fntype = TREE_TYPE (fntype);
6202 }
6203
6204 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6205 }
6206 else
6207 pcs_variant = arm_pcs_default;
6208
6209 if (pcs_variant != ARM_PCS_AAPCS)
6210 {
6211 int i;
6212
6213 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6214 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6215 TYPE_MODE (type),
6216 type))
6217 return i;
6218 }
6219 return -1;
6220 }
6221
6222 static rtx
6223 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6224 const_tree fntype)
6225 {
6226 /* We aren't passed a decl, so we can't check that a call is local.
6227 However, it isn't clear that that would be a win anyway, since it
6228 might limit some tail-calling opportunities. */
6229 enum arm_pcs pcs_variant;
6230 int unsignedp ATTRIBUTE_UNUSED;
6231
6232 if (fntype)
6233 {
6234 const_tree fndecl = NULL_TREE;
6235
6236 if (TREE_CODE (fntype) == FUNCTION_DECL)
6237 {
6238 fndecl = fntype;
6239 fntype = TREE_TYPE (fntype);
6240 }
6241
6242 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6243 }
6244 else
6245 pcs_variant = arm_pcs_default;
6246
6247 /* Promote integer types. */
6248 if (type && INTEGRAL_TYPE_P (type))
6249 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6250
6251 if (pcs_variant != ARM_PCS_AAPCS)
6252 {
6253 int i;
6254
6255 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6256 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6257 type))
6258 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6259 mode, type);
6260 }
6261
6262 /* Promotes small structs returned in a register to full-word size
6263 for big-endian AAPCS. */
6264 if (type && arm_return_in_msb (type))
6265 {
6266 HOST_WIDE_INT size = int_size_in_bytes (type);
6267 if (size % UNITS_PER_WORD != 0)
6268 {
6269 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6270 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6271 }
6272 }
6273
6274 return gen_rtx_REG (mode, R0_REGNUM);
6275 }
6276
6277 static rtx
6278 aapcs_libcall_value (machine_mode mode)
6279 {
6280 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6281 && GET_MODE_SIZE (mode) <= 4)
6282 mode = SImode;
6283
6284 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6285 }
6286
6287 /* Lay out a function argument using the AAPCS rules. The rule
6288 numbers referred to here are those in the AAPCS. */
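/* Worked example (a sketch, assuming no co-processor candidate): for
   f (int a, double b), 'a' goes in r0 (C.4), rule C.3 rounds the NCRN
   from 1 up to 2, and 'b' takes the even pair r2/r3 (C.4); a further
   double-word argument would fall under C.6-C.8 and go on the stack.  */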
6289 static void
6290 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6291 const_tree type, bool named)
6292 {
6293 int nregs, nregs2;
6294 int ncrn;
6295
6296 /* We only need to do this once per argument. */
6297 if (pcum->aapcs_arg_processed)
6298 return;
6299
6300 pcum->aapcs_arg_processed = true;
6301
6302 /* Special case: if named is false then we are handling an incoming
6303 anonymous argument which is on the stack. */
6304 if (!named)
6305 return;
6306
6307 /* Is this a potential co-processor register candidate? */
6308 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6309 {
6310 int slot = aapcs_select_call_coproc (pcum, mode, type);
6311 pcum->aapcs_cprc_slot = slot;
6312
6313 /* We don't have to apply any of the rules from part B of the
6314 preparation phase, these are handled elsewhere in the
6315 compiler. */
6316
6317 if (slot >= 0)
6318 {
6319 /* A Co-processor register candidate goes either in its own
6320 class of registers or on the stack. */
6321 if (!pcum->aapcs_cprc_failed[slot])
6322 {
6323 /* C1.cp - Try to allocate the argument to co-processor
6324 registers. */
6325 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6326 return;
6327
6328 /* C2.cp - Put the argument on the stack and note that we
6329 can't assign any more candidates in this slot. We also
6330 need to note that we have allocated stack space, so that
6331 we won't later try to split a non-cprc candidate between
6332 core registers and the stack. */
6333 pcum->aapcs_cprc_failed[slot] = true;
6334 pcum->can_split = false;
6335 }
6336
6337 /* We didn't get a register, so this argument goes on the
6338 stack. */
6339 gcc_assert (pcum->can_split == false);
6340 return;
6341 }
6342 }
6343
6344 /* C3 - For double-word aligned arguments, round the NCRN up to the
6345 next even number. */
6346 ncrn = pcum->aapcs_ncrn;
6347 if (ncrn & 1)
6348 {
6349 int res = arm_needs_doubleword_align (mode, type);
6350 /* Only warn during RTL expansion of call stmts, otherwise we would
6351 warn e.g. during gimplification even on functions that will be
6352 always inlined, and we'd warn multiple times. Don't warn when
6353 called in expand_function_start either, as we warn instead in
6354 arm_function_arg_boundary in that case. */
6355 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6356 inform (input_location, "parameter passing for argument of type "
6357 "%qT changed in GCC 7.1", type);
6358 else if (res > 0)
6359 ncrn++;
6360 }
6361
6362 nregs = ARM_NUM_REGS2 (mode, type);
6363
6364 /* Sigh, this test should really assert that nregs > 0, but a GCC
6365 extension allows empty structs and then gives them empty size; it
6366 then allows such a structure to be passed by value. For some of
6367 the code below we have to pretend that such an argument has
6368 non-zero size so that we 'locate' it correctly either in
6369 registers or on the stack. */
6370 gcc_assert (nregs >= 0);
6371
6372 nregs2 = nregs ? nregs : 1;
6373
6374 /* C4 - Argument fits entirely in core registers. */
6375 if (ncrn + nregs2 <= NUM_ARG_REGS)
6376 {
6377 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6378 pcum->aapcs_next_ncrn = ncrn + nregs;
6379 return;
6380 }
6381
6382 /* C5 - Some core registers left and there are no arguments already
6383 on the stack: split this argument between the remaining core
6384 registers and the stack. */
6385 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6386 {
6387 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6388 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6389 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6390 return;
6391 }
6392
6393 /* C6 - NCRN is set to 4. */
6394 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6395
6396 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6397 return;
6398 }
6399
6400 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6401 for a call to a function whose data type is FNTYPE.
6402 For a library call, FNTYPE is NULL. */
6403 void
6404 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6405 rtx libname,
6406 tree fndecl ATTRIBUTE_UNUSED)
6407 {
6408 /* Determine the PCS variant to use for this call. */
6409 if (fntype)
6410 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6411 else
6412 pcum->pcs_variant = arm_pcs_default;
6413
6414 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6415 {
6416 if (arm_libcall_uses_aapcs_base (libname))
6417 pcum->pcs_variant = ARM_PCS_AAPCS;
6418
6419 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6420 pcum->aapcs_reg = NULL_RTX;
6421 pcum->aapcs_partial = 0;
6422 pcum->aapcs_arg_processed = false;
6423 pcum->aapcs_cprc_slot = -1;
6424 pcum->can_split = true;
6425
6426 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6427 {
6428 int i;
6429
6430 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6431 {
6432 pcum->aapcs_cprc_failed[i] = false;
6433 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6434 }
6435 }
6436 return;
6437 }
6438
6439 /* Legacy ABIs */
6440
6441 /* On the ARM, the offset starts at 0. */
6442 pcum->nregs = 0;
6443 pcum->iwmmxt_nregs = 0;
6444 pcum->can_split = true;
6445
6446 /* Varargs vectors are treated the same as long long.
6447 named_count avoids having to change the way arm handles 'named' */
6448 pcum->named_count = 0;
6449 pcum->nargs = 0;
6450
6451 if (TARGET_REALLY_IWMMXT && fntype)
6452 {
6453 tree fn_arg;
6454
6455 for (fn_arg = TYPE_ARG_TYPES (fntype);
6456 fn_arg;
6457 fn_arg = TREE_CHAIN (fn_arg))
6458 pcum->named_count += 1;
6459
6460 if (! pcum->named_count)
6461 pcum->named_count = INT_MAX;
6462 }
6463 }
6464
6465 /* Return 1 if double word alignment is required for argument passing.
6466 Return -1 if double word alignment used to be required for argument
6467 passing before PR77728 ABI fix, but is not required anymore.
6468 Return 0 if double word alignment is not required and wasn't required
6469 before either. */
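/* For example: a long long argument, or a struct containing one, has
   8-byte alignment and yields 1, while a plain int yields 0; the -1
   result only flags types that the pre-GCC-7.1 rules would have
   over-aligned, so that callers can emit a -Wpsabi note.  */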
6470 static int
6471 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6472 {
6473 if (!type)
6474 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6475
6476 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6477 if (!AGGREGATE_TYPE_P (type))
6478 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6479
6480 /* Array types: Use member alignment of element type. */
6481 if (TREE_CODE (type) == ARRAY_TYPE)
6482 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6483
6484 int ret = 0;
6485 /* Record/aggregate types: Use greatest member alignment of any member. */
6486 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6487 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6488 {
6489 if (TREE_CODE (field) == FIELD_DECL)
6490 return 1;
6491 else
6492 /* Before PR77728 fix, we were incorrectly considering also
6493 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6494 Make sure we can warn about that with -Wpsabi. */
6495 ret = -1;
6496 }
6497
6498 return ret;
6499 }
6500
6501
6502 /* Determine where to put an argument to a function.
6503 Value is zero to push the argument on the stack,
6504 or a hard register in which to store the argument.
6505
6506 MODE is the argument's machine mode.
6507 TYPE is the data type of the argument (as a tree).
6508 This is null for libcalls where that information may
6509 not be available.
6510 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6511 the preceding args and about the function being called.
6512 NAMED is nonzero if this argument is a named parameter
6513 (otherwise it is an extra parameter matching an ellipsis).
6514
6515 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6516 other arguments are passed on the stack. If (NAMED == 0) (which happens
6517 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6518 defined), say it is passed in the stack (function_prologue will
6519 indeed make it pass in the stack if necessary). */
6520
6521 static rtx
6522 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6523 const_tree type, bool named)
6524 {
6525 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6526 int nregs;
6527
6528 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6529 a call insn (op3 of a call_value insn). */
6530 if (mode == VOIDmode)
6531 return const0_rtx;
6532
6533 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6534 {
6535 aapcs_layout_arg (pcum, mode, type, named);
6536 return pcum->aapcs_reg;
6537 }
6538
6539 /* Varargs vectors are treated the same as long long.
6540 named_count avoids having to change the way arm handles 'named' */
6541 if (TARGET_IWMMXT_ABI
6542 && arm_vector_mode_supported_p (mode)
6543 && pcum->named_count > pcum->nargs + 1)
6544 {
6545 if (pcum->iwmmxt_nregs <= 9)
6546 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6547 else
6548 {
6549 pcum->can_split = false;
6550 return NULL_RTX;
6551 }
6552 }
6553
6554 /* Put doubleword aligned quantities in even register pairs. */
6555 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6556 {
6557 int res = arm_needs_doubleword_align (mode, type);
6558 if (res < 0 && warn_psabi)
6559 inform (input_location, "parameter passing for argument of type "
6560 "%qT changed in GCC 7.1", type);
6561 else if (res > 0)
6562 pcum->nregs++;
6563 }
6564
6565 /* Only allow splitting an arg between regs and memory if all preceding
6566 args were allocated to regs. For args passed by reference we only count
6567 the reference pointer. */
6568 if (pcum->can_split)
6569 nregs = 1;
6570 else
6571 nregs = ARM_NUM_REGS2 (mode, type);
6572
6573 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6574 return NULL_RTX;
6575
6576 return gen_rtx_REG (mode, pcum->nregs);
6577 }
6578
6579 static unsigned int
6580 arm_function_arg_boundary (machine_mode mode, const_tree type)
6581 {
6582 if (!ARM_DOUBLEWORD_ALIGN)
6583 return PARM_BOUNDARY;
6584
6585 int res = arm_needs_doubleword_align (mode, type);
6586 if (res < 0 && warn_psabi)
6587 inform (input_location, "parameter passing for argument of type %qT "
6588 "changed in GCC 7.1", type);
6589
6590 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6591 }
6592
6593 static int
6594 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6595 tree type, bool named)
6596 {
6597 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6598 int nregs = pcum->nregs;
6599
6600 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6601 {
6602 aapcs_layout_arg (pcum, mode, type, named);
6603 return pcum->aapcs_partial;
6604 }
6605
6606 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6607 return 0;
6608
6609 if (NUM_ARG_REGS > nregs
6610 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6611 && pcum->can_split)
6612 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6613
6614 return 0;
6615 }
6616
6617 /* Update the data in PCUM to advance over an argument
6618 of mode MODE and data type TYPE.
6619 (TYPE is null for libcalls where that information may not be available.) */
6620
6621 static void
6622 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6623 const_tree type, bool named)
6624 {
6625 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6626
6627 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6628 {
6629 aapcs_layout_arg (pcum, mode, type, named);
6630
6631 if (pcum->aapcs_cprc_slot >= 0)
6632 {
6633 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6634 type);
6635 pcum->aapcs_cprc_slot = -1;
6636 }
6637
6638 /* Generic stuff. */
6639 pcum->aapcs_arg_processed = false;
6640 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6641 pcum->aapcs_reg = NULL_RTX;
6642 pcum->aapcs_partial = 0;
6643 }
6644 else
6645 {
6646 pcum->nargs += 1;
6647 if (arm_vector_mode_supported_p (mode)
6648 && pcum->named_count > pcum->nargs
6649 && TARGET_IWMMXT_ABI)
6650 pcum->iwmmxt_nregs += 1;
6651 else
6652 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6653 }
6654 }
6655
6656 /* Variable sized types are passed by reference. This is a GCC
6657 extension to the ARM ABI. */
6658
6659 static bool
6660 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6661 machine_mode mode ATTRIBUTE_UNUSED,
6662 const_tree type, bool named ATTRIBUTE_UNUSED)
6663 {
6664 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6665 }
6666 \f
6667 /* Encode the current state of the #pragma [no_]long_calls. */
6668 typedef enum
6669 {
6670 OFF, /* No #pragma [no_]long_calls is in effect. */
6671 LONG, /* #pragma long_calls is in effect. */
6672 SHORT /* #pragma no_long_calls is in effect. */
6673 } arm_pragma_enum;
6674
6675 static arm_pragma_enum arm_pragma_long_calls = OFF;
6676
6677 void
6678 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6679 {
6680 arm_pragma_long_calls = LONG;
6681 }
6682
6683 void
6684 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6685 {
6686 arm_pragma_long_calls = SHORT;
6687 }
6688
6689 void
6690 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6691 {
6692 arm_pragma_long_calls = OFF;
6693 }
6694 \f
6695 /* Handle an attribute requiring a FUNCTION_DECL;
6696 arguments as in struct attribute_spec.handler. */
6697 static tree
6698 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6699 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6700 {
6701 if (TREE_CODE (*node) != FUNCTION_DECL)
6702 {
6703 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6704 name);
6705 *no_add_attrs = true;
6706 }
6707
6708 return NULL_TREE;
6709 }
6710
6711 /* Handle an "interrupt" or "isr" attribute;
6712 arguments as in struct attribute_spec.handler. */
6713 static tree
6714 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6715 bool *no_add_attrs)
6716 {
6717 if (DECL_P (*node))
6718 {
6719 if (TREE_CODE (*node) != FUNCTION_DECL)
6720 {
6721 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6722 name);
6723 *no_add_attrs = true;
6724 }
6725 /* FIXME: the argument if any is checked for type attributes;
6726 should it be checked for decl ones? */
6727 }
6728 else
6729 {
6730 if (TREE_CODE (*node) == FUNCTION_TYPE
6731 || TREE_CODE (*node) == METHOD_TYPE)
6732 {
6733 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6734 {
6735 warning (OPT_Wattributes, "%qE attribute ignored",
6736 name);
6737 *no_add_attrs = true;
6738 }
6739 }
6740 else if (TREE_CODE (*node) == POINTER_TYPE
6741 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6742 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6743 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6744 {
6745 *node = build_variant_type_copy (*node);
6746 TREE_TYPE (*node) = build_type_attribute_variant
6747 (TREE_TYPE (*node),
6748 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6749 *no_add_attrs = true;
6750 }
6751 else
6752 {
6753 /* Possibly pass this attribute on from the type to a decl. */
6754 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6755 | (int) ATTR_FLAG_FUNCTION_NEXT
6756 | (int) ATTR_FLAG_ARRAY_NEXT))
6757 {
6758 *no_add_attrs = true;
6759 return tree_cons (name, args, NULL_TREE);
6760 }
6761 else
6762 {
6763 warning (OPT_Wattributes, "%qE attribute ignored",
6764 name);
6765 }
6766 }
6767 }
6768
6769 return NULL_TREE;
6770 }
6771
6772 /* Handle a "pcs" attribute; arguments as in struct
6773 attribute_spec.handler. */
6774 static tree
6775 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6776 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6777 {
6778 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6779 {
6780 warning (OPT_Wattributes, "%qE attribute ignored", name);
6781 *no_add_attrs = true;
6782 }
6783 return NULL_TREE;
6784 }
6785
6786 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6787 /* Handle the "notshared" attribute. This attribute is another way of
6788 requesting hidden visibility. ARM's compiler supports
6789 "__declspec(notshared)"; we support the same thing via an
6790 attribute. */
6791
6792 static tree
6793 arm_handle_notshared_attribute (tree *node,
6794 tree name ATTRIBUTE_UNUSED,
6795 tree args ATTRIBUTE_UNUSED,
6796 int flags ATTRIBUTE_UNUSED,
6797 bool *no_add_attrs)
6798 {
6799 tree decl = TYPE_NAME (*node);
6800
6801 if (decl)
6802 {
6803 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6804 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6805 *no_add_attrs = false;
6806 }
6807 return NULL_TREE;
6808 }
6809 #endif
6810
6811 /* This function returns true if a function with declaration FNDECL and type
6812 FNTYPE uses the stack to pass arguments or to return its value, and false
6813 otherwise. This is used for functions with the attributes
6814 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6815 diagnostic messages if the stack is used. NAME is the name of the attribute
6816 used. */
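/* For instance (a sketch): a cmse_nonsecure_entry function taking five
   int arguments would place the fifth one on the stack, and one
   returning a plain struct of two ints would return it in memory
   under AAPCS; both cases are rejected here.  */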
6817
6818 static bool
6819 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6820 {
6821 function_args_iterator args_iter;
6822 CUMULATIVE_ARGS args_so_far_v;
6823 cumulative_args_t args_so_far;
6824 bool first_param = true;
6825 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6826
6827 /* Error out if any argument is passed on the stack. */
6828 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6829 args_so_far = pack_cumulative_args (&args_so_far_v);
6830 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6831 {
6832 rtx arg_rtx;
6833 machine_mode arg_mode = TYPE_MODE (arg_type);
6834
6835 prev_arg_type = arg_type;
6836 if (VOID_TYPE_P (arg_type))
6837 continue;
6838
6839 if (!first_param)
6840 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6841 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6842 if (!arg_rtx
6843 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6844 {
6845 error ("%qE attribute not available to functions with arguments "
6846 "passed on the stack", name);
6847 return true;
6848 }
6849 first_param = false;
6850 }
6851
6852 /* Error out for variadic functions since we cannot control how many
6853 arguments will be passed and thus the stack could be used. stdarg_p () is
6854 not used for the check, to avoid walking the arguments twice. */
6855 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6856 {
6857 error ("%qE attribute not available to functions with variable number "
6858 "of arguments", name);
6859 return true;
6860 }
6861
6862 /* Error out if return value is passed on the stack. */
6863 ret_type = TREE_TYPE (fntype);
6864 if (arm_return_in_memory (ret_type, fntype))
6865 {
6866 error ("%qE attribute not available to functions that return value on "
6867 "the stack", name);
6868 return true;
6869 }
6870 return false;
6871 }
6872
6873 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6874 function will check whether the attribute is allowed here and will add the
6875 attribute to the function declaration tree or otherwise issue a warning. */
6876
6877 static tree
6878 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6879 tree /* args */,
6880 int /* flags */,
6881 bool *no_add_attrs)
6882 {
6883 tree fndecl;
6884
6885 if (!use_cmse)
6886 {
6887 *no_add_attrs = true;
6888 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6889 name);
6890 return NULL_TREE;
6891 }
6892
6893 /* Ignore attribute for function types. */
6894 if (TREE_CODE (*node) != FUNCTION_DECL)
6895 {
6896 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6897 name);
6898 *no_add_attrs = true;
6899 return NULL_TREE;
6900 }
6901
6902 fndecl = *node;
6903
6904 /* Warn for static linkage functions. */
6905 if (!TREE_PUBLIC (fndecl))
6906 {
6907 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6908 "with static linkage", name);
6909 *no_add_attrs = true;
6910 return NULL_TREE;
6911 }
6912
6913 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6914 TREE_TYPE (fndecl));
6915 return NULL_TREE;
6916 }
6917
6918
6919 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6920 function will check whether the attribute is allowed here and will add the
6921 attribute to the function type tree or otherwise issue a diagnostic. The
6922 reason we check this at declaration time is to only allow the use of the
6923 attribute with declarations of function pointers and not function
6924 declarations. This function checks that NODE is of the expected type and
6925 issues diagnostics otherwise, using NAME. If it is not of the expected type
6926 *NO_ADD_ATTRS will be set to true. */
6927
6928 static tree
6929 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6930 tree /* args */,
6931 int /* flags */,
6932 bool *no_add_attrs)
6933 {
6934 tree decl = NULL_TREE, fntype = NULL_TREE;
6935 tree type;
6936
6937 if (!use_cmse)
6938 {
6939 *no_add_attrs = true;
6940 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6941 name);
6942 return NULL_TREE;
6943 }
6944
6945 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6946 {
6947 decl = *node;
6948 fntype = TREE_TYPE (decl);
6949 }
6950
6951 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6952 fntype = TREE_TYPE (fntype);
6953
6954 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6955 {
6956 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6957 "function pointer", name);
6958 *no_add_attrs = true;
6959 return NULL_TREE;
6960 }
6961
6962 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6963
6964 if (*no_add_attrs)
6965 return NULL_TREE;
6966
6967 /* Prevent trees being shared among function types with and without
6968 cmse_nonsecure_call attribute. */
6969 type = TREE_TYPE (decl);
6970
6971 type = build_distinct_type_copy (type);
6972 TREE_TYPE (decl) = type;
6973 fntype = type;
6974
6975 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6976 {
6977 type = fntype;
6978 fntype = TREE_TYPE (fntype);
6979 fntype = build_distinct_type_copy (fntype);
6980 TREE_TYPE (type) = fntype;
6981 }
6982
6983 /* Construct a type attribute and add it to the function type. */
6984 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6985 TYPE_ATTRIBUTES (fntype));
6986 TYPE_ATTRIBUTES (fntype) = attrs;
6987 return NULL_TREE;
6988 }
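
/* Illustrative usage sketch for the handler above (assumes -mcmse). The
   attribute has to reach a function type through a declaration of a
   function pointer or a function-type typedef; these names are
   hypothetical:

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);
     ns_fn_t *ns_callback;

   Placing the attribute directly on a function declaration is diagnosed by
   the type check above. */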
6989
6990 /* Return 0 if the attributes for two types are incompatible, 1 if they
6991 are compatible, and 2 if they are nearly compatible (which causes a
6992 warning to be generated). */
6993 static int
6994 arm_comp_type_attributes (const_tree type1, const_tree type2)
6995 {
6996 int l1, l2, s1, s2;
6997
6998 /* Check for mismatch of non-default calling convention. */
6999 if (TREE_CODE (type1) != FUNCTION_TYPE)
7000 return 1;
7001
7002 /* Check for mismatched call attributes. */
7003 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7004 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7005 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7006 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7007
7008 /* Only bother to check if an attribute is defined. */
7009 if (l1 | l2 | s1 | s2)
7010 {
7011 /* If one type has an attribute, the other must have the same attribute. */
7012 if ((l1 != l2) || (s1 != s2))
7013 return 0;
7014
7015 /* Disallow mixed attributes. */
7016 if ((l1 & s2) || (l2 & s1))
7017 return 0;
7018 }
7019
7020 /* Check for mismatched ISR attribute. */
7021 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7022 if (! l1)
7023 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7024 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7025 if (! l2)
7026 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7027 if (l1 != l2)
7028 return 0;
7029
7030 l1 = lookup_attribute ("cmse_nonsecure_call",
7031 TYPE_ATTRIBUTES (type1)) != NULL;
7032 l2 = lookup_attribute ("cmse_nonsecure_call",
7033 TYPE_ATTRIBUTES (type2)) != NULL;
7034
7035 if (l1 != l2)
7036 return 0;
7037
7038 return 1;
7039 }
7040
7041 /* Assigns default attributes to a newly defined type. This is used to
7042 set short_call/long_call attributes for function types of
7043 functions defined inside corresponding #pragma scopes. */
7044 static void
7045 arm_set_default_type_attributes (tree type)
7046 {
7047 /* Add __attribute__ ((long_call)) to all functions when inside
7048 #pragma long_calls, or __attribute__ ((short_call)) when inside
7049 #pragma no_long_calls. */
7050 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7051 {
7052 tree type_attr_list, attr_name;
7053 type_attr_list = TYPE_ATTRIBUTES (type);
7054
7055 if (arm_pragma_long_calls == LONG)
7056 attr_name = get_identifier ("long_call");
7057 else if (arm_pragma_long_calls == SHORT)
7058 attr_name = get_identifier ("short_call");
7059 else
7060 return;
7061
7062 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7063 TYPE_ATTRIBUTES (type) = type_attr_list;
7064 }
7065 }
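
/* Illustrative sketch of the #pragma scoping handled above; the function
   names are hypothetical:

     #pragma long_calls
     void far_func (void);     given __attribute__ ((long_call)) by default
     #pragma no_long_calls
     void near_func (void);    given __attribute__ ((short_call)) by default
     #pragma long_calls_off
     void plain_func (void);   no attribute added

   long_calls_off restores the command-line default behaviour. */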
7066 \f
7067 /* Return true if DECL is known to be linked into section SECTION. */
7068
7069 static bool
7070 arm_function_in_section_p (tree decl, section *section)
7071 {
7072 /* We can only be certain about the prevailing symbol definition. */
7073 if (!decl_binds_to_current_def_p (decl))
7074 return false;
7075
7076 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7077 if (!DECL_SECTION_NAME (decl))
7078 {
7079 /* Make sure that we will not create a unique section for DECL. */
7080 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7081 return false;
7082 }
7083
7084 return function_section (decl) == section;
7085 }
7086
7087 /* Return nonzero if a 32-bit "long_call" should be generated for
7088 a call from the current function to DECL. We generate a long_call
7089 if the function:
7090
7091 a. has an __attribute__ ((long_call))
7092 or b. is within the scope of a #pragma long_calls
7093 or c. the -mlong-calls command line switch has been specified
7094
7095 However we do not generate a long call if the function:
7096
7097 d. has an __attribute__ ((short_call))
7098 or e. is inside the scope of a #pragma no_long_calls
7099 or f. is defined in the same section as the current function. */
7100
7101 bool
7102 arm_is_long_call_p (tree decl)
7103 {
7104 tree attrs;
7105
7106 if (!decl)
7107 return TARGET_LONG_CALLS;
7108
7109 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7110 if (lookup_attribute ("short_call", attrs))
7111 return false;
7112
7113 /* For "f", be conservative, and only cater for cases in which the
7114 whole of the current function is placed in the same section. */
7115 if (!flag_reorder_blocks_and_partition
7116 && TREE_CODE (decl) == FUNCTION_DECL
7117 && arm_function_in_section_p (decl, current_function_section ()))
7118 return false;
7119
7120 if (lookup_attribute ("long_call", attrs))
7121 return true;
7122
7123 return TARGET_LONG_CALLS;
7124 }
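
/* Illustrative precedence sketch for the checks above, assuming compilation
   with -mlong-calls; the declarations are hypothetical:

     void f (void) __attribute__ ((short_call));  never a long call
     void g (void) __attribute__ ((long_call));   long call, unless g is
                                                  known to end up in the
                                                  caller's section
     void h (void);                               long call purely because
                                                  of -mlong-calls

   That is: short_call is tested first, then the same-section test, then
   long_call, and finally the command-line default. */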
7125
7126 /* Return nonzero if it is ok to make a tail-call to DECL. */
7127 static bool
7128 arm_function_ok_for_sibcall (tree decl, tree exp)
7129 {
7130 unsigned long func_type;
7131
7132 if (cfun->machine->sibcall_blocked)
7133 return false;
7134
7135 /* Never tailcall something if we are generating code for Thumb-1. */
7136 if (TARGET_THUMB1)
7137 return false;
7138
7139 /* The PIC register is live on entry to VxWorks PLT entries, so we
7140 must make the call before restoring the PIC register. */
7141 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7142 return false;
7143
7144 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7145 may be used both as target of the call and base register for restoring
7146 the VFP registers. */
7147 if (TARGET_APCS_FRAME && TARGET_ARM
7148 && TARGET_HARD_FLOAT
7149 && decl && arm_is_long_call_p (decl))
7150 return false;
7151
7152 /* If we are interworking and the function is not declared static
7153 then we can't tail-call it unless we know that it exists in this
7154 compilation unit (since it might be a Thumb routine). */
7155 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7156 && !TREE_ASM_WRITTEN (decl))
7157 return false;
7158
7159 func_type = arm_current_func_type ();
7160 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7161 if (IS_INTERRUPT (func_type))
7162 return false;
7163
7164 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7165 generated for entry functions themselves. */
7166 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7167 return false;
7168
7169 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7170 this would complicate matters for later code generation. */
7171 if (TREE_CODE (exp) == CALL_EXPR)
7172 {
7173 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7174 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7175 return false;
7176 }
7177
7178 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7179 {
7180 /* Check that the return value locations are the same. For
7181 example that we aren't returning a value from the sibling in
7182 a VFP register but then need to transfer it to a core
7183 register. */
7184 rtx a, b;
7185 tree decl_or_type = decl;
7186
7187 /* If it is an indirect function pointer, get the function type. */
7188 if (!decl)
7189 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7190
7191 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7192 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7193 cfun->decl, false);
7194 if (!rtx_equal_p (a, b))
7195 return false;
7196 }
7197
7198 /* Never tailcall if function may be called with a misaligned SP. */
7199 if (IS_STACKALIGN (func_type))
7200 return false;
7201
7202 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7203 references should become a NOP. Don't convert such calls into
7204 sibling calls. */
7205 if (TARGET_AAPCS_BASED
7206 && arm_abi == ARM_ABI_AAPCS
7207 && decl
7208 && DECL_WEAK (decl))
7209 return false;
7210
7211 /* We cannot do a tailcall for an indirect call by descriptor if all the
7212 argument registers are used because the only register left to load the
7213 address is IP and it will already contain the static chain. */
7214 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7215 {
7216 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7217 CUMULATIVE_ARGS cum;
7218 cumulative_args_t cum_v;
7219
7220 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7221 cum_v = pack_cumulative_args (&cum);
7222
7223 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7224 {
7225 tree type = TREE_VALUE (t);
7226 if (!VOID_TYPE_P (type))
7227 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7228 }
7229
7230 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7231 return false;
7232 }
7233
7234 /* Everything else is ok. */
7235 return true;
7236 }
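
/* Illustrative sketch of one rejected sibcall, assuming -mcmse; the names
   are hypothetical:

     typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
     int wrap (ns_fn_t *fp) { return fp (); }

   The tail-position call through FP stays a normal call because of the
   cmse_nonsecure_call check above. The same function also rejects, among
   others, calls from interrupt handlers and calls whose return value would
   be located differently from the caller's own return value. */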
7237
7238 \f
7239 /* Addressing mode support functions. */
7240
7241 /* Return nonzero if X is a legitimate immediate operand when compiling
7242 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7243 int
7244 legitimate_pic_operand_p (rtx x)
7245 {
7246 if (GET_CODE (x) == SYMBOL_REF
7247 || (GET_CODE (x) == CONST
7248 && GET_CODE (XEXP (x, 0)) == PLUS
7249 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7250 return 0;
7251
7252 return 1;
7253 }
7254
7255 /* Record that the current function needs a PIC register. Initialize
7256 cfun->machine->pic_reg if we have not already done so. */
7257
7258 static void
7259 require_pic_register (void)
7260 {
7261 /* A lot of the logic here is made obscure by the fact that this
7262 routine gets called as part of the rtx cost estimation process.
7263 We don't want those calls to affect any assumptions about the real
7264 function; and further, we can't call entry_of_function() until we
7265 start the real expansion process. */
7266 if (!crtl->uses_pic_offset_table)
7267 {
7268 gcc_assert (can_create_pseudo_p ());
7269 if (arm_pic_register != INVALID_REGNUM
7270 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7271 {
7272 if (!cfun->machine->pic_reg)
7273 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7274
7275 /* Play games to avoid marking the function as needing pic
7276 if we are being called as part of the cost-estimation
7277 process. */
7278 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7279 crtl->uses_pic_offset_table = 1;
7280 }
7281 else
7282 {
7283 rtx_insn *seq, *insn;
7284
7285 if (!cfun->machine->pic_reg)
7286 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7287
7288 /* Play games to avoid marking the function as needing pic
7289 if we are being called as part of the cost-estimation
7290 process. */
7291 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7292 {
7293 crtl->uses_pic_offset_table = 1;
7294 start_sequence ();
7295
7296 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7297 && arm_pic_register > LAST_LO_REGNUM)
7298 emit_move_insn (cfun->machine->pic_reg,
7299 gen_rtx_REG (Pmode, arm_pic_register));
7300 else
7301 arm_load_pic_register (0UL);
7302
7303 seq = get_insns ();
7304 end_sequence ();
7305
7306 for (insn = seq; insn; insn = NEXT_INSN (insn))
7307 if (INSN_P (insn))
7308 INSN_LOCATION (insn) = prologue_location;
7309
7310 /* We can be called during expansion of PHI nodes, where
7311 we can't yet emit instructions directly in the final
7312 insn stream. Queue the insns on the entry edge; they will
7313 be committed after everything else is expanded. */
7314 insert_insn_on_edge (seq,
7315 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7316 }
7317 }
7318 }
7319 }
7320
7321 rtx
7322 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7323 {
7324 if (GET_CODE (orig) == SYMBOL_REF
7325 || GET_CODE (orig) == LABEL_REF)
7326 {
7327 if (reg == 0)
7328 {
7329 gcc_assert (can_create_pseudo_p ());
7330 reg = gen_reg_rtx (Pmode);
7331 }
7332
7333 /* VxWorks does not impose a fixed gap between segments; the run-time
7334 gap can be different from the object-file gap. We therefore can't
7335 use GOTOFF unless we are absolutely sure that the symbol is in the
7336 same segment as the GOT. Unfortunately, the flexibility of linker
7337 scripts means that we can't be sure of that in general, so assume
7338 that GOTOFF is never valid on VxWorks. */
7339 /* References to weak symbols cannot be resolved locally: they
7340 may be overridden by a non-weak definition at link time. */
7341 rtx_insn *insn;
7342 if ((GET_CODE (orig) == LABEL_REF
7343 || (GET_CODE (orig) == SYMBOL_REF
7344 && SYMBOL_REF_LOCAL_P (orig)
7345 && (SYMBOL_REF_DECL (orig)
7346 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7347 && NEED_GOT_RELOC
7348 && arm_pic_data_is_text_relative)
7349 insn = arm_pic_static_addr (orig, reg);
7350 else
7351 {
7352 rtx pat;
7353 rtx mem;
7354
7355 /* If this function doesn't have a pic register, create one now. */
7356 require_pic_register ();
7357
7358 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7359
7360 /* Make the MEM as close to a constant as possible. */
7361 mem = SET_SRC (pat);
7362 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7363 MEM_READONLY_P (mem) = 1;
7364 MEM_NOTRAP_P (mem) = 1;
7365
7366 insn = emit_insn (pat);
7367 }
7368
7369 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7370 by loop. */
7371 set_unique_reg_note (insn, REG_EQUAL, orig);
7372
7373 return reg;
7374 }
7375 else if (GET_CODE (orig) == CONST)
7376 {
7377 rtx base, offset;
7378
7379 if (GET_CODE (XEXP (orig, 0)) == PLUS
7380 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7381 return orig;
7382
7383 /* Handle the case where we have: const (UNSPEC_TLS). */
7384 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7385 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7386 return orig;
7387
7388 /* Handle the case where we have:
7389 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7390 CONST_INT. */
7391 if (GET_CODE (XEXP (orig, 0)) == PLUS
7392 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7393 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7394 {
7395 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7396 return orig;
7397 }
7398
7399 if (reg == 0)
7400 {
7401 gcc_assert (can_create_pseudo_p ());
7402 reg = gen_reg_rtx (Pmode);
7403 }
7404
7405 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7406
7407 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7408 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7409 base == reg ? 0 : reg);
7410
7411 if (CONST_INT_P (offset))
7412 {
7413 /* The base register doesn't really matter, we only want to
7414 test the index for the appropriate mode. */
7415 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7416 {
7417 gcc_assert (can_create_pseudo_p ());
7418 offset = force_reg (Pmode, offset);
7419 }
7420
7421 if (CONST_INT_P (offset))
7422 return plus_constant (Pmode, base, INTVAL (offset));
7423 }
7424
7425 if (GET_MODE_SIZE (mode) > 4
7426 && (GET_MODE_CLASS (mode) == MODE_INT
7427 || TARGET_SOFT_FLOAT))
7428 {
7429 emit_insn (gen_addsi3 (reg, base, offset));
7430 return reg;
7431 }
7432
7433 return gen_rtx_PLUS (Pmode, base, offset);
7434 }
7435
7436 return orig;
7437 }
7438
7439
7440 /* Find a spare register to use during the prolog of a function. */
7441
7442 static int
7443 thumb_find_work_register (unsigned long pushed_regs_mask)
7444 {
7445 int reg;
7446
7447 /* Check the argument registers first as these are call-used. The
7448 register allocation order means that sometimes r3 might be used
7449 but earlier argument registers might not, so check them all. */
7450 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7451 if (!df_regs_ever_live_p (reg))
7452 return reg;
7453
7454 /* Before going on to check the call-saved registers we can try a couple
7455 more ways of deducing that r3 is available. The first is when we are
7456 pushing anonymous arguments onto the stack and we have fewer than 4
7457 registers' worth of fixed arguments (*). In this case r3 will be part of
7458 the variable argument list and so we can be sure that it will be
7459 pushed right at the start of the function. Hence it will be available
7460 for the rest of the prologue.
7461 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7462 if (cfun->machine->uses_anonymous_args
7463 && crtl->args.pretend_args_size > 0)
7464 return LAST_ARG_REGNUM;
7465
7466 /* The other case is when we have fixed arguments but fewer than 4 registers'
7467 worth. In this case r3 might be used in the body of the function, but
7468 it is not being used to convey an argument into the function. In theory
7469 we could just check crtl->args.size to see how many bytes are
7470 being passed in argument registers, but it seems that it is unreliable.
7471 Sometimes it will have the value 0 when in fact arguments are being
7472 passed. (See testcase execute/20021111-1.c for an example). So we also
7473 check the args_info.nregs field as well. The problem with this field is
7474 that it makes no allowances for arguments that are passed to the
7475 function but which are not used. Hence we could miss an opportunity
7476 when a function has an unused argument in r3. But it is better to be
7477 safe than sorry. */
7478 if (! cfun->machine->uses_anonymous_args
7479 && crtl->args.size >= 0
7480 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7481 && (TARGET_AAPCS_BASED
7482 ? crtl->args.info.aapcs_ncrn < 4
7483 : crtl->args.info.nregs < 4))
7484 return LAST_ARG_REGNUM;
7485
7486 /* Otherwise look for a call-saved register that is going to be pushed. */
7487 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7488 if (pushed_regs_mask & (1 << reg))
7489 return reg;
7490
7491 if (TARGET_THUMB2)
7492 {
7493 /* Thumb-2 can use high regs. */
7494 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7495 if (pushed_regs_mask & (1 << reg))
7496 return reg;
7497 }
7498 /* Something went wrong - thumb_compute_save_reg_mask()
7499 should have arranged for a suitable register to be pushed. */
7500 gcc_unreachable ();
7501 }
7502
7503 static GTY(()) int pic_labelno;
7504
7505 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7506 low register. */
7507
7508 void
7509 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7510 {
7511 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7512
7513 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7514 return;
7515
7516 gcc_assert (flag_pic);
7517
7518 pic_reg = cfun->machine->pic_reg;
7519 if (TARGET_VXWORKS_RTP)
7520 {
7521 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7522 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7523 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7524
7525 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7526
7527 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7528 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7529 }
7530 else
7531 {
7532 /* We use an UNSPEC rather than a LABEL_REF because this label
7533 never appears in the code stream. */
7534
7535 labelno = GEN_INT (pic_labelno++);
7536 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7537 l1 = gen_rtx_CONST (VOIDmode, l1);
7538
7539 /* On the ARM the PC register contains 'dot + 8' at the time of the
7540 addition, on the Thumb it is 'dot + 4'. */
7541 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7542 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7543 UNSPEC_GOTSYM_OFF);
7544 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7545
7546 if (TARGET_32BIT)
7547 {
7548 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7549 }
7550 else /* TARGET_THUMB1 */
7551 {
7552 if (arm_pic_register != INVALID_REGNUM
7553 && REGNO (pic_reg) > LAST_LO_REGNUM)
7554 {
7555 /* We will have pushed the pic register, so we should always be
7556 able to find a work register. */
7557 pic_tmp = gen_rtx_REG (SImode,
7558 thumb_find_work_register (saved_regs));
7559 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7560 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7561 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7562 }
7563 else if (arm_pic_register != INVALID_REGNUM
7564 && arm_pic_register > LAST_LO_REGNUM
7565 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7566 {
7567 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7568 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7569 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7570 }
7571 else
7572 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7573 }
7574 }
7575
7576 /* Need to emit this whether or not we obey regdecls,
7577 since setjmp/longjmp can cause life info to screw up. */
7578 emit_use (pic_reg);
7579 }
7580
7581 /* Generate code to load the address of a static var when flag_pic is set. */
7582 static rtx_insn *
7583 arm_pic_static_addr (rtx orig, rtx reg)
7584 {
7585 rtx l1, labelno, offset_rtx;
7586
7587 gcc_assert (flag_pic);
7588
7589 /* We use an UNSPEC rather than a LABEL_REF because this label
7590 never appears in the code stream. */
7591 labelno = GEN_INT (pic_labelno++);
7592 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7593 l1 = gen_rtx_CONST (VOIDmode, l1);
7594
7595 /* On the ARM the PC register contains 'dot + 8' at the time of the
7596 addition, on the Thumb it is 'dot + 4'. */
7597 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7598 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7599 UNSPEC_SYMBOL_OFFSET);
7600 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7601
7602 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7603 }
7604
7605 /* Return nonzero if X is valid as an ARM state addressing register. */
7606 static int
7607 arm_address_register_rtx_p (rtx x, int strict_p)
7608 {
7609 int regno;
7610
7611 if (!REG_P (x))
7612 return 0;
7613
7614 regno = REGNO (x);
7615
7616 if (strict_p)
7617 return ARM_REGNO_OK_FOR_BASE_P (regno);
7618
7619 return (regno <= LAST_ARM_REGNUM
7620 || regno >= FIRST_PSEUDO_REGISTER
7621 || regno == FRAME_POINTER_REGNUM
7622 || regno == ARG_POINTER_REGNUM);
7623 }
7624
7625 /* Return TRUE if this rtx is the difference of a symbol and a label,
7626 and will reduce to a PC-relative relocation in the object file.
7627 Expressions like this can be left alone when generating PIC, rather
7628 than forced through the GOT. */
7629 static int
7630 pcrel_constant_p (rtx x)
7631 {
7632 if (GET_CODE (x) == MINUS)
7633 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7634
7635 return FALSE;
7636 }
7637
7638 /* Return true if X will surely end up in an index register after next
7639 splitting pass. */
7640 static bool
7641 will_be_in_index_register (const_rtx x)
7642 {
7643 /* arm.md: calculate_pic_address will split this into a register. */
7644 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7645 }
7646
7647 /* Return nonzero if X is a valid ARM state address operand. */
7648 int
7649 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7650 int strict_p)
7651 {
7652 bool use_ldrd;
7653 enum rtx_code code = GET_CODE (x);
7654
7655 if (arm_address_register_rtx_p (x, strict_p))
7656 return 1;
7657
7658 use_ldrd = (TARGET_LDRD
7659 && (mode == DImode || mode == DFmode));
7660
7661 if (code == POST_INC || code == PRE_DEC
7662 || ((code == PRE_INC || code == POST_DEC)
7663 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7664 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7665
7666 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7667 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7668 && GET_CODE (XEXP (x, 1)) == PLUS
7669 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7670 {
7671 rtx addend = XEXP (XEXP (x, 1), 1);
7672
7673 /* Don't allow ldrd post increment by register because it's hard
7674 to fixup invalid register choices. */
7675 if (use_ldrd
7676 && GET_CODE (x) == POST_MODIFY
7677 && REG_P (addend))
7678 return 0;
7679
7680 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7681 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7682 }
7683
7684 /* After reload constants split into minipools will have addresses
7685 from a LABEL_REF. */
7686 else if (reload_completed
7687 && (code == LABEL_REF
7688 || (code == CONST
7689 && GET_CODE (XEXP (x, 0)) == PLUS
7690 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7691 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7692 return 1;
7693
7694 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7695 return 0;
7696
7697 else if (code == PLUS)
7698 {
7699 rtx xop0 = XEXP (x, 0);
7700 rtx xop1 = XEXP (x, 1);
7701
7702 return ((arm_address_register_rtx_p (xop0, strict_p)
7703 && ((CONST_INT_P (xop1)
7704 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7705 || (!strict_p && will_be_in_index_register (xop1))))
7706 || (arm_address_register_rtx_p (xop1, strict_p)
7707 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7708 }
7709
7710 #if 0
7711 /* Reload currently can't handle MINUS, so disable this for now */
7712 else if (GET_CODE (x) == MINUS)
7713 {
7714 rtx xop0 = XEXP (x, 0);
7715 rtx xop1 = XEXP (x, 1);
7716
7717 return (arm_address_register_rtx_p (xop0, strict_p)
7718 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7719 }
7720 #endif
7721
7722 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7723 && code == SYMBOL_REF
7724 && CONSTANT_POOL_ADDRESS_P (x)
7725 && ! (flag_pic
7726 && symbol_mentioned_p (get_pool_constant (x))
7727 && ! pcrel_constant_p (get_pool_constant (x))))
7728 return 1;
7729
7730 return 0;
7731 }
7732
7733 /* Return true if we can avoid creating a constant pool entry for x. */
7734 static bool
7735 can_avoid_literal_pool_for_label_p (rtx x)
7736 {
7737 /* Normally we can assign constant values to target registers without
7738 the help of the constant pool. But there are cases where we have to
7739 use the constant pool, such as:
7740 1) assigning a label to a register;
7741 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7742 
7743 A constant pool access of the form:
7744 (set (reg r0) (mem (symbol_ref (".LC0"))))
7745 will cause the use of the literal pool (later, in arm_reorg).
7746 So here we mark such a form as invalid; the compiler
7747 will then adjust it into:
7748 (set (reg r0) (symbol_ref (".LC0")))
7749 (set (reg r0) (mem (reg r0))).
7750 No extra register is required, and (mem (reg r0)) won't cause the use
7751 of the literal pool. */
7752 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x))
7754 return 1;
7755 return 0;
7756 }
7757
7758
7759 /* Return nonzero if X is a valid Thumb-2 address operand. */
7760 static int
7761 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7762 {
7763 bool use_ldrd;
7764 enum rtx_code code = GET_CODE (x);
7765
7766 if (arm_address_register_rtx_p (x, strict_p))
7767 return 1;
7768
7769 use_ldrd = (TARGET_LDRD
7770 && (mode == DImode || mode == DFmode));
7771
7772 if (code == POST_INC || code == PRE_DEC
7773 || ((code == PRE_INC || code == POST_DEC)
7774 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7775 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7776
7777 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7778 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7779 && GET_CODE (XEXP (x, 1)) == PLUS
7780 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7781 {
7782 /* Thumb-2 only has autoincrement by constant. */
7783 rtx addend = XEXP (XEXP (x, 1), 1);
7784 HOST_WIDE_INT offset;
7785
7786 if (!CONST_INT_P (addend))
7787 return 0;
7788
7789 offset = INTVAL(addend);
7790 if (GET_MODE_SIZE (mode) <= 4)
7791 return (offset > -256 && offset < 256);
7792
7793 return (use_ldrd && offset > -1024 && offset < 1024
7794 && (offset & 3) == 0);
7795 }
7796
7797 /* After reload constants split into minipools will have addresses
7798 from a LABEL_REF. */
7799 else if (reload_completed
7800 && (code == LABEL_REF
7801 || (code == CONST
7802 && GET_CODE (XEXP (x, 0)) == PLUS
7803 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7804 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7805 return 1;
7806
7807 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7808 return 0;
7809
7810 else if (code == PLUS)
7811 {
7812 rtx xop0 = XEXP (x, 0);
7813 rtx xop1 = XEXP (x, 1);
7814
7815 return ((arm_address_register_rtx_p (xop0, strict_p)
7816 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7817 || (!strict_p && will_be_in_index_register (xop1))))
7818 || (arm_address_register_rtx_p (xop1, strict_p)
7819 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7820 }
7821
7822 else if (can_avoid_literal_pool_for_label_p (x))
7823 return 0;
7824
7825 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7826 && code == SYMBOL_REF
7827 && CONSTANT_POOL_ADDRESS_P (x)
7828 && ! (flag_pic
7829 && symbol_mentioned_p (get_pool_constant (x))
7830 && ! pcrel_constant_p (get_pool_constant (x))))
7831 return 1;
7832
7833 return 0;
7834 }
7835
7836 /* Return nonzero if INDEX is valid for an address index operand in
7837 ARM state. */
7838 static int
7839 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7840 int strict_p)
7841 {
7842 HOST_WIDE_INT range;
7843 enum rtx_code code = GET_CODE (index);
7844
7845 /* Standard coprocessor addressing modes. */
7846 if (TARGET_HARD_FLOAT
7847 && (mode == SFmode || mode == DFmode))
7848 return (code == CONST_INT && INTVAL (index) < 1024
7849 && INTVAL (index) > -1024
7850 && (INTVAL (index) & 3) == 0);
7851
7852 /* For quad modes, we restrict the constant offset to be slightly less
7853 than what the instruction format permits. We do this because for
7854 quad mode moves, we will actually decompose them into two separate
7855 double-mode reads or writes. INDEX must therefore be a valid
7856 (double-mode) offset and so should INDEX+8. */
7857 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7858 return (code == CONST_INT
7859 && INTVAL (index) < 1016
7860 && INTVAL (index) > -1024
7861 && (INTVAL (index) & 3) == 0);
7862
7863 /* We have no such constraint on double mode offsets, so we permit the
7864 full range of the instruction format. */
7865 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7866 return (code == CONST_INT
7867 && INTVAL (index) < 1024
7868 && INTVAL (index) > -1024
7869 && (INTVAL (index) & 3) == 0);
7870
7871 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7872 return (code == CONST_INT
7873 && INTVAL (index) < 1024
7874 && INTVAL (index) > -1024
7875 && (INTVAL (index) & 3) == 0);
7876
7877 if (arm_address_register_rtx_p (index, strict_p)
7878 && (GET_MODE_SIZE (mode) <= 4))
7879 return 1;
7880
7881 if (mode == DImode || mode == DFmode)
7882 {
7883 if (code == CONST_INT)
7884 {
7885 HOST_WIDE_INT val = INTVAL (index);
7886
7887 if (TARGET_LDRD)
7888 return val > -256 && val < 256;
7889 else
7890 return val > -4096 && val < 4092;
7891 }
7892
7893 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7894 }
7895
7896 if (GET_MODE_SIZE (mode) <= 4
7897 && ! (arm_arch4
7898 && (mode == HImode
7899 || mode == HFmode
7900 || (mode == QImode && outer == SIGN_EXTEND))))
7901 {
7902 if (code == MULT)
7903 {
7904 rtx xiop0 = XEXP (index, 0);
7905 rtx xiop1 = XEXP (index, 1);
7906
7907 return ((arm_address_register_rtx_p (xiop0, strict_p)
7908 && power_of_two_operand (xiop1, SImode))
7909 || (arm_address_register_rtx_p (xiop1, strict_p)
7910 && power_of_two_operand (xiop0, SImode)));
7911 }
7912 else if (code == LSHIFTRT || code == ASHIFTRT
7913 || code == ASHIFT || code == ROTATERT)
7914 {
7915 rtx op = XEXP (index, 1);
7916
7917 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7918 && CONST_INT_P (op)
7919 && INTVAL (op) > 0
7920 && INTVAL (op) <= 31);
7921 }
7922 }
7923
7924 /* For ARM v4 we may be doing a sign-extend operation during the
7925 load. */
7926 if (arm_arch4)
7927 {
7928 if (mode == HImode
7929 || mode == HFmode
7930 || (outer == SIGN_EXTEND && mode == QImode))
7931 range = 256;
7932 else
7933 range = 4096;
7934 }
7935 else
7936 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7937
7938 return (code == CONST_INT
7939 && INTVAL (index) < range
7940 && INTVAL (index) > -range);
7941 }
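
/* A few concrete examples that satisfy the checks above (a sketch for ARM
   state; the mnemonics are only illustrative, the function itself merely
   validates the RTL index expression):

     ldr  r0, [r1, #4095]         SImode, 12-bit immediate index
     ldr  r0, [r1, r2, lsl #2]    register index scaled by a power of two
     ldrh r0, [r1, #255]          HImode, 8-bit range when arm_arch4
     ldrd r0, r1, [r2, #252]      DImode with TARGET_LDRD, -255..255 */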
7942
7943 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7944 index operand. i.e. 1, 2, 4 or 8. */
7945 static bool
7946 thumb2_index_mul_operand (rtx op)
7947 {
7948 HOST_WIDE_INT val;
7949
7950 if (!CONST_INT_P (op))
7951 return false;
7952
7953 val = INTVAL(op);
7954 return (val == 1 || val == 2 || val == 4 || val == 8);
7955 }
7956
7957 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7958 static int
7959 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7960 {
7961 enum rtx_code code = GET_CODE (index);
7962
7963 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7964 /* Standard coprocessor addressing modes. */
7965 if (TARGET_HARD_FLOAT
7966 && (mode == SFmode || mode == DFmode))
7967 return (code == CONST_INT && INTVAL (index) < 1024
7968 /* Thumb-2 allows only > -256 index range for its core register
7969 load/stores. Since we allow SF/DF in core registers, we have
7970 to use the intersection between -256~4096 (core) and -1024~1024
7971 (coprocessor). */
7972 && INTVAL (index) > -256
7973 && (INTVAL (index) & 3) == 0);
7974
7975 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7976 {
7977 /* For DImode assume values will usually live in core regs
7978 and only allow LDRD addressing modes. */
7979 if (!TARGET_LDRD || mode != DImode)
7980 return (code == CONST_INT
7981 && INTVAL (index) < 1024
7982 && INTVAL (index) > -1024
7983 && (INTVAL (index) & 3) == 0);
7984 }
7985
7986 /* For quad modes, we restrict the constant offset to be slightly less
7987 than what the instruction format permits. We do this because for
7988 quad mode moves, we will actually decompose them into two separate
7989 double-mode reads or writes. INDEX must therefore be a valid
7990 (double-mode) offset and so should INDEX+8. */
7991 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7992 return (code == CONST_INT
7993 && INTVAL (index) < 1016
7994 && INTVAL (index) > -1024
7995 && (INTVAL (index) & 3) == 0);
7996
7997 /* We have no such constraint on double mode offsets, so we permit the
7998 full range of the instruction format. */
7999 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8000 return (code == CONST_INT
8001 && INTVAL (index) < 1024
8002 && INTVAL (index) > -1024
8003 && (INTVAL (index) & 3) == 0);
8004
8005 if (arm_address_register_rtx_p (index, strict_p)
8006 && (GET_MODE_SIZE (mode) <= 4))
8007 return 1;
8008
8009 if (mode == DImode || mode == DFmode)
8010 {
8011 if (code == CONST_INT)
8012 {
8013 HOST_WIDE_INT val = INTVAL (index);
8014 /* ??? Can we assume ldrd for thumb2? */
8015 /* Thumb-2 ldrd only has reg+const addressing modes. */
8016 /* ldrd supports offsets of +-1020.
8017 However the ldr fallback does not. */
8018 return val > -256 && val < 256 && (val & 3) == 0;
8019 }
8020 else
8021 return 0;
8022 }
8023
8024 if (code == MULT)
8025 {
8026 rtx xiop0 = XEXP (index, 0);
8027 rtx xiop1 = XEXP (index, 1);
8028
8029 return ((arm_address_register_rtx_p (xiop0, strict_p)
8030 && thumb2_index_mul_operand (xiop1))
8031 || (arm_address_register_rtx_p (xiop1, strict_p)
8032 && thumb2_index_mul_operand (xiop0)));
8033 }
8034 else if (code == ASHIFT)
8035 {
8036 rtx op = XEXP (index, 1);
8037
8038 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8039 && CONST_INT_P (op)
8040 && INTVAL (op) > 0
8041 && INTVAL (op) <= 3);
8042 }
8043
8044 return (code == CONST_INT
8045 && INTVAL (index) < 4096
8046 && INTVAL (index) > -256);
8047 }
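
/* Corresponding Thumb-2 sketch: the shift amount is limited to at most 3
   and negative immediates to -255, e.g.

     ldr r0, [r1, r2, lsl #3]     scaled register index, shift <= 3
     ldr r0, [r1, #4095]          positive 12-bit immediate
     ldr r0, [r1, #-255]          negative 8-bit immediate

   again purely as an illustration of the ranges checked above. */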
8048
8049 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8050 static int
8051 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8052 {
8053 int regno;
8054
8055 if (!REG_P (x))
8056 return 0;
8057
8058 regno = REGNO (x);
8059
8060 if (strict_p)
8061 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8062
8063 return (regno <= LAST_LO_REGNUM
8064 || regno > LAST_VIRTUAL_REGISTER
8065 || regno == FRAME_POINTER_REGNUM
8066 || (GET_MODE_SIZE (mode) >= 4
8067 && (regno == STACK_POINTER_REGNUM
8068 || regno >= FIRST_PSEUDO_REGISTER
8069 || x == hard_frame_pointer_rtx
8070 || x == arg_pointer_rtx)));
8071 }
8072
8073 /* Return nonzero if x is a legitimate index register. This is the case
8074 for any base register that can access a QImode object. */
8075 inline static int
8076 thumb1_index_register_rtx_p (rtx x, int strict_p)
8077 {
8078 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8079 }
8080
8081 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8082
8083 The AP may be eliminated to either the SP or the FP, so we use the
8084 least common denominator, e.g. SImode, and offsets from 0 to 64.
8085
8086 ??? Verify whether the above is the right approach.
8087
8088 ??? Also, the FP may be eliminated to the SP, so perhaps that
8089 needs special handling also.
8090
8091 ??? Look at how the mips16 port solves this problem. It probably uses
8092 better ways to solve some of these problems.
8093
8094 Although it is not incorrect, we don't accept QImode and HImode
8095 addresses based on the frame pointer or arg pointer until the
8096 reload pass starts. This is so that eliminating such addresses
8097 into stack based ones won't produce impossible code. */
8098 int
8099 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8100 {
8101 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8102 return 0;
8103
8104 /* ??? Not clear if this is right. Experiment. */
8105 if (GET_MODE_SIZE (mode) < 4
8106 && !(reload_in_progress || reload_completed)
8107 && (reg_mentioned_p (frame_pointer_rtx, x)
8108 || reg_mentioned_p (arg_pointer_rtx, x)
8109 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8110 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8111 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8112 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8113 return 0;
8114
8115 /* Accept any base register. SP only in SImode or larger. */
8116 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8117 return 1;
8118
8119 /* This is PC relative data before arm_reorg runs. */
8120 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8121 && GET_CODE (x) == SYMBOL_REF
8122 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8123 return 1;
8124
8125 /* This is PC relative data after arm_reorg runs. */
8126 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8127 && reload_completed
8128 && (GET_CODE (x) == LABEL_REF
8129 || (GET_CODE (x) == CONST
8130 && GET_CODE (XEXP (x, 0)) == PLUS
8131 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8132 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8133 return 1;
8134
8135 /* Post-inc indexing only supported for SImode and larger. */
8136 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8137 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8138 return 1;
8139
8140 else if (GET_CODE (x) == PLUS)
8141 {
8142 /* REG+REG address can be any two index registers. */
8143 /* We disallow FRAME+REG addressing since we know that FRAME
8144 will be replaced with STACK, and SP relative addressing only
8145 permits SP+OFFSET. */
8146 if (GET_MODE_SIZE (mode) <= 4
8147 && XEXP (x, 0) != frame_pointer_rtx
8148 && XEXP (x, 1) != frame_pointer_rtx
8149 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8150 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8151 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8152 return 1;
8153
8154 /* REG+const has 5-7 bit offset for non-SP registers. */
8155 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8156 || XEXP (x, 0) == arg_pointer_rtx)
8157 && CONST_INT_P (XEXP (x, 1))
8158 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8159 return 1;
8160
8161 /* REG+const has 10-bit offset for SP, but only SImode and
8162 larger is supported. */
8163 /* ??? Should probably check for DI/DFmode overflow here
8164 just like GO_IF_LEGITIMATE_OFFSET does. */
8165 else if (REG_P (XEXP (x, 0))
8166 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8167 && GET_MODE_SIZE (mode) >= 4
8168 && CONST_INT_P (XEXP (x, 1))
8169 && INTVAL (XEXP (x, 1)) >= 0
8170 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8171 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8172 return 1;
8173
8174 else if (REG_P (XEXP (x, 0))
8175 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8176 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8177 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8178 && REGNO (XEXP (x, 0))
8179 <= LAST_VIRTUAL_POINTER_REGISTER))
8180 && GET_MODE_SIZE (mode) >= 4
8181 && CONST_INT_P (XEXP (x, 1))
8182 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8183 return 1;
8184 }
8185
8186 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8187 && GET_MODE_SIZE (mode) == 4
8188 && GET_CODE (x) == SYMBOL_REF
8189 && CONSTANT_POOL_ADDRESS_P (x)
8190 && ! (flag_pic
8191 && symbol_mentioned_p (get_pool_constant (x))
8192 && ! pcrel_constant_p (get_pool_constant (x))))
8193 return 1;
8194
8195 return 0;
8196 }
8197
8198 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8199 instruction of mode MODE. */
8200 int
8201 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8202 {
8203 switch (GET_MODE_SIZE (mode))
8204 {
8205 case 1:
8206 return val >= 0 && val < 32;
8207
8208 case 2:
8209 return val >= 0 && val < 64 && (val & 1) == 0;
8210
8211 default:
8212 return (val >= 0
8213 && (val + GET_MODE_SIZE (mode)) <= 128
8214 && (val & 3) == 0);
8215 }
8216 }
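
/* Worked examples of the ranges accepted above (Thumb-1 immediate
   load/store offsets; the mnemonics are illustrative):

     size 1:   0 <= val <= 31                       ldrb r0, [r1, #31]
     size 2:   0 <= val <= 62, even                 ldrh r0, [r1, #62]
     size 4+:  0 <= val, val + size <= 128,
               multiple of 4                        ldr  r0, [r1, #124] */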
8217
8218 bool
8219 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8220 {
8221 if (TARGET_ARM)
8222 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8223 else if (TARGET_THUMB2)
8224 return thumb2_legitimate_address_p (mode, x, strict_p);
8225 else /* if (TARGET_THUMB1) */
8226 return thumb1_legitimate_address_p (mode, x, strict_p);
8227 }
8228
8229 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8230
8231 Given an rtx X being reloaded into a reg required to be
8232 in class CLASS, return the class of reg to actually use.
8233 In general this is just CLASS, but for the Thumb core registers and
8234 immediate constants we prefer a LO_REGS class or a subset. */
8235
8236 static reg_class_t
8237 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8238 {
8239 if (TARGET_32BIT)
8240 return rclass;
8241 else
8242 {
8243 if (rclass == GENERAL_REGS)
8244 return LO_REGS;
8245 else
8246 return rclass;
8247 }
8248 }
8249
8250 /* Build the SYMBOL_REF for __tls_get_addr. */
8251
8252 static GTY(()) rtx tls_get_addr_libfunc;
8253
8254 static rtx
8255 get_tls_get_addr (void)
8256 {
8257 if (!tls_get_addr_libfunc)
8258 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8259 return tls_get_addr_libfunc;
8260 }
8261
8262 rtx
8263 arm_load_tp (rtx target)
8264 {
8265 if (!target)
8266 target = gen_reg_rtx (SImode);
8267
8268 if (TARGET_HARD_TP)
8269 {
8270 /* Can return in any reg. */
8271 emit_insn (gen_load_tp_hard (target));
8272 }
8273 else
8274 {
8275 /* Always returned in r0. Immediately copy the result into a pseudo,
8276 otherwise other uses of r0 (e.g. setting up function arguments) may
8277 clobber the value. */
8278
8279 rtx tmp;
8280
8281 emit_insn (gen_load_tp_soft ());
8282
8283 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8284 emit_move_insn (target, tmp);
8285 }
8286 return target;
8287 }
8288
8289 static rtx
8290 load_tls_operand (rtx x, rtx reg)
8291 {
8292 rtx tmp;
8293
8294 if (reg == NULL_RTX)
8295 reg = gen_reg_rtx (SImode);
8296
8297 tmp = gen_rtx_CONST (SImode, x);
8298
8299 emit_move_insn (reg, tmp);
8300
8301 return reg;
8302 }
8303
8304 static rtx_insn *
8305 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8306 {
8307 rtx label, labelno, sum;
8308
8309 gcc_assert (reloc != TLS_DESCSEQ);
8310 start_sequence ();
8311
8312 labelno = GEN_INT (pic_labelno++);
8313 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8314 label = gen_rtx_CONST (VOIDmode, label);
8315
8316 sum = gen_rtx_UNSPEC (Pmode,
8317 gen_rtvec (4, x, GEN_INT (reloc), label,
8318 GEN_INT (TARGET_ARM ? 8 : 4)),
8319 UNSPEC_TLS);
8320 reg = load_tls_operand (sum, reg);
8321
8322 if (TARGET_ARM)
8323 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8324 else
8325 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8326
8327 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8328 LCT_PURE, /* LCT_CONST? */
8329 Pmode, 1, reg, Pmode);
8330
8331 rtx_insn *insns = get_insns ();
8332 end_sequence ();
8333
8334 return insns;
8335 }
8336
8337 static rtx
8338 arm_tls_descseq_addr (rtx x, rtx reg)
8339 {
8340 rtx labelno = GEN_INT (pic_labelno++);
8341 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8342 rtx sum = gen_rtx_UNSPEC (Pmode,
8343 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8344 gen_rtx_CONST (VOIDmode, label),
8345 GEN_INT (!TARGET_ARM)),
8346 UNSPEC_TLS);
8347 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8348
8349 emit_insn (gen_tlscall (x, labelno));
8350 if (!reg)
8351 reg = gen_reg_rtx (SImode);
8352 else
8353 gcc_assert (REGNO (reg) != R0_REGNUM);
8354
8355 emit_move_insn (reg, reg0);
8356
8357 return reg;
8358 }
8359
8360 rtx
8361 legitimize_tls_address (rtx x, rtx reg)
8362 {
8363 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8364 rtx_insn *insns;
8365 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8366
8367 switch (model)
8368 {
8369 case TLS_MODEL_GLOBAL_DYNAMIC:
8370 if (TARGET_GNU2_TLS)
8371 {
8372 reg = arm_tls_descseq_addr (x, reg);
8373
8374 tp = arm_load_tp (NULL_RTX);
8375
8376 dest = gen_rtx_PLUS (Pmode, tp, reg);
8377 }
8378 else
8379 {
8380 /* Original scheme */
8381 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8382 dest = gen_reg_rtx (Pmode);
8383 emit_libcall_block (insns, dest, ret, x);
8384 }
8385 return dest;
8386
8387 case TLS_MODEL_LOCAL_DYNAMIC:
8388 if (TARGET_GNU2_TLS)
8389 {
8390 reg = arm_tls_descseq_addr (x, reg);
8391
8392 tp = arm_load_tp (NULL_RTX);
8393
8394 dest = gen_rtx_PLUS (Pmode, tp, reg);
8395 }
8396 else
8397 {
8398 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8399
8400 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8401 share the LDM result with other LD model accesses. */
8402 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8403 UNSPEC_TLS);
8404 dest = gen_reg_rtx (Pmode);
8405 emit_libcall_block (insns, dest, ret, eqv);
8406
8407 /* Load the addend. */
8408 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8409 GEN_INT (TLS_LDO32)),
8410 UNSPEC_TLS);
8411 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8412 dest = gen_rtx_PLUS (Pmode, dest, addend);
8413 }
8414 return dest;
8415
8416 case TLS_MODEL_INITIAL_EXEC:
8417 labelno = GEN_INT (pic_labelno++);
8418 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8419 label = gen_rtx_CONST (VOIDmode, label);
8420 sum = gen_rtx_UNSPEC (Pmode,
8421 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8422 GEN_INT (TARGET_ARM ? 8 : 4)),
8423 UNSPEC_TLS);
8424 reg = load_tls_operand (sum, reg);
8425
8426 if (TARGET_ARM)
8427 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8428 else if (TARGET_THUMB2)
8429 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8430 else
8431 {
8432 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8433 emit_move_insn (reg, gen_const_mem (SImode, reg));
8434 }
8435
8436 tp = arm_load_tp (NULL_RTX);
8437
8438 return gen_rtx_PLUS (Pmode, tp, reg);
8439
8440 case TLS_MODEL_LOCAL_EXEC:
8441 tp = arm_load_tp (NULL_RTX);
8442
8443 reg = gen_rtx_UNSPEC (Pmode,
8444 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8445 UNSPEC_TLS);
8446 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8447
8448 return gen_rtx_PLUS (Pmode, tp, reg);
8449
8450 default:
8451 abort ();
8452 }
8453 }
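
/* Very rough mapping sketch for the models handled above (the model that is
   finally chosen also depends on -ftls-model, whether code is PIC and how
   the symbol binds): a `__thread int t;' referenced from -fPIC shared
   library code typically uses the global-dynamic sequence, while code
   destined for an executable is usually relaxed to initial-exec or
   local-exec. With TARGET_GNU2_TLS the dynamic models use the TLS
   descriptor sequence from arm_tls_descseq_addr instead of calling
   __tls_get_addr directly. */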
8454
8455 /* Try machine-dependent ways of modifying an illegitimate address
8456 to be legitimate. If we find one, return the new, valid address. */
8457 rtx
8458 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8459 {
8460 if (arm_tls_referenced_p (x))
8461 {
8462 rtx addend = NULL;
8463
8464 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8465 {
8466 addend = XEXP (XEXP (x, 0), 1);
8467 x = XEXP (XEXP (x, 0), 0);
8468 }
8469
8470 if (GET_CODE (x) != SYMBOL_REF)
8471 return x;
8472
8473 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8474
8475 x = legitimize_tls_address (x, NULL_RTX);
8476
8477 if (addend)
8478 {
8479 x = gen_rtx_PLUS (SImode, x, addend);
8480 orig_x = x;
8481 }
8482 else
8483 return x;
8484 }
8485
8486 if (!TARGET_ARM)
8487 {
8488 /* TODO: legitimize_address for Thumb2. */
8489 if (TARGET_THUMB2)
8490 return x;
8491 return thumb_legitimize_address (x, orig_x, mode);
8492 }
8493
8494 if (GET_CODE (x) == PLUS)
8495 {
8496 rtx xop0 = XEXP (x, 0);
8497 rtx xop1 = XEXP (x, 1);
8498
8499 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8500 xop0 = force_reg (SImode, xop0);
8501
8502 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8503 && !symbol_mentioned_p (xop1))
8504 xop1 = force_reg (SImode, xop1);
8505
8506 if (ARM_BASE_REGISTER_RTX_P (xop0)
8507 && CONST_INT_P (xop1))
8508 {
8509 HOST_WIDE_INT n, low_n;
8510 rtx base_reg, val;
8511 n = INTVAL (xop1);
8512
8513 /* VFP addressing modes actually allow greater offsets, but for
8514 now we just stick with the lowest common denominator. */
8515 if (mode == DImode || mode == DFmode)
8516 {
8517 low_n = n & 0x0f;
8518 n &= ~0x0f;
8519 if (low_n > 4)
8520 {
8521 n += 16;
8522 low_n -= 16;
8523 }
8524 }
8525 else
8526 {
8527 low_n = ((mode) == TImode ? 0
8528 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8529 n -= low_n;
8530 }
8531
8532 base_reg = gen_reg_rtx (SImode);
8533 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8534 emit_move_insn (base_reg, val);
8535 x = plus_constant (Pmode, base_reg, low_n);
8536 }
8537 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8538 x = gen_rtx_PLUS (SImode, xop0, xop1);
8539 }
8540
8541 /* XXX We don't allow MINUS any more -- see comment in
8542 arm_legitimate_address_outer_p (). */
8543 else if (GET_CODE (x) == MINUS)
8544 {
8545 rtx xop0 = XEXP (x, 0);
8546 rtx xop1 = XEXP (x, 1);
8547
8548 if (CONSTANT_P (xop0))
8549 xop0 = force_reg (SImode, xop0);
8550
8551 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8552 xop1 = force_reg (SImode, xop1);
8553
8554 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8555 x = gen_rtx_MINUS (SImode, xop0, xop1);
8556 }
8557
8558 /* Make sure to take full advantage of the pre-indexed addressing mode
8559 with absolute addresses which often allows for the base register to
8560 be factorized for multiple adjacent memory references, and it might
8561 even allow for the mini pool to be avoided entirely. */
8562 else if (CONST_INT_P (x) && optimize > 0)
8563 {
8564 unsigned int bits;
8565 HOST_WIDE_INT mask, base, index;
8566 rtx base_reg;
8567
8568 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8569 use an 8-bit index. So let's use a 12-bit index for SImode only and
8570 hope that arm_gen_constant will enable ldrb to use more bits. */
8571 bits = (mode == SImode) ? 12 : 8;
8572 mask = (1 << bits) - 1;
8573 base = INTVAL (x) & ~mask;
8574 index = INTVAL (x) & mask;
8575 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8576 {
8577 /* It'll most probably be more efficient to generate the base
8578 with more bits set and use a negative index instead. */
8579 base |= mask;
8580 index -= mask;
8581 }
8582 base_reg = force_reg (SImode, GEN_INT (base));
8583 x = plus_constant (Pmode, base_reg, index);
8584 }
8585
8586 if (flag_pic)
8587 {
8588 /* We need to find and carefully transform any SYMBOL and LABEL
8589 references; so go back to the original address expression. */
8590 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8591
8592 if (new_x != orig_x)
8593 x = new_x;
8594 }
8595
8596 return x;
8597 }
8598
8599
8600 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8601 to be legitimate. If we find one, return the new, valid address. */
8602 rtx
8603 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8604 {
8605 if (GET_CODE (x) == PLUS
8606 && CONST_INT_P (XEXP (x, 1))
8607 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8608 || INTVAL (XEXP (x, 1)) < 0))
8609 {
8610 rtx xop0 = XEXP (x, 0);
8611 rtx xop1 = XEXP (x, 1);
8612 HOST_WIDE_INT offset = INTVAL (xop1);
8613
8614 /* Try and fold the offset into a biasing of the base register and
8615 then offsetting that. Don't do this when optimizing for space
8616 since it can cause too many CSEs. */
8617 if (optimize_size && offset >= 0
8618 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8619 {
8620 HOST_WIDE_INT delta;
8621
8622 if (offset >= 256)
8623 delta = offset - (256 - GET_MODE_SIZE (mode));
8624 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8625 delta = 31 * GET_MODE_SIZE (mode);
8626 else
8627 delta = offset & (~31 * GET_MODE_SIZE (mode));
8628
8629 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8630 NULL_RTX);
8631 x = plus_constant (Pmode, xop0, delta);
8632 }
8633 else if (offset < 0 && offset > -256)
8634 /* Small negative offsets are best done with a subtract before the
8635 dereference; forcing these into a register normally takes two
8636 instructions. */
8637 x = force_operand (x, NULL_RTX);
8638 else
8639 {
8640 /* For the remaining cases, force the constant into a register. */
8641 xop1 = force_reg (SImode, xop1);
8642 x = gen_rtx_PLUS (SImode, xop0, xop1);
8643 }
8644 }
8645 else if (GET_CODE (x) == PLUS
8646 && s_register_operand (XEXP (x, 1), SImode)
8647 && !s_register_operand (XEXP (x, 0), SImode))
8648 {
8649 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8650
8651 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8652 }
8653
8654 if (flag_pic)
8655 {
8656 /* We need to find and carefully transform any SYMBOL and LABEL
8657 references; so go back to the original address expression. */
8658 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8659
8660 if (new_x != orig_x)
8661 x = new_x;
8662 }
8663
8664 return x;
8665 }
8666
8667 /* Return TRUE if X contains any TLS symbol references. */
8668
8669 bool
8670 arm_tls_referenced_p (rtx x)
8671 {
8672 if (! TARGET_HAVE_TLS)
8673 return false;
8674
8675 subrtx_iterator::array_type array;
8676 FOR_EACH_SUBRTX (iter, array, x, ALL)
8677 {
8678 const_rtx x = *iter;
8679 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8680 {
8681 /* ARM currently does not provide relocations to encode TLS variables
8682 into AArch32 instructions, only data, so there is currently no way
8683 to implement these if a literal pool is disabled. */
8684 if (arm_disable_literal_pool)
8685 sorry ("accessing thread-local storage is not currently supported "
8686 "with -mpure-code or -mslow-flash-data");
8687
8688 return true;
8689 }
8690
8691 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8692 TLS offsets, not real symbol references. */
8693 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8694 iter.skip_subrtxes ();
8695 }
8696 return false;
8697 }
8698
8699 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8700
8701 On the ARM, allow any integer (invalid ones are removed later by insn
8702 patterns), nice doubles and symbol_refs which refer to the function's
8703 constant pool XXX.
8704
8705 When generating pic allow anything. */
8706
8707 static bool
8708 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8709 {
8710 return flag_pic || !label_mentioned_p (x);
8711 }
8712
8713 static bool
8714 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8715 {
 8716   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
 8717      RTXs.  These must therefore be allowed for Thumb-1 so that the result
 8718      is valid when compiling for ARMv8-M Baseline or later.  */
8719 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8720 x = XEXP (x, 0);
8721
8722 return (CONST_INT_P (x)
8723 || CONST_DOUBLE_P (x)
8724 || CONSTANT_ADDRESS_P (x)
8725 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8726 || flag_pic);
8727 }
8728
8729 static bool
8730 arm_legitimate_constant_p (machine_mode mode, rtx x)
8731 {
8732 return (!arm_cannot_force_const_mem (mode, x)
8733 && (TARGET_32BIT
8734 ? arm_legitimate_constant_p_1 (mode, x)
8735 : thumb_legitimate_constant_p (mode, x)));
8736 }
8737
8738 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8739
8740 static bool
8741 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8742 {
8743 rtx base, offset;
8744
8745 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8746 {
8747 split_const (x, &base, &offset);
8748 if (GET_CODE (base) == SYMBOL_REF
8749 && !offset_within_block_p (base, INTVAL (offset)))
8750 return true;
8751 }
8752 return arm_tls_referenced_p (x);
8753 }
8754 \f
8755 #define REG_OR_SUBREG_REG(X) \
8756 (REG_P (X) \
8757 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8758
8759 #define REG_OR_SUBREG_RTX(X) \
8760 (REG_P (X) ? (X) : SUBREG_REG (X))
8761
8762 static inline int
8763 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8764 {
8765 machine_mode mode = GET_MODE (x);
8766 int total, words;
8767
8768 switch (code)
8769 {
8770 case ASHIFT:
8771 case ASHIFTRT:
8772 case LSHIFTRT:
8773 case ROTATERT:
8774 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8775
8776 case PLUS:
8777 case MINUS:
8778 case COMPARE:
8779 case NEG:
8780 case NOT:
8781 return COSTS_N_INSNS (1);
8782
8783 case MULT:
8784 if (arm_arch6m && arm_m_profile_small_mul)
8785 return COSTS_N_INSNS (32);
8786
8787 if (CONST_INT_P (XEXP (x, 1)))
8788 {
8789 int cycles = 0;
8790 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
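	  /* The loop below charges roughly one cycle for every two bits
	     of the constant multiplier (an early-termination model).  */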
8791
8792 while (i)
8793 {
8794 i >>= 2;
8795 cycles++;
8796 }
8797 return COSTS_N_INSNS (2) + cycles;
8798 }
8799 return COSTS_N_INSNS (1) + 16;
8800
8801 case SET:
8802 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8803 the mode. */
8804 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8805 return (COSTS_N_INSNS (words)
8806 + 4 * ((MEM_P (SET_SRC (x)))
8807 + MEM_P (SET_DEST (x))));
8808
8809 case CONST_INT:
8810 if (outer == SET)
8811 {
8812 if (UINTVAL (x) < 256
8813 /* 16-bit constant. */
8814 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8815 return 0;
8816 if (thumb_shiftable_const (INTVAL (x)))
8817 return COSTS_N_INSNS (2);
8818 return COSTS_N_INSNS (3);
8819 }
8820 else if ((outer == PLUS || outer == COMPARE)
8821 && INTVAL (x) < 256 && INTVAL (x) > -256)
8822 return 0;
8823 else if ((outer == IOR || outer == XOR || outer == AND)
8824 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8825 return COSTS_N_INSNS (1);
8826 else if (outer == AND)
8827 {
8828 int i;
8829 /* This duplicates the tests in the andsi3 expander. */
8830 for (i = 9; i <= 31; i++)
8831 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8832 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8833 return COSTS_N_INSNS (2);
8834 }
8835 else if (outer == ASHIFT || outer == ASHIFTRT
8836 || outer == LSHIFTRT)
8837 return 0;
8838 return COSTS_N_INSNS (2);
8839
8840 case CONST:
8841 case CONST_DOUBLE:
8842 case LABEL_REF:
8843 case SYMBOL_REF:
8844 return COSTS_N_INSNS (3);
8845
8846 case UDIV:
8847 case UMOD:
8848 case DIV:
8849 case MOD:
8850 return 100;
8851
8852 case TRUNCATE:
8853 return 99;
8854
8855 case AND:
8856 case XOR:
8857 case IOR:
8858 /* XXX guess. */
8859 return 8;
8860
8861 case MEM:
8862 /* XXX another guess. */
8863 /* Memory costs quite a lot for the first word, but subsequent words
8864 load at the equivalent of a single insn each. */
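      /* For example, a DImode load from the constant pool is costed
	 here as 10 + 4 + 4 == 18.  */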
8865 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8866 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8867 ? 4 : 0));
8868
8869 case IF_THEN_ELSE:
8870 /* XXX a guess. */
8871 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8872 return 14;
8873 return 2;
8874
8875 case SIGN_EXTEND:
8876 case ZERO_EXTEND:
8877 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8878 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8879
8880 if (mode == SImode)
8881 return total;
8882
8883 if (arm_arch6)
8884 return total + COSTS_N_INSNS (1);
8885
8886 /* Assume a two-shift sequence. Increase the cost slightly so
8887 we prefer actual shifts over an extend operation. */
8888 return total + 1 + COSTS_N_INSNS (2);
8889
8890 default:
8891 return 99;
8892 }
8893 }
8894
8895 /* Estimates the size cost of thumb1 instructions.
 8896    For now most of the code is copied from thumb1_rtx_costs; finer-grained
 8897    tuning will be needed when we have more related test cases.  */
8898 static inline int
8899 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8900 {
8901 machine_mode mode = GET_MODE (x);
8902 int words, cost;
8903
8904 switch (code)
8905 {
8906 case ASHIFT:
8907 case ASHIFTRT:
8908 case LSHIFTRT:
8909 case ROTATERT:
8910 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8911
8912 case PLUS:
8913 case MINUS:
 8914       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
 8915 	 patterns defined by RTL expansion, especially for the expansion of
 8916 	 multiplication.  */
 8917       if ((GET_CODE (XEXP (x, 0)) == MULT
 8918 	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8919 || (GET_CODE (XEXP (x, 1)) == MULT
8920 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8921 return COSTS_N_INSNS (2);
8922 /* Fall through. */
8923 case COMPARE:
8924 case NEG:
8925 case NOT:
8926 return COSTS_N_INSNS (1);
8927
8928 case MULT:
8929 if (CONST_INT_P (XEXP (x, 1)))
8930 {
 8931 	  /* The Thumb-1 mul instruction can't operate on a constant; we must
 8932 	     load it into a register first.  */
8933 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
 8934 	  /* For targets that have a very small and high-latency multiply
 8935 	     unit, we prefer to synthesize the multiply with up to 5 instructions,
 8936 	     giving a good balance between size and performance.  */
8937 if (arm_arch6m && arm_m_profile_small_mul)
8938 return COSTS_N_INSNS (5);
8939 else
8940 return COSTS_N_INSNS (1) + const_size;
8941 }
8942 return COSTS_N_INSNS (1);
8943
8944 case SET:
8945 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8946 the mode. */
8947 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8948 cost = COSTS_N_INSNS (words);
8949 if (satisfies_constraint_J (SET_SRC (x))
8950 || satisfies_constraint_K (SET_SRC (x))
 8951 	  /* Too big an immediate for a 2-byte mov, so MOVT will be used.  */
8952 || (CONST_INT_P (SET_SRC (x))
8953 && UINTVAL (SET_SRC (x)) >= 256
8954 && TARGET_HAVE_MOVT
8955 && satisfies_constraint_j (SET_SRC (x)))
8956 /* thumb1_movdi_insn. */
8957 || ((words > 1) && MEM_P (SET_SRC (x))))
8958 cost += COSTS_N_INSNS (1);
8959 return cost;
8960
8961 case CONST_INT:
8962 if (outer == SET)
8963 {
8964 if (UINTVAL (x) < 256)
8965 return COSTS_N_INSNS (1);
 8966 	  /* movw is 4 bytes long.  */
8967 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8968 return COSTS_N_INSNS (2);
8969 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8970 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8971 return COSTS_N_INSNS (2);
8972 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8973 if (thumb_shiftable_const (INTVAL (x)))
8974 return COSTS_N_INSNS (2);
8975 return COSTS_N_INSNS (3);
8976 }
8977 else if ((outer == PLUS || outer == COMPARE)
8978 && INTVAL (x) < 256 && INTVAL (x) > -256)
8979 return 0;
8980 else if ((outer == IOR || outer == XOR || outer == AND)
8981 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8982 return COSTS_N_INSNS (1);
8983 else if (outer == AND)
8984 {
8985 int i;
8986 /* This duplicates the tests in the andsi3 expander. */
8987 for (i = 9; i <= 31; i++)
8988 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8989 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8990 return COSTS_N_INSNS (2);
8991 }
8992 else if (outer == ASHIFT || outer == ASHIFTRT
8993 || outer == LSHIFTRT)
8994 return 0;
8995 return COSTS_N_INSNS (2);
8996
8997 case CONST:
8998 case CONST_DOUBLE:
8999 case LABEL_REF:
9000 case SYMBOL_REF:
9001 return COSTS_N_INSNS (3);
9002
9003 case UDIV:
9004 case UMOD:
9005 case DIV:
9006 case MOD:
9007 return 100;
9008
9009 case TRUNCATE:
9010 return 99;
9011
9012 case AND:
9013 case XOR:
9014 case IOR:
9015 return COSTS_N_INSNS (1);
9016
9017 case MEM:
9018 return (COSTS_N_INSNS (1)
9019 + COSTS_N_INSNS (1)
9020 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9021 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9022 ? COSTS_N_INSNS (1) : 0));
9023
9024 case IF_THEN_ELSE:
9025 /* XXX a guess. */
9026 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9027 return 14;
9028 return 2;
9029
9030 case ZERO_EXTEND:
9031 /* XXX still guessing. */
9032 switch (GET_MODE (XEXP (x, 0)))
9033 {
9034 case QImode:
9035 return (1 + (mode == DImode ? 4 : 0)
9036 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9037
9038 case HImode:
9039 return (4 + (mode == DImode ? 4 : 0)
9040 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9041
9042 case SImode:
9043 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9044
9045 default:
9046 return 99;
9047 }
9048
9049 default:
9050 return 99;
9051 }
9052 }
9053
9054 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9055 operand, then return the operand that is being shifted. If the shift
9056 is not by a constant, then set SHIFT_REG to point to the operand.
9057 Return NULL if OP is not a shifter operand. */
9058 static rtx
9059 shifter_op_p (rtx op, rtx *shift_reg)
9060 {
9061 enum rtx_code code = GET_CODE (op);
9062
9063 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9064 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9065 return XEXP (op, 0);
9066 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9067 return XEXP (op, 0);
9068 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9069 || code == ASHIFTRT)
9070 {
9071 if (!CONST_INT_P (XEXP (op, 1)))
9072 *shift_reg = XEXP (op, 1);
9073 return XEXP (op, 0);
9074 }
9075
9076 return NULL;
9077 }
9078
9079 static bool
9080 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9081 {
9082 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9083 rtx_code code = GET_CODE (x);
9084 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9085
9086 switch (XINT (x, 1))
9087 {
9088 case UNSPEC_UNALIGNED_LOAD:
9089 /* We can only do unaligned loads into the integer unit, and we can't
9090 use LDM or LDRD. */
9091 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9092 if (speed_p)
9093 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9094 + extra_cost->ldst.load_unaligned);
9095
9096 #ifdef NOT_YET
9097 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9098 ADDR_SPACE_GENERIC, speed_p);
9099 #endif
9100 return true;
9101
9102 case UNSPEC_UNALIGNED_STORE:
9103 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9104 if (speed_p)
9105 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9106 + extra_cost->ldst.store_unaligned);
9107
9108 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9109 #ifdef NOT_YET
9110 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9111 ADDR_SPACE_GENERIC, speed_p);
9112 #endif
9113 return true;
9114
9115 case UNSPEC_VRINTZ:
9116 case UNSPEC_VRINTP:
9117 case UNSPEC_VRINTM:
9118 case UNSPEC_VRINTR:
9119 case UNSPEC_VRINTX:
9120 case UNSPEC_VRINTA:
9121 if (speed_p)
9122 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9123
9124 return true;
9125 default:
9126 *cost = COSTS_N_INSNS (2);
9127 break;
9128 }
9129 return true;
9130 }
9131
9132 /* Cost of a libcall. We assume one insn per argument, an amount for the
9133 call (one insn for -Os) and then one for processing the result. */
9134 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
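/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) at -Os.  */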
9135
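/* Helper for the narrow-mode PLUS/MINUS cases below: if operand IDX of X is
   a left-shift shifter operand, cost it as an arithmetic-with-shift
   operation (adding the cost of the shift amount when it is held in a
   register) and return true from the enclosing function.  */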
9136 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9137 do \
9138 { \
9139 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9140 if (shift_op != NULL \
9141 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9142 { \
9143 if (shift_reg) \
9144 { \
9145 if (speed_p) \
9146 *cost += extra_cost->alu.arith_shift_reg; \
9147 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9148 ASHIFT, 1, speed_p); \
9149 } \
9150 else if (speed_p) \
9151 *cost += extra_cost->alu.arith_shift; \
9152 \
9153 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9154 ASHIFT, 0, speed_p) \
9155 + rtx_cost (XEXP (x, 1 - IDX), \
9156 GET_MODE (shift_op), \
9157 OP, 1, speed_p)); \
9158 return true; \
9159 } \
9160 } \
9161 while (0);
9162
9163 /* RTX costs. Make an estimate of the cost of executing the operation
 9164    X, which is contained within an operation with code OUTER_CODE.
9165 SPEED_P indicates whether the cost desired is the performance cost,
9166 or the size cost. The estimate is stored in COST and the return
9167 value is TRUE if the cost calculation is final, or FALSE if the
9168 caller should recurse through the operands of X to add additional
9169 costs.
9170
9171 We currently make no attempt to model the size savings of Thumb-2
9172 16-bit instructions. At the normal points in compilation where
9173 this code is called we have no measure of whether the condition
9174 flags are live or not, and thus no realistic way to determine what
9175 the size will eventually be. */
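/* For example, when this function returns FALSE for (plus (reg) (reg)) it
   has only accounted for the addition itself; the generic rtx_cost
   machinery then recurses and adds the (zero) cost of the two register
   operands.  */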
9176 static bool
9177 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9178 const struct cpu_cost_table *extra_cost,
9179 int *cost, bool speed_p)
9180 {
9181 machine_mode mode = GET_MODE (x);
9182
9183 *cost = COSTS_N_INSNS (1);
9184
9185 if (TARGET_THUMB1)
9186 {
9187 if (speed_p)
9188 *cost = thumb1_rtx_costs (x, code, outer_code);
9189 else
9190 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9191 return true;
9192 }
9193
9194 switch (code)
9195 {
9196 case SET:
9197 *cost = 0;
9198 /* SET RTXs don't have a mode so we get it from the destination. */
9199 mode = GET_MODE (SET_DEST (x));
9200
9201 if (REG_P (SET_SRC (x))
9202 && REG_P (SET_DEST (x)))
9203 {
9204 /* Assume that most copies can be done with a single insn,
9205 unless we don't have HW FP, in which case everything
9206 larger than word mode will require two insns. */
9207 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9208 && GET_MODE_SIZE (mode) > 4)
9209 || mode == DImode)
9210 ? 2 : 1);
9211 /* Conditional register moves can be encoded
9212 in 16 bits in Thumb mode. */
9213 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9214 *cost >>= 1;
9215
9216 return true;
9217 }
9218
9219 if (CONST_INT_P (SET_SRC (x)))
9220 {
9221 /* Handle CONST_INT here, since the value doesn't have a mode
9222 and we would otherwise be unable to work out the true cost. */
9223 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9224 0, speed_p);
9225 outer_code = SET;
9226 /* Slightly lower the cost of setting a core reg to a constant.
9227 This helps break up chains and allows for better scheduling. */
9228 if (REG_P (SET_DEST (x))
9229 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9230 *cost -= 1;
9231 x = SET_SRC (x);
9232 /* Immediate moves with an immediate in the range [0, 255] can be
9233 encoded in 16 bits in Thumb mode. */
9234 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
 9235 	    && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9236 *cost >>= 1;
9237 goto const_int_cost;
9238 }
9239
9240 return false;
9241
9242 case MEM:
 9243       /* A memory access costs 1 insn if the mode is small or the address is
 9244 	 a single register; otherwise it costs one insn per word.  */
9245 if (REG_P (XEXP (x, 0)))
9246 *cost = COSTS_N_INSNS (1);
9247 else if (flag_pic
9248 && GET_CODE (XEXP (x, 0)) == PLUS
9249 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9250 /* This will be split into two instructions.
9251 See arm.md:calculate_pic_address. */
9252 *cost = COSTS_N_INSNS (2);
9253 else
9254 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9255
9256 /* For speed optimizations, add the costs of the address and
9257 accessing memory. */
9258 if (speed_p)
9259 #ifdef NOT_YET
9260 *cost += (extra_cost->ldst.load
9261 + arm_address_cost (XEXP (x, 0), mode,
9262 ADDR_SPACE_GENERIC, speed_p));
9263 #else
9264 *cost += extra_cost->ldst.load;
9265 #endif
9266 return true;
9267
9268 case PARALLEL:
9269 {
 9270 	/* Calculations of LDM costs are complex.  We assume an initial cost
 9271 	   (ldm_1st) which covers loading the first ldm_regs_per_insn_1st
 9272 	   registers; then each additional group of up to
 9273 	   ldm_regs_per_insn_subsequent registers costs one more insn.  The
 9274 	   formula for N regs is thus:
9275
9276 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9277 + ldm_regs_per_insn_subsequent - 1)
9278 / ldm_regs_per_insn_subsequent).
9279
9280 Additional costs may also be added for addressing. A similar
9281 formula is used for STM. */
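	/* For example, with ldm_regs_per_insn_1st == 1 and
	   ldm_regs_per_insn_subsequent == 2 (illustrative figures only; the
	   real values come from the per-CPU tuning tables), a 5-register LDM
	   is costed as ldm_1st + COSTS_N_INSNS ((4 + 1) / 2), i.e.
	   ldm_1st + COSTS_N_INSNS (2).  */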
9282
9283 bool is_ldm = load_multiple_operation (x, SImode);
9284 bool is_stm = store_multiple_operation (x, SImode);
9285
9286 if (is_ldm || is_stm)
9287 {
9288 if (speed_p)
9289 {
9290 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9291 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9292 ? extra_cost->ldst.ldm_regs_per_insn_1st
9293 : extra_cost->ldst.stm_regs_per_insn_1st;
9294 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9295 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9296 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9297
9298 *cost += regs_per_insn_1st
9299 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9300 + regs_per_insn_sub - 1)
9301 / regs_per_insn_sub);
9302 return true;
9303 }
9304
9305 }
9306 return false;
9307 }
9308 case DIV:
9309 case UDIV:
9310 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9311 && (mode == SFmode || !TARGET_VFP_SINGLE))
9312 *cost += COSTS_N_INSNS (speed_p
9313 ? extra_cost->fp[mode != SFmode].div : 0);
9314 else if (mode == SImode && TARGET_IDIV)
9315 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9316 else
9317 *cost = LIBCALL_COST (2);
9318 return false; /* All arguments must be in registers. */
9319
9320 case MOD:
9321 /* MOD by a power of 2 can be expanded as:
9322 rsbs r1, r0, #0
9323 and r0, r0, #(n - 1)
9324 and r1, r1, #(n - 1)
9325 rsbpl r0, r1, #0. */
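      /* That expansion is four instructions, which matches the base cost of
	 one insn plus the COSTS_N_INSNS (3) added below.  */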
9326 if (CONST_INT_P (XEXP (x, 1))
9327 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9328 && mode == SImode)
9329 {
9330 *cost += COSTS_N_INSNS (3);
9331
9332 if (speed_p)
9333 *cost += 2 * extra_cost->alu.logical
9334 + extra_cost->alu.arith;
9335 return true;
9336 }
9337
9338 /* Fall-through. */
9339 case UMOD:
9340 *cost = LIBCALL_COST (2);
9341 return false; /* All arguments must be in registers. */
9342
9343 case ROTATE:
9344 if (mode == SImode && REG_P (XEXP (x, 1)))
9345 {
9346 *cost += (COSTS_N_INSNS (1)
9347 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9348 if (speed_p)
9349 *cost += extra_cost->alu.shift_reg;
9350 return true;
9351 }
9352 /* Fall through */
9353 case ROTATERT:
9354 case ASHIFT:
9355 case LSHIFTRT:
9356 case ASHIFTRT:
9357 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9358 {
9359 *cost += (COSTS_N_INSNS (2)
9360 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9361 if (speed_p)
9362 *cost += 2 * extra_cost->alu.shift;
9363 return true;
9364 }
9365 else if (mode == SImode)
9366 {
9367 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9368 /* Slightly disparage register shifts at -Os, but not by much. */
9369 if (!CONST_INT_P (XEXP (x, 1)))
 9370 	    *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
 9371 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9372 return true;
9373 }
9374 else if (GET_MODE_CLASS (mode) == MODE_INT
9375 && GET_MODE_SIZE (mode) < 4)
9376 {
9377 if (code == ASHIFT)
9378 {
9379 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9380 /* Slightly disparage register shifts at -Os, but not by
9381 much. */
9382 if (!CONST_INT_P (XEXP (x, 1)))
 9383 	      *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
 9384 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9385 }
9386 else if (code == LSHIFTRT || code == ASHIFTRT)
9387 {
9388 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9389 {
9390 /* Can use SBFX/UBFX. */
9391 if (speed_p)
9392 *cost += extra_cost->alu.bfx;
9393 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9394 }
9395 else
9396 {
9397 *cost += COSTS_N_INSNS (1);
9398 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9399 if (speed_p)
9400 {
9401 if (CONST_INT_P (XEXP (x, 1)))
9402 *cost += 2 * extra_cost->alu.shift;
9403 else
9404 *cost += (extra_cost->alu.shift
9405 + extra_cost->alu.shift_reg);
9406 }
9407 else
9408 /* Slightly disparage register shifts. */
9409 *cost += !CONST_INT_P (XEXP (x, 1));
9410 }
9411 }
9412 else /* Rotates. */
9413 {
9414 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9415 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9416 if (speed_p)
9417 {
9418 if (CONST_INT_P (XEXP (x, 1)))
9419 *cost += (2 * extra_cost->alu.shift
9420 + extra_cost->alu.log_shift);
9421 else
9422 *cost += (extra_cost->alu.shift
9423 + extra_cost->alu.shift_reg
9424 + extra_cost->alu.log_shift_reg);
9425 }
9426 }
9427 return true;
9428 }
9429
9430 *cost = LIBCALL_COST (2);
9431 return false;
9432
9433 case BSWAP:
9434 if (arm_arch6)
9435 {
9436 if (mode == SImode)
9437 {
9438 if (speed_p)
9439 *cost += extra_cost->alu.rev;
9440
9441 return false;
9442 }
9443 }
9444 else
9445 {
9446 /* No rev instruction available. Look at arm_legacy_rev
9447 and thumb_legacy_rev for the form of RTL used then. */
9448 if (TARGET_THUMB)
9449 {
9450 *cost += COSTS_N_INSNS (9);
9451
9452 if (speed_p)
9453 {
9454 *cost += 6 * extra_cost->alu.shift;
9455 *cost += 3 * extra_cost->alu.logical;
9456 }
9457 }
9458 else
9459 {
9460 *cost += COSTS_N_INSNS (4);
9461
9462 if (speed_p)
9463 {
9464 *cost += 2 * extra_cost->alu.shift;
9465 *cost += extra_cost->alu.arith_shift;
9466 *cost += 2 * extra_cost->alu.logical;
9467 }
9468 }
9469 return true;
9470 }
9471 return false;
9472
9473 case MINUS:
9474 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9475 && (mode == SFmode || !TARGET_VFP_SINGLE))
9476 {
9477 if (GET_CODE (XEXP (x, 0)) == MULT
9478 || GET_CODE (XEXP (x, 1)) == MULT)
9479 {
9480 rtx mul_op0, mul_op1, sub_op;
9481
9482 if (speed_p)
9483 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9484
9485 if (GET_CODE (XEXP (x, 0)) == MULT)
9486 {
9487 mul_op0 = XEXP (XEXP (x, 0), 0);
9488 mul_op1 = XEXP (XEXP (x, 0), 1);
9489 sub_op = XEXP (x, 1);
9490 }
9491 else
9492 {
9493 mul_op0 = XEXP (XEXP (x, 1), 0);
9494 mul_op1 = XEXP (XEXP (x, 1), 1);
9495 sub_op = XEXP (x, 0);
9496 }
9497
9498 /* The first operand of the multiply may be optionally
9499 negated. */
9500 if (GET_CODE (mul_op0) == NEG)
9501 mul_op0 = XEXP (mul_op0, 0);
9502
9503 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9504 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9505 + rtx_cost (sub_op, mode, code, 0, speed_p));
9506
9507 return true;
9508 }
9509
9510 if (speed_p)
9511 *cost += extra_cost->fp[mode != SFmode].addsub;
9512 return false;
9513 }
9514
9515 if (mode == SImode)
9516 {
9517 rtx shift_by_reg = NULL;
9518 rtx shift_op;
9519 rtx non_shift_op;
9520
9521 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9522 if (shift_op == NULL)
9523 {
9524 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9525 non_shift_op = XEXP (x, 0);
9526 }
9527 else
9528 non_shift_op = XEXP (x, 1);
9529
9530 if (shift_op != NULL)
9531 {
9532 if (shift_by_reg != NULL)
9533 {
9534 if (speed_p)
9535 *cost += extra_cost->alu.arith_shift_reg;
9536 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9537 }
9538 else if (speed_p)
9539 *cost += extra_cost->alu.arith_shift;
9540
9541 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9542 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9543 return true;
9544 }
9545
9546 if (arm_arch_thumb2
9547 && GET_CODE (XEXP (x, 1)) == MULT)
9548 {
9549 /* MLS. */
9550 if (speed_p)
9551 *cost += extra_cost->mult[0].add;
9552 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9553 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9554 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9555 return true;
9556 }
9557
9558 if (CONST_INT_P (XEXP (x, 0)))
9559 {
9560 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9561 INTVAL (XEXP (x, 0)), NULL_RTX,
9562 NULL_RTX, 1, 0);
9563 *cost = COSTS_N_INSNS (insns);
9564 if (speed_p)
9565 *cost += insns * extra_cost->alu.arith;
9566 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9567 return true;
9568 }
9569 else if (speed_p)
9570 *cost += extra_cost->alu.arith;
9571
9572 return false;
9573 }
9574
9575 if (GET_MODE_CLASS (mode) == MODE_INT
9576 && GET_MODE_SIZE (mode) < 4)
9577 {
9578 rtx shift_op, shift_reg;
9579 shift_reg = NULL;
9580
9581 /* We check both sides of the MINUS for shifter operands since,
9582 unlike PLUS, it's not commutative. */
9583
9584 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9585 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9586
9587 /* Slightly disparage, as we might need to widen the result. */
9588 *cost += 1;
9589 if (speed_p)
9590 *cost += extra_cost->alu.arith;
9591
9592 if (CONST_INT_P (XEXP (x, 0)))
9593 {
9594 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9595 return true;
9596 }
9597
9598 return false;
9599 }
9600
9601 if (mode == DImode)
9602 {
9603 *cost += COSTS_N_INSNS (1);
9604
9605 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9606 {
9607 rtx op1 = XEXP (x, 1);
9608
9609 if (speed_p)
9610 *cost += 2 * extra_cost->alu.arith;
9611
9612 if (GET_CODE (op1) == ZERO_EXTEND)
9613 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9614 0, speed_p);
9615 else
9616 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9617 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9618 0, speed_p);
9619 return true;
9620 }
9621 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9622 {
9623 if (speed_p)
9624 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9625 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9626 0, speed_p)
9627 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9628 return true;
9629 }
9630 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9631 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9632 {
9633 if (speed_p)
9634 *cost += (extra_cost->alu.arith
9635 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9636 ? extra_cost->alu.arith
9637 : extra_cost->alu.arith_shift));
9638 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9639 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9640 GET_CODE (XEXP (x, 1)), 0, speed_p));
9641 return true;
9642 }
9643
9644 if (speed_p)
9645 *cost += 2 * extra_cost->alu.arith;
9646 return false;
9647 }
9648
9649 /* Vector mode? */
9650
9651 *cost = LIBCALL_COST (2);
9652 return false;
9653
9654 case PLUS:
9655 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9656 && (mode == SFmode || !TARGET_VFP_SINGLE))
9657 {
9658 if (GET_CODE (XEXP (x, 0)) == MULT)
9659 {
9660 rtx mul_op0, mul_op1, add_op;
9661
9662 if (speed_p)
9663 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9664
9665 mul_op0 = XEXP (XEXP (x, 0), 0);
9666 mul_op1 = XEXP (XEXP (x, 0), 1);
9667 add_op = XEXP (x, 1);
9668
9669 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9670 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9671 + rtx_cost (add_op, mode, code, 0, speed_p));
9672
9673 return true;
9674 }
9675
9676 if (speed_p)
9677 *cost += extra_cost->fp[mode != SFmode].addsub;
9678 return false;
9679 }
9680 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9681 {
9682 *cost = LIBCALL_COST (2);
9683 return false;
9684 }
9685
9686 /* Narrow modes can be synthesized in SImode, but the range
9687 of useful sub-operations is limited. Check for shift operations
9688 on one of the operands. Only left shifts can be used in the
9689 narrow modes. */
9690 if (GET_MODE_CLASS (mode) == MODE_INT
9691 && GET_MODE_SIZE (mode) < 4)
9692 {
9693 rtx shift_op, shift_reg;
9694 shift_reg = NULL;
9695
9696 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9697
9698 if (CONST_INT_P (XEXP (x, 1)))
9699 {
9700 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9701 INTVAL (XEXP (x, 1)), NULL_RTX,
9702 NULL_RTX, 1, 0);
9703 *cost = COSTS_N_INSNS (insns);
9704 if (speed_p)
9705 *cost += insns * extra_cost->alu.arith;
9706 /* Slightly penalize a narrow operation as the result may
9707 need widening. */
9708 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9709 return true;
9710 }
9711
9712 /* Slightly penalize a narrow operation as the result may
9713 need widening. */
9714 *cost += 1;
9715 if (speed_p)
9716 *cost += extra_cost->alu.arith;
9717
9718 return false;
9719 }
9720
9721 if (mode == SImode)
9722 {
9723 rtx shift_op, shift_reg;
9724
9725 if (TARGET_INT_SIMD
9726 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9727 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9728 {
9729 /* UXTA[BH] or SXTA[BH]. */
9730 if (speed_p)
9731 *cost += extra_cost->alu.extend_arith;
9732 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9733 0, speed_p)
9734 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9735 return true;
9736 }
9737
9738 shift_reg = NULL;
9739 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9740 if (shift_op != NULL)
9741 {
9742 if (shift_reg)
9743 {
9744 if (speed_p)
9745 *cost += extra_cost->alu.arith_shift_reg;
9746 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9747 }
9748 else if (speed_p)
9749 *cost += extra_cost->alu.arith_shift;
9750
9751 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9752 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9753 return true;
9754 }
9755 if (GET_CODE (XEXP (x, 0)) == MULT)
9756 {
9757 rtx mul_op = XEXP (x, 0);
9758
9759 if (TARGET_DSP_MULTIPLY
9760 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9761 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9762 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9763 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9764 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9765 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9766 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9767 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9768 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9769 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9770 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9771 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9772 == 16))))))
9773 {
9774 /* SMLA[BT][BT]. */
9775 if (speed_p)
9776 *cost += extra_cost->mult[0].extend_add;
9777 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9778 SIGN_EXTEND, 0, speed_p)
9779 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9780 SIGN_EXTEND, 0, speed_p)
9781 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9782 return true;
9783 }
9784
9785 if (speed_p)
9786 *cost += extra_cost->mult[0].add;
9787 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9788 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9789 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9790 return true;
9791 }
9792 if (CONST_INT_P (XEXP (x, 1)))
9793 {
9794 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9795 INTVAL (XEXP (x, 1)), NULL_RTX,
9796 NULL_RTX, 1, 0);
9797 *cost = COSTS_N_INSNS (insns);
9798 if (speed_p)
9799 *cost += insns * extra_cost->alu.arith;
9800 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9801 return true;
9802 }
9803 else if (speed_p)
9804 *cost += extra_cost->alu.arith;
9805
9806 return false;
9807 }
9808
9809 if (mode == DImode)
9810 {
9811 if (arm_arch3m
9812 && GET_CODE (XEXP (x, 0)) == MULT
9813 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9814 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9815 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9816 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9817 {
9818 if (speed_p)
9819 *cost += extra_cost->mult[1].extend_add;
9820 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9821 ZERO_EXTEND, 0, speed_p)
9822 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9823 ZERO_EXTEND, 0, speed_p)
9824 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9825 return true;
9826 }
9827
9828 *cost += COSTS_N_INSNS (1);
9829
9830 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9831 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9832 {
9833 if (speed_p)
9834 *cost += (extra_cost->alu.arith
9835 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9836 ? extra_cost->alu.arith
9837 : extra_cost->alu.arith_shift));
9838
9839 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9840 0, speed_p)
9841 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9842 return true;
9843 }
9844
9845 if (speed_p)
9846 *cost += 2 * extra_cost->alu.arith;
9847 return false;
9848 }
9849
9850 /* Vector mode? */
9851 *cost = LIBCALL_COST (2);
9852 return false;
9853 case IOR:
9854 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9855 {
9856 if (speed_p)
9857 *cost += extra_cost->alu.rev;
9858
9859 return true;
9860 }
9861 /* Fall through. */
9862 case AND: case XOR:
9863 if (mode == SImode)
9864 {
9865 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9866 rtx op0 = XEXP (x, 0);
9867 rtx shift_op, shift_reg;
9868
9869 if (subcode == NOT
9870 && (code == AND
9871 || (code == IOR && TARGET_THUMB2)))
9872 op0 = XEXP (op0, 0);
9873
9874 shift_reg = NULL;
9875 shift_op = shifter_op_p (op0, &shift_reg);
9876 if (shift_op != NULL)
9877 {
9878 if (shift_reg)
9879 {
9880 if (speed_p)
9881 *cost += extra_cost->alu.log_shift_reg;
9882 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9883 }
9884 else if (speed_p)
9885 *cost += extra_cost->alu.log_shift;
9886
9887 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9888 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9889 return true;
9890 }
9891
9892 if (CONST_INT_P (XEXP (x, 1)))
9893 {
9894 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9895 INTVAL (XEXP (x, 1)), NULL_RTX,
9896 NULL_RTX, 1, 0);
9897
9898 *cost = COSTS_N_INSNS (insns);
9899 if (speed_p)
9900 *cost += insns * extra_cost->alu.logical;
9901 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9902 return true;
9903 }
9904
9905 if (speed_p)
9906 *cost += extra_cost->alu.logical;
9907 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9908 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9909 return true;
9910 }
9911
9912 if (mode == DImode)
9913 {
9914 rtx op0 = XEXP (x, 0);
9915 enum rtx_code subcode = GET_CODE (op0);
9916
9917 *cost += COSTS_N_INSNS (1);
9918
9919 if (subcode == NOT
9920 && (code == AND
9921 || (code == IOR && TARGET_THUMB2)))
9922 op0 = XEXP (op0, 0);
9923
9924 if (GET_CODE (op0) == ZERO_EXTEND)
9925 {
9926 if (speed_p)
9927 *cost += 2 * extra_cost->alu.logical;
9928
9929 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9930 0, speed_p)
9931 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9932 return true;
9933 }
9934 else if (GET_CODE (op0) == SIGN_EXTEND)
9935 {
9936 if (speed_p)
9937 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9938
9939 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9940 0, speed_p)
9941 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9942 return true;
9943 }
9944
9945 if (speed_p)
9946 *cost += 2 * extra_cost->alu.logical;
9947
9948 return true;
9949 }
9950 /* Vector mode? */
9951
9952 *cost = LIBCALL_COST (2);
9953 return false;
9954
9955 case MULT:
9956 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9957 && (mode == SFmode || !TARGET_VFP_SINGLE))
9958 {
9959 rtx op0 = XEXP (x, 0);
9960
9961 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9962 op0 = XEXP (op0, 0);
9963
9964 if (speed_p)
9965 *cost += extra_cost->fp[mode != SFmode].mult;
9966
9967 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9968 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9969 return true;
9970 }
9971 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9972 {
9973 *cost = LIBCALL_COST (2);
9974 return false;
9975 }
9976
9977 if (mode == SImode)
9978 {
9979 if (TARGET_DSP_MULTIPLY
9980 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9981 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9982 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9983 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9984 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9985 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9986 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9987 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9988 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9989 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9990 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9991 && (INTVAL (XEXP (XEXP (x, 1), 1))
9992 == 16))))))
9993 {
9994 /* SMUL[TB][TB]. */
9995 if (speed_p)
9996 *cost += extra_cost->mult[0].extend;
9997 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9998 SIGN_EXTEND, 0, speed_p);
9999 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10000 SIGN_EXTEND, 1, speed_p);
10001 return true;
10002 }
10003 if (speed_p)
10004 *cost += extra_cost->mult[0].simple;
10005 return false;
10006 }
10007
10008 if (mode == DImode)
10009 {
10010 if (arm_arch3m
10011 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10012 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10013 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10014 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10015 {
10016 if (speed_p)
10017 *cost += extra_cost->mult[1].extend;
10018 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10019 ZERO_EXTEND, 0, speed_p)
10020 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10021 ZERO_EXTEND, 0, speed_p));
10022 return true;
10023 }
10024
10025 *cost = LIBCALL_COST (2);
10026 return false;
10027 }
10028
10029 /* Vector mode? */
10030 *cost = LIBCALL_COST (2);
10031 return false;
10032
10033 case NEG:
10034 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10035 && (mode == SFmode || !TARGET_VFP_SINGLE))
10036 {
10037 if (GET_CODE (XEXP (x, 0)) == MULT)
10038 {
10039 /* VNMUL. */
10040 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10041 return true;
10042 }
10043
10044 if (speed_p)
10045 *cost += extra_cost->fp[mode != SFmode].neg;
10046
10047 return false;
10048 }
10049 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10050 {
10051 *cost = LIBCALL_COST (1);
10052 return false;
10053 }
10054
10055 if (mode == SImode)
10056 {
10057 if (GET_CODE (XEXP (x, 0)) == ABS)
10058 {
10059 *cost += COSTS_N_INSNS (1);
10060 /* Assume the non-flag-changing variant. */
10061 if (speed_p)
10062 *cost += (extra_cost->alu.log_shift
10063 + extra_cost->alu.arith_shift);
10064 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10065 return true;
10066 }
10067
10068 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10069 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10070 {
10071 *cost += COSTS_N_INSNS (1);
10072 /* No extra cost for MOV imm and MVN imm. */
10073 /* If the comparison op is using the flags, there's no further
 10074 	     cost; otherwise we need to add the cost of the comparison.  */
10075 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10076 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10077 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10078 {
10079 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10080 *cost += (COSTS_N_INSNS (1)
10081 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10082 0, speed_p)
10083 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10084 1, speed_p));
10085 if (speed_p)
10086 *cost += extra_cost->alu.arith;
10087 }
10088 return true;
10089 }
10090
10091 if (speed_p)
10092 *cost += extra_cost->alu.arith;
10093 return false;
10094 }
10095
10096 if (GET_MODE_CLASS (mode) == MODE_INT
10097 && GET_MODE_SIZE (mode) < 4)
10098 {
10099 /* Slightly disparage, as we might need an extend operation. */
10100 *cost += 1;
10101 if (speed_p)
10102 *cost += extra_cost->alu.arith;
10103 return false;
10104 }
10105
10106 if (mode == DImode)
10107 {
10108 *cost += COSTS_N_INSNS (1);
10109 if (speed_p)
10110 *cost += 2 * extra_cost->alu.arith;
10111 return false;
10112 }
10113
10114 /* Vector mode? */
10115 *cost = LIBCALL_COST (1);
10116 return false;
10117
10118 case NOT:
10119 if (mode == SImode)
10120 {
10121 rtx shift_op;
10122 rtx shift_reg = NULL;
10123
10124 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10125
10126 if (shift_op)
10127 {
10128 if (shift_reg != NULL)
10129 {
10130 if (speed_p)
10131 *cost += extra_cost->alu.log_shift_reg;
10132 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10133 }
10134 else if (speed_p)
10135 *cost += extra_cost->alu.log_shift;
10136 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10137 return true;
10138 }
10139
10140 if (speed_p)
10141 *cost += extra_cost->alu.logical;
10142 return false;
10143 }
10144 if (mode == DImode)
10145 {
10146 *cost += COSTS_N_INSNS (1);
10147 return false;
10148 }
10149
10150 /* Vector mode? */
10151
10152 *cost += LIBCALL_COST (1);
10153 return false;
10154
10155 case IF_THEN_ELSE:
10156 {
10157 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10158 {
10159 *cost += COSTS_N_INSNS (3);
10160 return true;
10161 }
10162 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10163 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10164
10165 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
 10166       /* Assume that if one arm of the if_then_else is a register,
 10167 	 it will be tied to the result and the corresponding
 10168 	 conditional insn eliminated.  */
10169 if (REG_P (XEXP (x, 1)))
10170 *cost += op2cost;
10171 else if (REG_P (XEXP (x, 2)))
10172 *cost += op1cost;
10173 else
10174 {
10175 if (speed_p)
10176 {
10177 if (extra_cost->alu.non_exec_costs_exec)
10178 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10179 else
10180 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10181 }
10182 else
10183 *cost += op1cost + op2cost;
10184 }
10185 }
10186 return true;
10187
10188 case COMPARE:
10189 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10190 *cost = 0;
10191 else
10192 {
10193 machine_mode op0mode;
10194 /* We'll mostly assume that the cost of a compare is the cost of the
10195 LHS. However, there are some notable exceptions. */
10196
10197 /* Floating point compares are never done as side-effects. */
10198 op0mode = GET_MODE (XEXP (x, 0));
10199 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10200 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10201 {
10202 if (speed_p)
10203 *cost += extra_cost->fp[op0mode != SFmode].compare;
10204
10205 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10206 {
10207 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10208 return true;
10209 }
10210
10211 return false;
10212 }
10213 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10214 {
10215 *cost = LIBCALL_COST (2);
10216 return false;
10217 }
10218
10219 /* DImode compares normally take two insns. */
10220 if (op0mode == DImode)
10221 {
10222 *cost += COSTS_N_INSNS (1);
10223 if (speed_p)
10224 *cost += 2 * extra_cost->alu.arith;
10225 return false;
10226 }
10227
10228 if (op0mode == SImode)
10229 {
10230 rtx shift_op;
10231 rtx shift_reg;
10232
10233 if (XEXP (x, 1) == const0_rtx
10234 && !(REG_P (XEXP (x, 0))
10235 || (GET_CODE (XEXP (x, 0)) == SUBREG
10236 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10237 {
10238 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10239
10240 /* Multiply operations that set the flags are often
10241 significantly more expensive. */
10242 if (speed_p
10243 && GET_CODE (XEXP (x, 0)) == MULT
10244 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10245 *cost += extra_cost->mult[0].flag_setting;
10246
10247 if (speed_p
10248 && GET_CODE (XEXP (x, 0)) == PLUS
10249 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10250 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10251 0), 1), mode))
10252 *cost += extra_cost->mult[0].flag_setting;
10253 return true;
10254 }
10255
10256 shift_reg = NULL;
10257 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10258 if (shift_op != NULL)
10259 {
10260 if (shift_reg != NULL)
10261 {
10262 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10263 1, speed_p);
10264 if (speed_p)
10265 *cost += extra_cost->alu.arith_shift_reg;
10266 }
10267 else if (speed_p)
10268 *cost += extra_cost->alu.arith_shift;
10269 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10270 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10271 return true;
10272 }
10273
10274 if (speed_p)
10275 *cost += extra_cost->alu.arith;
10276 if (CONST_INT_P (XEXP (x, 1))
10277 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10278 {
10279 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10280 return true;
10281 }
10282 return false;
10283 }
10284
10285 /* Vector mode? */
10286
10287 *cost = LIBCALL_COST (2);
10288 return false;
10289 }
10290 return true;
10291
10292 case EQ:
10293 case NE:
10294 case LT:
10295 case LE:
10296 case GT:
10297 case GE:
10298 case LTU:
10299 case LEU:
10300 case GEU:
10301 case GTU:
10302 case ORDERED:
10303 case UNORDERED:
10304 case UNEQ:
10305 case UNLE:
10306 case UNLT:
10307 case UNGE:
10308 case UNGT:
10309 case LTGT:
10310 if (outer_code == SET)
10311 {
10312 /* Is it a store-flag operation? */
10313 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10314 && XEXP (x, 1) == const0_rtx)
10315 {
10316 /* Thumb also needs an IT insn. */
10317 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10318 return true;
10319 }
10320 if (XEXP (x, 1) == const0_rtx)
10321 {
10322 switch (code)
10323 {
10324 case LT:
10325 /* LSR Rd, Rn, #31. */
10326 if (speed_p)
10327 *cost += extra_cost->alu.shift;
10328 break;
10329
10330 case EQ:
10331 /* RSBS T1, Rn, #0
10332 ADC Rd, Rn, T1. */
10333
10334 case NE:
10335 /* SUBS T1, Rn, #1
10336 SBC Rd, Rn, T1. */
10337 *cost += COSTS_N_INSNS (1);
10338 break;
10339
10340 case LE:
10341 /* RSBS T1, Rn, Rn, LSR #31
10342 ADC Rd, Rn, T1. */
10343 *cost += COSTS_N_INSNS (1);
10344 if (speed_p)
10345 *cost += extra_cost->alu.arith_shift;
10346 break;
10347
10348 case GT:
10349 /* RSB Rd, Rn, Rn, ASR #1
10350 LSR Rd, Rd, #31. */
10351 *cost += COSTS_N_INSNS (1);
10352 if (speed_p)
10353 *cost += (extra_cost->alu.arith_shift
10354 + extra_cost->alu.shift);
10355 break;
10356
10357 case GE:
10358 /* ASR Rd, Rn, #31
10359 ADD Rd, Rn, #1. */
10360 *cost += COSTS_N_INSNS (1);
10361 if (speed_p)
10362 *cost += extra_cost->alu.shift;
10363 break;
10364
10365 default:
10366 /* Remaining cases are either meaningless or would take
10367 three insns anyway. */
10368 *cost = COSTS_N_INSNS (3);
10369 break;
10370 }
10371 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10372 return true;
10373 }
10374 else
10375 {
10376 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10377 if (CONST_INT_P (XEXP (x, 1))
10378 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10379 {
10380 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10381 return true;
10382 }
10383
10384 return false;
10385 }
10386 }
10387 /* Not directly inside a set. If it involves the condition code
10388 register it must be the condition for a branch, cond_exec or
10389 I_T_E operation. Since the comparison is performed elsewhere
10390 this is just the control part which has no additional
10391 cost. */
10392 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10393 && XEXP (x, 1) == const0_rtx)
10394 {
10395 *cost = 0;
10396 return true;
10397 }
10398 return false;
10399
10400 case ABS:
10401 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10402 && (mode == SFmode || !TARGET_VFP_SINGLE))
10403 {
10404 if (speed_p)
10405 *cost += extra_cost->fp[mode != SFmode].neg;
10406
10407 return false;
10408 }
10409 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10410 {
10411 *cost = LIBCALL_COST (1);
10412 return false;
10413 }
10414
10415 if (mode == SImode)
10416 {
10417 if (speed_p)
10418 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10419 return false;
10420 }
10421 /* Vector mode? */
10422 *cost = LIBCALL_COST (1);
10423 return false;
10424
10425 case SIGN_EXTEND:
10426 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10427 && MEM_P (XEXP (x, 0)))
10428 {
10429 if (mode == DImode)
10430 *cost += COSTS_N_INSNS (1);
10431
10432 if (!speed_p)
10433 return true;
10434
10435 if (GET_MODE (XEXP (x, 0)) == SImode)
10436 *cost += extra_cost->ldst.load;
10437 else
10438 *cost += extra_cost->ldst.load_sign_extend;
10439
10440 if (mode == DImode)
10441 *cost += extra_cost->alu.shift;
10442
10443 return true;
10444 }
10445
10446 /* Widening from less than 32-bits requires an extend operation. */
10447 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10448 {
10449 /* We have SXTB/SXTH. */
10450 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10451 if (speed_p)
10452 *cost += extra_cost->alu.extend;
10453 }
10454 else if (GET_MODE (XEXP (x, 0)) != SImode)
10455 {
10456 /* Needs two shifts. */
10457 *cost += COSTS_N_INSNS (1);
10458 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10459 if (speed_p)
10460 *cost += 2 * extra_cost->alu.shift;
10461 }
10462
10463 /* Widening beyond 32-bits requires one more insn. */
10464 if (mode == DImode)
10465 {
10466 *cost += COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.shift;
10469 }
10470
10471 return true;
10472
10473 case ZERO_EXTEND:
10474 if ((arm_arch4
10475 || GET_MODE (XEXP (x, 0)) == SImode
10476 || GET_MODE (XEXP (x, 0)) == QImode)
10477 && MEM_P (XEXP (x, 0)))
10478 {
10479 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10480
10481 if (mode == DImode)
10482 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10483
10484 return true;
10485 }
10486
10487 /* Widening from less than 32-bits requires an extend operation. */
10488 if (GET_MODE (XEXP (x, 0)) == QImode)
10489 {
10490 /* UXTB can be a shorter instruction in Thumb2, but it might
10491 be slower than the AND Rd, Rn, #255 alternative. When
10492 optimizing for speed it should never be slower to use
10493 AND, and we don't really model 16-bit vs 32-bit insns
10494 here. */
10495 if (speed_p)
10496 *cost += extra_cost->alu.logical;
10497 }
10498 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10499 {
10500 /* We have UXTB/UXTH. */
10501 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10502 if (speed_p)
10503 *cost += extra_cost->alu.extend;
10504 }
10505 else if (GET_MODE (XEXP (x, 0)) != SImode)
10506 {
10507 /* Needs two shifts. It's marginally preferable to use
10508 shifts rather than two BIC instructions as the second
10509 shift may merge with a subsequent insn as a shifter
10510 op. */
10511 *cost = COSTS_N_INSNS (2);
10512 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10513 if (speed_p)
10514 *cost += 2 * extra_cost->alu.shift;
10515 }
10516
10517 /* Widening beyond 32-bits requires one more insn. */
10518 if (mode == DImode)
10519 {
10520 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10521 }
10522
10523 return true;
10524
10525 case CONST_INT:
10526 *cost = 0;
10527 /* CONST_INT has no mode, so we cannot tell for sure how many
10528 insns are really going to be needed. The best we can do is
10529 look at the value passed. If it fits in SImode, then assume
10530 that's the mode it will be used for. Otherwise assume it
10531 will be used in DImode. */
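      /* For example, a value such as 0x123456789 does not fit in SImode, so
	 it is costed below as two SImode constants (the low and high
	 words).  */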
10532 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10533 mode = SImode;
10534 else
10535 mode = DImode;
10536
10537 /* Avoid blowing up in arm_gen_constant (). */
10538 if (!(outer_code == PLUS
10539 || outer_code == AND
10540 || outer_code == IOR
10541 || outer_code == XOR
10542 || outer_code == MINUS))
10543 outer_code = SET;
10544
10545 const_int_cost:
10546 if (mode == SImode)
10547 {
10548 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10549 INTVAL (x), NULL, NULL,
10550 0, 0));
10551 /* Extra costs? */
10552 }
10553 else
10554 {
10555 *cost += COSTS_N_INSNS (arm_gen_constant
10556 (outer_code, SImode, NULL,
10557 trunc_int_for_mode (INTVAL (x), SImode),
10558 NULL, NULL, 0, 0)
10559 + arm_gen_constant (outer_code, SImode, NULL,
10560 INTVAL (x) >> 32, NULL,
10561 NULL, 0, 0));
10562 /* Extra costs? */
10563 }
10564
10565 return true;
10566
10567 case CONST:
10568 case LABEL_REF:
10569 case SYMBOL_REF:
10570 if (speed_p)
10571 {
10572 if (arm_arch_thumb2 && !flag_pic)
10573 *cost += COSTS_N_INSNS (1);
10574 else
10575 *cost += extra_cost->ldst.load;
10576 }
10577 else
10578 *cost += COSTS_N_INSNS (1);
10579
10580 if (flag_pic)
10581 {
10582 *cost += COSTS_N_INSNS (1);
10583 if (speed_p)
10584 *cost += extra_cost->alu.arith;
10585 }
10586
10587 return true;
10588
10589 case CONST_FIXED:
10590 *cost = COSTS_N_INSNS (4);
10591 /* Fixme. */
10592 return true;
10593
10594 case CONST_DOUBLE:
10595 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && (mode == SFmode || !TARGET_VFP_SINGLE))
10597 {
10598 if (vfp3_const_double_rtx (x))
10599 {
10600 if (speed_p)
10601 *cost += extra_cost->fp[mode == DFmode].fpconst;
10602 return true;
10603 }
10604
10605 if (speed_p)
10606 {
10607 if (mode == DFmode)
10608 *cost += extra_cost->ldst.loadd;
10609 else
10610 *cost += extra_cost->ldst.loadf;
10611 }
10612 else
10613 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10614
10615 return true;
10616 }
10617 *cost = COSTS_N_INSNS (4);
10618 return true;
10619
10620 case CONST_VECTOR:
10621 /* Fixme. */
10622 if (TARGET_NEON
10623 && TARGET_HARD_FLOAT
10624 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10625 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10626 *cost = COSTS_N_INSNS (1);
10627 else
10628 *cost = COSTS_N_INSNS (4);
10629 return true;
10630
10631 case HIGH:
10632 case LO_SUM:
10633 /* When optimizing for size, we prefer constant pool entries to
10634 MOVW/MOVT pairs, so bump the cost of these slightly. */
10635 if (!speed_p)
10636 *cost += 1;
10637 return true;
10638
10639 case CLZ:
10640 if (speed_p)
10641 *cost += extra_cost->alu.clz;
10642 return false;
10643
10644 case SMIN:
10645 if (XEXP (x, 1) == const0_rtx)
10646 {
10647 if (speed_p)
10648 *cost += extra_cost->alu.log_shift;
10649 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10650 return true;
10651 }
10652 /* Fall through. */
10653 case SMAX:
10654 case UMIN:
10655 case UMAX:
10656 *cost += COSTS_N_INSNS (1);
10657 return false;
10658
10659 case TRUNCATE:
10660 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10661 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10662 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10663 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10664 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10665 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10666 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10667 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10668 == ZERO_EXTEND))))
10669 {
10670 if (speed_p)
10671 *cost += extra_cost->mult[1].extend;
10672 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10673 ZERO_EXTEND, 0, speed_p)
10674 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10675 ZERO_EXTEND, 0, speed_p));
10676 return true;
10677 }
10678 *cost = LIBCALL_COST (1);
10679 return false;
10680
10681 case UNSPEC_VOLATILE:
10682 case UNSPEC:
10683 return arm_unspec_cost (x, outer_code, speed_p, cost);
10684
10685 case PC:
10686 /* Reading the PC is like reading any other register. Writing it
10687 is more expensive, but we take that into account elsewhere. */
10688 *cost = 0;
10689 return true;
10690
10691 case ZERO_EXTRACT:
10692 /* TODO: Simple zero_extract of bottom bits using AND. */
10693 /* Fall through. */
10694 case SIGN_EXTRACT:
10695 if (arm_arch6
10696 && mode == SImode
10697 && CONST_INT_P (XEXP (x, 1))
10698 && CONST_INT_P (XEXP (x, 2)))
10699 {
10700 if (speed_p)
10701 *cost += extra_cost->alu.bfx;
10702 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10703 return true;
10704 }
 10705       /* Without UBFX/SBFX, we need to resort to shift operations.  */
10706 *cost += COSTS_N_INSNS (1);
10707 if (speed_p)
10708 *cost += 2 * extra_cost->alu.shift;
10709 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10710 return true;
10711
10712 case FLOAT_EXTEND:
10713 if (TARGET_HARD_FLOAT)
10714 {
10715 if (speed_p)
10716 *cost += extra_cost->fp[mode == DFmode].widen;
10717 if (!TARGET_FPU_ARMV8
10718 && GET_MODE (XEXP (x, 0)) == HFmode)
10719 {
10720 /* Pre v8, widening HF->DF is a two-step process, first
10721 widening to SFmode. */
10722 *cost += COSTS_N_INSNS (1);
10723 if (speed_p)
10724 *cost += extra_cost->fp[0].widen;
10725 }
10726 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10727 return true;
10728 }
10729
10730 *cost = LIBCALL_COST (1);
10731 return false;
10732
10733 case FLOAT_TRUNCATE:
10734 if (TARGET_HARD_FLOAT)
10735 {
10736 if (speed_p)
10737 *cost += extra_cost->fp[mode == DFmode].narrow;
10738 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10739 return true;
10740 /* Vector modes? */
10741 }
10742 *cost = LIBCALL_COST (1);
10743 return false;
10744
10745 case FMA:
10746 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10747 {
10748 rtx op0 = XEXP (x, 0);
10749 rtx op1 = XEXP (x, 1);
10750 rtx op2 = XEXP (x, 2);
10751
10752
10753 /* vfms or vfnma. */
10754 if (GET_CODE (op0) == NEG)
10755 op0 = XEXP (op0, 0);
10756
10757 /* vfnms or vfnma. */
10758 if (GET_CODE (op2) == NEG)
10759 op2 = XEXP (op2, 0);
10760
10761 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10762 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10763 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10764
10765 if (speed_p)
10766 *cost += extra_cost->fp[mode == DFmode].fma;
10767
10768 return true;
10769 }
10770
10771 *cost = LIBCALL_COST (3);
10772 return false;
10773
10774 case FIX:
10775 case UNSIGNED_FIX:
10776 if (TARGET_HARD_FLOAT)
10777 {
10778 /* The *combine_vcvtf2i pattern reduces a vmul+vcvt into
10779 a vcvt fixed-point conversion. */
10780 if (code == FIX && mode == SImode
10781 && GET_CODE (XEXP (x, 0)) == FIX
10782 && GET_MODE (XEXP (x, 0)) == SFmode
10783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10784 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10785 > 0)
10786 {
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].toint;
10789
10790 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10791 code, 0, speed_p);
10792 return true;
10793 }
10794
10795 if (GET_MODE_CLASS (mode) == MODE_INT)
10796 {
10797 mode = GET_MODE (XEXP (x, 0));
10798 if (speed_p)
10799 *cost += extra_cost->fp[mode == DFmode].toint;
10800 /* Strip off the 'cost' of rounding towards zero. */
10801 if (GET_CODE (XEXP (x, 0)) == FIX)
10802 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10803 0, speed_p);
10804 else
10805 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10806 /* ??? Increase the cost to deal with transferring from
10807 FP -> CORE registers? */
10808 return true;
10809 }
10810 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10811 && TARGET_FPU_ARMV8)
10812 {
10813 if (speed_p)
10814 *cost += extra_cost->fp[mode == DFmode].roundint;
10815 return false;
10816 }
10817 /* Vector costs? */
10818 }
10819 *cost = LIBCALL_COST (1);
10820 return false;
10821
10822 case FLOAT:
10823 case UNSIGNED_FLOAT:
10824 if (TARGET_HARD_FLOAT)
10825 {
10826 /* ??? Increase the cost to deal with transferring from CORE
10827 -> FP registers? */
10828 if (speed_p)
10829 *cost += extra_cost->fp[mode == DFmode].fromint;
10830 return false;
10831 }
10832 *cost = LIBCALL_COST (1);
10833 return false;
10834
10835 case CALL:
10836 return true;
10837
10838 case ASM_OPERANDS:
10839 {
10840 /* Just a guess: count the instructions in the asm template
10841 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10842 though (see PR60663). */
10843 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10844 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10845
10846 *cost = COSTS_N_INSNS (asm_length + num_operands);
10847 return true;
10848 }
10849 default:
10850 if (mode != VOIDmode)
10851 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10852 else
10853 *cost = COSTS_N_INSNS (4); /* Who knows? */
10854 return false;
10855 }
10856 }
10857
10858 #undef HANDLE_NARROW_SHIFT_ARITH
10859
10860 /* RTX costs entry point. */
10861
10862 static bool
10863 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10864 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10865 {
10866 bool result;
10867 int code = GET_CODE (x);
10868 gcc_assert (current_tune->insn_extra_cost);
10869
10870 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10871 (enum rtx_code) outer_code,
10872 current_tune->insn_extra_cost,
10873 total, speed);
10874
10875 if (dump_file && (dump_flags & TDF_DETAILS))
10876 {
10877 print_rtl_single (dump_file, x);
10878 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10879 *total, result ? "final" : "partial");
10880 }
10881 return result;
10882 }
10883
10884 /* All address computations that can be done are free, but rtx cost returns
10885 much the same value for practically all of them. So we weight the different
10886 types of address here, in order of preference (most preferred first):
10887 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
10888 static inline int
10889 arm_arm_address_cost (rtx x)
10890 {
10891 enum rtx_code c = GET_CODE (x);
10892
10893 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10894 return 0;
10895 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10896 return 10;
10897
10898 if (c == PLUS)
10899 {
10900 if (CONST_INT_P (XEXP (x, 1)))
10901 return 2;
10902
10903 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10904 return 3;
10905
10906 return 4;
10907 }
10908
10909 return 6;
10910 }
10911
10912 static inline int
10913 arm_thumb_address_cost (rtx x)
10914 {
10915 enum rtx_code c = GET_CODE (x);
10916
10917 if (c == REG)
10918 return 1;
10919 if (c == PLUS
10920 && REG_P (XEXP (x, 0))
10921 && CONST_INT_P (XEXP (x, 1)))
10922 return 1;
10923
10924 return 2;
10925 }
10926
10927 static int
10928 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10929 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10930 {
10931 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10932 }
10933
10934 /* Adjust cost hook for XScale. */
10935 static bool
10936 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10937 int * cost)
10938 {
10939 /* Some true dependencies can have a higher cost depending
10940 on precisely how certain input operands are used. */
10941 if (dep_type == 0
10942 && recog_memoized (insn) >= 0
10943 && recog_memoized (dep) >= 0)
10944 {
10945 int shift_opnum = get_attr_shift (insn);
10946 enum attr_type attr_type = get_attr_type (dep);
10947
10948 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10949 operand for INSN. If we have a shifted input operand and the
10950 instruction we depend on is another ALU instruction, then we may
10951 have to account for an additional stall. */
10952 if (shift_opnum != 0
10953 && (attr_type == TYPE_ALU_SHIFT_IMM
10954 || attr_type == TYPE_ALUS_SHIFT_IMM
10955 || attr_type == TYPE_LOGIC_SHIFT_IMM
10956 || attr_type == TYPE_LOGICS_SHIFT_IMM
10957 || attr_type == TYPE_ALU_SHIFT_REG
10958 || attr_type == TYPE_ALUS_SHIFT_REG
10959 || attr_type == TYPE_LOGIC_SHIFT_REG
10960 || attr_type == TYPE_LOGICS_SHIFT_REG
10961 || attr_type == TYPE_MOV_SHIFT
10962 || attr_type == TYPE_MVN_SHIFT
10963 || attr_type == TYPE_MOV_SHIFT_REG
10964 || attr_type == TYPE_MVN_SHIFT_REG))
10965 {
10966 rtx shifted_operand;
10967 int opno;
10968
10969 /* Get the shifted operand. */
10970 extract_insn (insn);
10971 shifted_operand = recog_data.operand[shift_opnum];
10972
10973 /* Iterate over all the operands in DEP. If we write an operand
10974 that overlaps with SHIFTED_OPERAND, then we have to increase the
10975 cost of this dependency. */
10976 extract_insn (dep);
10977 preprocess_constraints (dep);
10978 for (opno = 0; opno < recog_data.n_operands; opno++)
10979 {
10980 /* We can ignore strict inputs. */
10981 if (recog_data.operand_type[opno] == OP_IN)
10982 continue;
10983
10984 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10985 shifted_operand))
10986 {
10987 *cost = 2;
10988 return false;
10989 }
10990 }
10991 }
10992 }
10993 return true;
10994 }
10995
10996 /* Adjust cost hook for Cortex A9. */
10997 static bool
10998 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10999 int * cost)
11000 {
11001 switch (dep_type)
11002 {
11003 case REG_DEP_ANTI:
11004 *cost = 0;
11005 return false;
11006
11007 case REG_DEP_TRUE:
11008 case REG_DEP_OUTPUT:
11009 if (recog_memoized (insn) >= 0
11010 && recog_memoized (dep) >= 0)
11011 {
11012 if (GET_CODE (PATTERN (insn)) == SET)
11013 {
11014 if (GET_MODE_CLASS
11015 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11016 || GET_MODE_CLASS
11017 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11018 {
11019 enum attr_type attr_type_insn = get_attr_type (insn);
11020 enum attr_type attr_type_dep = get_attr_type (dep);
11021
11022 /* By default all dependencies of the form
11023 s0 = s0 <op> s1
11024 s0 = s0 <op> s2
11025 have an extra latency of 1 cycle because
11026 of the input and output dependency in this
11027 case. However this gets modeled as a true
11028 dependency and hence all these checks. */
11029 if (REG_P (SET_DEST (PATTERN (insn)))
11030 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11031 {
11032 /* FMACS is a special case where the dependent
11033 instruction can be issued 3 cycles before
11034 the normal latency in case of an output
11035 dependency. */
11036 if ((attr_type_insn == TYPE_FMACS
11037 || attr_type_insn == TYPE_FMACD)
11038 && (attr_type_dep == TYPE_FMACS
11039 || attr_type_dep == TYPE_FMACD))
11040 {
11041 if (dep_type == REG_DEP_OUTPUT)
11042 *cost = insn_default_latency (dep) - 3;
11043 else
11044 *cost = insn_default_latency (dep);
11045 return false;
11046 }
11047 else
11048 {
11049 if (dep_type == REG_DEP_OUTPUT)
11050 *cost = insn_default_latency (dep) + 1;
11051 else
11052 *cost = insn_default_latency (dep);
11053 }
11054 return false;
11055 }
11056 }
11057 }
11058 }
11059 break;
11060
11061 default:
11062 gcc_unreachable ();
11063 }
11064
11065 return true;
11066 }
11067
11068 /* Adjust cost hook for FA726TE. */
11069 static bool
11070 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11071 int * cost)
11072 {
11073 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11074 has a penalty of 3. */
11075 if (dep_type == REG_DEP_TRUE
11076 && recog_memoized (insn) >= 0
11077 && recog_memoized (dep) >= 0
11078 && get_attr_conds (dep) == CONDS_SET)
11079 {
11080 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11081 if (get_attr_conds (insn) == CONDS_USE
11082 && get_attr_type (insn) != TYPE_BRANCH)
11083 {
11084 *cost = 3;
11085 return false;
11086 }
11087
11088 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11089 || get_attr_conds (insn) == CONDS_USE)
11090 {
11091 *cost = 0;
11092 return false;
11093 }
11094 }
11095
11096 return true;
11097 }
11098
11099 /* Implement TARGET_REGISTER_MOVE_COST.
11100
11101 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11102 it is typically more expensive than a single memory access. We set
11103 the cost to less than that of two memory accesses so that floating
11104 point to integer conversion does not go through memory. */
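/* For example, on TARGET_32BIT the VFP_REGS <-> GENERAL_REGS case below
   returns 15, whereas arm_memory_move_cost returns 10 per access, so a
   store/load round trip through memory compares as 20 and the direct
   register move is preferred.  */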
11105
11106 int
11107 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11108 reg_class_t from, reg_class_t to)
11109 {
11110 if (TARGET_32BIT)
11111 {
11112 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11113 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11114 return 15;
11115 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11116 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11117 return 4;
11118 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11119 return 20;
11120 else
11121 return 2;
11122 }
11123 else
11124 {
11125 if (from == HI_REGS || to == HI_REGS)
11126 return 4;
11127 else
11128 return 2;
11129 }
11130 }
11131
11132 /* Implement TARGET_MEMORY_MOVE_COST. */
11133
11134 int
11135 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11136 bool in ATTRIBUTE_UNUSED)
11137 {
11138 if (TARGET_32BIT)
11139 return 10;
11140 else
11141 {
11142 if (GET_MODE_SIZE (mode) < 4)
11143 return 8;
11144 else
11145 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11146 }
11147 }
11148
11149 /* Vectorizer cost model implementation. */
11150
11151 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11152 static int
11153 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11154 tree vectype,
11155 int misalign ATTRIBUTE_UNUSED)
11156 {
11157 unsigned elements;
11158
11159 switch (type_of_cost)
11160 {
11161 case scalar_stmt:
11162 return current_tune->vec_costs->scalar_stmt_cost;
11163
11164 case scalar_load:
11165 return current_tune->vec_costs->scalar_load_cost;
11166
11167 case scalar_store:
11168 return current_tune->vec_costs->scalar_store_cost;
11169
11170 case vector_stmt:
11171 return current_tune->vec_costs->vec_stmt_cost;
11172
11173 case vector_load:
11174 return current_tune->vec_costs->vec_align_load_cost;
11175
11176 case vector_store:
11177 return current_tune->vec_costs->vec_store_cost;
11178
11179 case vec_to_scalar:
11180 return current_tune->vec_costs->vec_to_scalar_cost;
11181
11182 case scalar_to_vec:
11183 return current_tune->vec_costs->scalar_to_vec_cost;
11184
11185 case unaligned_load:
11186 return current_tune->vec_costs->vec_unalign_load_cost;
11187
11188 case unaligned_store:
11189 return current_tune->vec_costs->vec_unalign_store_cost;
11190
11191 case cond_branch_taken:
11192 return current_tune->vec_costs->cond_taken_branch_cost;
11193
11194 case cond_branch_not_taken:
11195 return current_tune->vec_costs->cond_not_taken_branch_cost;
11196
11197 case vec_perm:
11198 case vec_promote_demote:
11199 return current_tune->vec_costs->vec_stmt_cost;
11200
11201 case vec_construct:
11202 elements = TYPE_VECTOR_SUBPARTS (vectype);
11203 return elements / 2 + 1;
11204
11205 default:
11206 gcc_unreachable ();
11207 }
11208 }
11209
11210 /* Implement targetm.vectorize.add_stmt_cost. */
11211
11212 static unsigned
11213 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11214 struct _stmt_vec_info *stmt_info, int misalign,
11215 enum vect_cost_model_location where)
11216 {
11217 unsigned *cost = (unsigned *) data;
11218 unsigned retval = 0;
11219
11220 if (flag_vect_cost_model)
11221 {
11222 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11223 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11224
11225 /* Statements in an inner loop relative to the loop being
11226 vectorized are weighted more heavily. The value here is
11227 arbitrary and could potentially be improved with analysis. */
11228 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11229 count *= 50; /* FIXME. */
11230
11231 retval = (unsigned) (count * stmt_cost);
11232 cost[where] += retval;
11233 }
11234
11235 return retval;
11236 }
11237
11238 /* Return true if and only if this insn can dual-issue only as older. */
11239 static bool
11240 cortexa7_older_only (rtx_insn *insn)
11241 {
11242 if (recog_memoized (insn) < 0)
11243 return false;
11244
11245 switch (get_attr_type (insn))
11246 {
11247 case TYPE_ALU_DSP_REG:
11248 case TYPE_ALU_SREG:
11249 case TYPE_ALUS_SREG:
11250 case TYPE_LOGIC_REG:
11251 case TYPE_LOGICS_REG:
11252 case TYPE_ADC_REG:
11253 case TYPE_ADCS_REG:
11254 case TYPE_ADR:
11255 case TYPE_BFM:
11256 case TYPE_REV:
11257 case TYPE_MVN_REG:
11258 case TYPE_SHIFT_IMM:
11259 case TYPE_SHIFT_REG:
11260 case TYPE_LOAD_BYTE:
11261 case TYPE_LOAD1:
11262 case TYPE_STORE1:
11263 case TYPE_FFARITHS:
11264 case TYPE_FADDS:
11265 case TYPE_FFARITHD:
11266 case TYPE_FADDD:
11267 case TYPE_FMOV:
11268 case TYPE_F_CVT:
11269 case TYPE_FCMPS:
11270 case TYPE_FCMPD:
11271 case TYPE_FCONSTS:
11272 case TYPE_FCONSTD:
11273 case TYPE_FMULS:
11274 case TYPE_FMACS:
11275 case TYPE_FMULD:
11276 case TYPE_FMACD:
11277 case TYPE_FDIVS:
11278 case TYPE_FDIVD:
11279 case TYPE_F_MRC:
11280 case TYPE_F_MRRC:
11281 case TYPE_F_FLAG:
11282 case TYPE_F_LOADS:
11283 case TYPE_F_STORES:
11284 return true;
11285 default:
11286 return false;
11287 }
11288 }
11289
11290 /* Return true if and only if this insn can dual-issue as younger. */
11291 static bool
11292 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11293 {
11294 if (recog_memoized (insn) < 0)
11295 {
11296 if (verbose > 5)
11297 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11298 return false;
11299 }
11300
11301 switch (get_attr_type (insn))
11302 {
11303 case TYPE_ALU_IMM:
11304 case TYPE_ALUS_IMM:
11305 case TYPE_LOGIC_IMM:
11306 case TYPE_LOGICS_IMM:
11307 case TYPE_EXTEND:
11308 case TYPE_MVN_IMM:
11309 case TYPE_MOV_IMM:
11310 case TYPE_MOV_REG:
11311 case TYPE_MOV_SHIFT:
11312 case TYPE_MOV_SHIFT_REG:
11313 case TYPE_BRANCH:
11314 case TYPE_CALL:
11315 return true;
11316 default:
11317 return false;
11318 }
11319 }
11320
11321
11322 /* Look for an instruction that can dual issue only as an older
11323 instruction, and move it in front of any instructions that can
11324 dual-issue as younger, while preserving the relative order of all
11325 other instructions in the ready list. This is a heuristic to help
11326 dual-issue in later cycles, by postponing issue of more flexible
11327 instructions. This heuristic may affect dual issue opportunities
11328 in the current cycle. */
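/* For example, suppose the ready list holds a MOV_IMM insn nearer the head
   and an ALU_SREG insn (both classified by the type switches above) further
   back.  The loop below records the MOV_IMM as FIRST_YOUNGER, then finds the
   ALU_SREG and records it as FIRST_OLDER_ONLY; the final shuffle slides the
   intervening insns one slot towards the tail and drops the older-only insn
   into the MOV_IMM's former position, so it is issued first.  */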
11329 static void
11330 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11331 int *n_readyp, int clock)
11332 {
11333 int i;
11334 int first_older_only = -1, first_younger = -1;
11335
11336 if (verbose > 5)
11337 fprintf (file,
11338 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11339 clock,
11340 *n_readyp);
11341
11342 /* Traverse the ready list from the head (the instruction to issue
11343 first), looking for the first instruction that can issue as
11344 younger and the first instruction that can dual-issue only as
11345 older. */
11346 for (i = *n_readyp - 1; i >= 0; i--)
11347 {
11348 rtx_insn *insn = ready[i];
11349 if (cortexa7_older_only (insn))
11350 {
11351 first_older_only = i;
11352 if (verbose > 5)
11353 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11354 break;
11355 }
11356 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11357 first_younger = i;
11358 }
11359
11360 /* Nothing to reorder because either no younger insn was found, or an insn
11361 that can dual-issue only as older appears before any insn that
11362 can dual-issue as younger. */
11363 if (first_younger == -1)
11364 {
11365 if (verbose > 5)
11366 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11367 return;
11368 }
11369
11370 /* Nothing to reorder because no older-only insn in the ready list. */
11371 if (first_older_only == -1)
11372 {
11373 if (verbose > 5)
11374 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11375 return;
11376 }
11377
11378 /* Move first_older_only insn before first_younger. */
11379 if (verbose > 5)
11380 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11381 INSN_UID(ready [first_older_only]),
11382 INSN_UID(ready [first_younger]));
11383 rtx_insn *first_older_only_insn = ready [first_older_only];
11384 for (i = first_older_only; i < first_younger; i++)
11385 {
11386 ready[i] = ready[i+1];
11387 }
11388
11389 ready[i] = first_older_only_insn;
11390 return;
11391 }
11392
11393 /* Implement TARGET_SCHED_REORDER. */
11394 static int
11395 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11396 int clock)
11397 {
11398 switch (arm_tune)
11399 {
11400 case TARGET_CPU_cortexa7:
11401 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11402 break;
11403 default:
11404 /* Do nothing for other cores. */
11405 break;
11406 }
11407
11408 return arm_issue_rate ();
11409 }
11410
11411 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11412 It corrects the value of COST based on the relationship between
11413 INSN and DEP through the dependence of kind DEP_TYPE. It returns the new
11414 value. There is a per-core adjust_cost hook to adjust scheduler costs
11415 and the per-core hook can choose to completely override the generic
11416 adjust_cost function. Only put bits of code into arm_adjust_cost that
11417 are common across all cores. */
11418 static int
11419 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11420 unsigned int)
11421 {
11422 rtx i_pat, d_pat;
11423
11424 /* When generating Thumb-1 code, we want to place flag-setting operations
11425 close to a conditional branch which depends on them, so that we can
11426 omit the comparison. */
11427 if (TARGET_THUMB1
11428 && dep_type == 0
11429 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11430 && recog_memoized (dep) >= 0
11431 && get_attr_conds (dep) == CONDS_SET)
11432 return 0;
11433
11434 if (current_tune->sched_adjust_cost != NULL)
11435 {
11436 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11437 return cost;
11438 }
11439
11440 /* XXX Is this strictly true? */
11441 if (dep_type == REG_DEP_ANTI
11442 || dep_type == REG_DEP_OUTPUT)
11443 return 0;
11444
11445 /* Call insns don't incur a stall, even if they follow a load. */
11446 if (dep_type == 0
11447 && CALL_P (insn))
11448 return 1;
11449
11450 if ((i_pat = single_set (insn)) != NULL
11451 && MEM_P (SET_SRC (i_pat))
11452 && (d_pat = single_set (dep)) != NULL
11453 && MEM_P (SET_DEST (d_pat)))
11454 {
11455 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11456 /* This is a load after a store; there is no conflict if the load reads
11457 from a cached area. Assume that loads from the stack and from the
11458 constant pool are cached, and that others will miss. This is a
11459 hack. */
11460
11461 if ((GET_CODE (src_mem) == SYMBOL_REF
11462 && CONSTANT_POOL_ADDRESS_P (src_mem))
11463 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11464 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11465 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11466 return 1;
11467 }
11468
11469 return cost;
11470 }
11471
11472 int
11473 arm_max_conditional_execute (void)
11474 {
11475 return max_insns_skipped;
11476 }
11477
11478 static int
11479 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11480 {
11481 if (TARGET_32BIT)
11482 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11483 else
11484 return (optimize > 0) ? 2 : 0;
11485 }
11486
11487 static int
11488 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11489 {
11490 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11491 }
11492
11493 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11494 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11495 sequences of non-executed instructions in IT blocks probably take the same
11496 amount of time as executed instructions (and the IT instruction itself takes
11497 space in icache). This function was experimentally determined to give good
11498 results on a popular embedded benchmark. */
11499
11500 static int
11501 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11502 {
11503 return (TARGET_32BIT && speed_p) ? 1
11504 : arm_default_branch_cost (speed_p, predictable_p);
11505 }
11506
11507 static int
11508 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11509 {
11510 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11511 }
11512
11513 static bool fp_consts_inited = false;
11514
11515 static REAL_VALUE_TYPE value_fp0;
11516
11517 static void
11518 init_fp_table (void)
11519 {
11520 REAL_VALUE_TYPE r;
11521
11522 r = REAL_VALUE_ATOF ("0", DFmode);
11523 value_fp0 = r;
11524 fp_consts_inited = true;
11525 }
11526
11527 /* Return TRUE if rtx X is a valid immediate FP constant. */
11528 int
11529 arm_const_double_rtx (rtx x)
11530 {
11531 const REAL_VALUE_TYPE *r;
11532
11533 if (!fp_consts_inited)
11534 init_fp_table ();
11535
11536 r = CONST_DOUBLE_REAL_VALUE (x);
11537 if (REAL_VALUE_MINUS_ZERO (*r))
11538 return 0;
11539
11540 if (real_equal (r, &value_fp0))
11541 return 1;
11542
11543 return 0;
11544 }
11545
11546 /* VFPv3 has a fairly wide range of representable immediates, formed from
11547 "quarter-precision" floating-point values. These can be evaluated using this
11548 formula (with ^ for exponentiation):
11549
11550 (-1)^s * n * 2^(-r)
11551
11552 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11553 16 <= n <= 31 and 0 <= r <= 7.
11554
11555 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11556
11557 - A (most-significant) is the sign bit.
11558 - BCD are the exponent (encoded as r XOR 3).
11559 - EFGH are the mantissa (encoded as n - 16).
11560 */
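/* As a worked example of the encoding above: 1.0 = 16 * 2^(-4), so s = 0,
   n = 16 and r = 4, giving ABCDEFGH = 0 (sign), 4 XOR 3 = 7 (exponent) and
   16 - 16 = 0 (mantissa), i.e. binary 01110000 = 0x70.  Similarly the
   smallest positive value, 0.125 = 16 * 2^(-7), encodes as 0x40, and the
   largest, 31.0 = 31 * 2^0, encodes as 0x3f.  */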
11561
11562 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11563 fconst[sd] instruction, or -1 if X isn't suitable. */
11564 static int
11565 vfp3_const_double_index (rtx x)
11566 {
11567 REAL_VALUE_TYPE r, m;
11568 int sign, exponent;
11569 unsigned HOST_WIDE_INT mantissa, mant_hi;
11570 unsigned HOST_WIDE_INT mask;
11571 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11572 bool fail;
11573
11574 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11575 return -1;
11576
11577 r = *CONST_DOUBLE_REAL_VALUE (x);
11578
11579 /* We can't represent these things, so detect them first. */
11580 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11581 return -1;
11582
11583 /* Extract sign, exponent and mantissa. */
11584 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11585 r = real_value_abs (&r);
11586 exponent = REAL_EXP (&r);
11587 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11588 highest (sign) bit, with a fixed binary point at bit point_pos.
11589 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11590 bits for the mantissa, this may fail (low bits would be lost). */
11591 real_ldexp (&m, &r, point_pos - exponent);
11592 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11593 mantissa = w.elt (0);
11594 mant_hi = w.elt (1);
11595
11596 /* If there are bits set in the low part of the mantissa, we can't
11597 represent this value. */
11598 if (mantissa != 0)
11599 return -1;
11600
11601 /* Now make it so that mantissa contains the most-significant bits, and move
11602 the point_pos to indicate that the least-significant bits have been
11603 discarded. */
11604 point_pos -= HOST_BITS_PER_WIDE_INT;
11605 mantissa = mant_hi;
11606
11607 /* We can permit four significant bits of mantissa only, plus a high bit
11608 which is always 1. */
11609 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11610 if ((mantissa & mask) != 0)
11611 return -1;
11612
11613 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11614 mantissa >>= point_pos - 5;
11615
11616 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11617 floating-point immediate zero with Neon using an integer-zero load, but
11618 that case is handled elsewhere.) */
11619 if (mantissa == 0)
11620 return -1;
11621
11622 gcc_assert (mantissa >= 16 && mantissa <= 31);
11623
11624 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11625 normalized significands are in the range [1, 2)). (Our mantissa is shifted
11626 left 4 places at this point relative to normalized IEEE754 values). GCC
11627 internally uses [0.5, 1) (see real.c), so the exponent returned from
11628 REAL_EXP must be altered. */
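/* For instance, 1.0 is held internally as 0.5 * 2^1, so REAL_EXP returns 1
   and the adjustment below yields 5 - 1 = 4, matching r = 4 for 1.0 in the
   quarter-precision formula further up.  */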
11629 exponent = 5 - exponent;
11630
11631 if (exponent < 0 || exponent > 7)
11632 return -1;
11633
11634 /* Sign, mantissa and exponent are now in the correct form to plug into the
11635 formula described in the comment above. */
11636 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11637 }
11638
11639 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11640 int
11641 vfp3_const_double_rtx (rtx x)
11642 {
11643 if (!TARGET_VFP3)
11644 return 0;
11645
11646 return vfp3_const_double_index (x) != -1;
11647 }
11648
11649 /* Recognize immediates which can be used in various Neon instructions. Legal
11650 immediates are described by the following table (for VMVN variants, the
11651 bitwise inverse of the constant shown is recognized. In either case, VMOV
11652 is output and the correct instruction to use for a given constant is chosen
11653 by the assembler). The constant shown is replicated across all elements of
11654 the destination vector.
11655
11656 insn elems variant constant (binary)
11657 ---- ----- ------- -----------------
11658 vmov i32 0 00000000 00000000 00000000 abcdefgh
11659 vmov i32 1 00000000 00000000 abcdefgh 00000000
11660 vmov i32 2 00000000 abcdefgh 00000000 00000000
11661 vmov i32 3 abcdefgh 00000000 00000000 00000000
11662 vmov i16 4 00000000 abcdefgh
11663 vmov i16 5 abcdefgh 00000000
11664 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11665 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11666 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11667 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11668 vmvn i16 10 00000000 abcdefgh
11669 vmvn i16 11 abcdefgh 00000000
11670 vmov i32 12 00000000 00000000 abcdefgh 11111111
11671 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11672 vmov i32 14 00000000 abcdefgh 11111111 11111111
11673 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11674 vmov i8 16 abcdefgh
11675 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11676 eeeeeeee ffffffff gggggggg hhhhhhhh
11677 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11678 vmov f32 19 00000000 00000000 00000000 00000000
11679
11680 For case 18, B = !b. Representable values are exactly those accepted by
11681 vfp3_const_double_index, but are output as floating-point numbers rather
11682 than indices.
11683
11684 For case 19, we will change it to vmov.i32 when assembling.
11685
11686 Variants 0-5 (inclusive) may also be used as immediates for the second
11687 operand of VORR/VBIC instructions.
11688
11689 The INVERSE argument causes the bitwise inverse of the given operand to be
11690 recognized instead (used for recognizing legal immediates for the VAND/VORN
11691 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11692 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11693 output, rather than the real insns vbic/vorr).
11694
11695 INVERSE makes no difference to the recognition of float vectors.
11696
11697 The return value is the variant of immediate as shown in the above table, or
11698 -1 if the given value doesn't match any of the listed patterns.
11699 */
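/* As a concrete example of the table above: a V4SImode vector with every
   element equal to 0x0000004d splats into the byte vector
   { 4d, 00, 00, 00, 4d, 00, 00, 00, ... }, which satisfies the variant 0
   check below, so the function returns 0 with *ELEMENTWIDTH set to 32 and
   *MODCONST set to GEN_INT (0x4d).  If every element is instead 0xffffff4d,
   variants 0-5 fail and the variant 6 (vmvn i32) check matches.  */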
11700 static int
11701 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11702 rtx *modconst, int *elementwidth)
11703 {
11704 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11705 matches = 1; \
11706 for (i = 0; i < idx; i += (STRIDE)) \
11707 if (!(TEST)) \
11708 matches = 0; \
11709 if (matches) \
11710 { \
11711 immtype = (CLASS); \
11712 elsize = (ELSIZE); \
11713 break; \
11714 }
11715
11716 unsigned int i, elsize = 0, idx = 0, n_elts;
11717 unsigned int innersize;
11718 unsigned char bytes[16];
11719 int immtype = -1, matches;
11720 unsigned int invmask = inverse ? 0xff : 0;
11721 bool vector = GET_CODE (op) == CONST_VECTOR;
11722
11723 if (vector)
11724 n_elts = CONST_VECTOR_NUNITS (op);
11725 else
11726 {
11727 n_elts = 1;
11728 if (mode == VOIDmode)
11729 mode = DImode;
11730 }
11731
11732 innersize = GET_MODE_UNIT_SIZE (mode);
11733
11734 /* Vectors of float constants. */
11735 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11736 {
11737 rtx el0 = CONST_VECTOR_ELT (op, 0);
11738
11739 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11740 return -1;
11741
11742 /* FP16 vectors cannot be represented. */
11743 if (GET_MODE_INNER (mode) == HFmode)
11744 return -1;
11745
11746 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11747 are distinct in this context. */
11748 if (!const_vec_duplicate_p (op))
11749 return -1;
11750
11751 if (modconst)
11752 *modconst = CONST_VECTOR_ELT (op, 0);
11753
11754 if (elementwidth)
11755 *elementwidth = 0;
11756
11757 if (el0 == CONST0_RTX (GET_MODE (el0)))
11758 return 19;
11759 else
11760 return 18;
11761 }
11762
11763 /* The tricks done in the code below apply for little-endian vector layout.
11764 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11765 FIXME: Implement logic for big-endian vectors. */
11766 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11767 return -1;
11768
11769 /* Splat vector constant out into a byte vector. */
11770 for (i = 0; i < n_elts; i++)
11771 {
11772 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11773 unsigned HOST_WIDE_INT elpart;
11774
11775 gcc_assert (CONST_INT_P (el));
11776 elpart = INTVAL (el);
11777
11778 for (unsigned int byte = 0; byte < innersize; byte++)
11779 {
11780 bytes[idx++] = (elpart & 0xff) ^ invmask;
11781 elpart >>= BITS_PER_UNIT;
11782 }
11783 }
11784
11785 /* Sanity check. */
11786 gcc_assert (idx == GET_MODE_SIZE (mode));
11787
11788 do
11789 {
11790 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11791 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11792
11793 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11794 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11795
11796 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11797 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11798
11799 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11800 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11801
11802 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11803
11804 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11805
11806 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11807 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11808
11809 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11810 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11811
11812 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11813 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11814
11815 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11816 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11817
11818 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11819
11820 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11821
11822 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11823 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11824
11825 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11826 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11827
11828 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11829 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11830
11831 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11832 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11833
11834 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11835
11836 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11837 && bytes[i] == bytes[(i + 8) % idx]);
11838 }
11839 while (0);
11840
11841 if (immtype == -1)
11842 return -1;
11843
11844 if (elementwidth)
11845 *elementwidth = elsize;
11846
11847 if (modconst)
11848 {
11849 unsigned HOST_WIDE_INT imm = 0;
11850
11851 /* Un-invert bytes of recognized vector, if necessary. */
11852 if (invmask != 0)
11853 for (i = 0; i < idx; i++)
11854 bytes[i] ^= invmask;
11855
11856 if (immtype == 17)
11857 {
11858 /* FIXME: Broken on 32-bit H_W_I hosts. */
11859 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11860
11861 for (i = 0; i < 8; i++)
11862 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11863 << (i * BITS_PER_UNIT);
11864
11865 *modconst = GEN_INT (imm);
11866 }
11867 else
11868 {
11869 unsigned HOST_WIDE_INT imm = 0;
11870
11871 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11872 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11873
11874 *modconst = GEN_INT (imm);
11875 }
11876 }
11877
11878 return immtype;
11879 #undef CHECK
11880 }
11881
11882 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11883 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11884 float elements), and a modified constant (whatever should be output for a
11885 VMOV) in *MODCONST. */
11886
11887 int
11888 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11889 rtx *modconst, int *elementwidth)
11890 {
11891 rtx tmpconst;
11892 int tmpwidth;
11893 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11894
11895 if (retval == -1)
11896 return 0;
11897
11898 if (modconst)
11899 *modconst = tmpconst;
11900
11901 if (elementwidth)
11902 *elementwidth = tmpwidth;
11903
11904 return 1;
11905 }
11906
11907 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11908 the immediate is valid, write a constant suitable for using as an operand
11909 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11910 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11911
11912 int
11913 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11914 rtx *modconst, int *elementwidth)
11915 {
11916 rtx tmpconst;
11917 int tmpwidth;
11918 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11919
11920 if (retval < 0 || retval > 5)
11921 return 0;
11922
11923 if (modconst)
11924 *modconst = tmpconst;
11925
11926 if (elementwidth)
11927 *elementwidth = tmpwidth;
11928
11929 return 1;
11930 }
11931
11932 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11933 the immediate is valid, write a constant suitable for using as an operand
11934 to VSHR/VSHL to *MODCONST and the corresponding element width to
11935 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes a left shift from a right shift,
11936 because they have different limitations. */
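/* For example, for a V4HImode operand (16-bit elements) MAXSHIFT below is
   16, so a valid VSHL immediate lies in 0..15 while a valid VSHR immediate
   lies in 1..16.  */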
11937
11938 int
11939 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11940 rtx *modconst, int *elementwidth,
11941 bool isleftshift)
11942 {
11943 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11944 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11945 unsigned HOST_WIDE_INT last_elt = 0;
11946 unsigned HOST_WIDE_INT maxshift;
11947
11948 /* Split vector constant out into a byte vector. */
11949 for (i = 0; i < n_elts; i++)
11950 {
11951 rtx el = CONST_VECTOR_ELT (op, i);
11952 unsigned HOST_WIDE_INT elpart;
11953
11954 if (CONST_INT_P (el))
11955 elpart = INTVAL (el);
11956 else if (CONST_DOUBLE_P (el))
11957 return 0;
11958 else
11959 gcc_unreachable ();
11960
11961 if (i != 0 && elpart != last_elt)
11962 return 0;
11963
11964 last_elt = elpart;
11965 }
11966
11967 /* Shift less than element size. */
11968 maxshift = innersize * 8;
11969
11970 if (isleftshift)
11971 {
11972 /* Left shift immediate value can be from 0 to <size>-1. */
11973 if (last_elt >= maxshift)
11974 return 0;
11975 }
11976 else
11977 {
11978 /* Right shift immediate value can be from 1 to <size>. */
11979 if (last_elt == 0 || last_elt > maxshift)
11980 return 0;
11981 }
11982
11983 if (elementwidth)
11984 *elementwidth = innersize * 8;
11985
11986 if (modconst)
11987 *modconst = CONST_VECTOR_ELT (op, 0);
11988
11989 return 1;
11990 }
11991
11992 /* Return a string suitable for output of Neon immediate logic operation
11993 MNEM. */
11994
11995 char *
11996 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11997 int inverse, int quad)
11998 {
11999 int width, is_valid;
12000 static char templ[40];
12001
12002 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12003
12004 gcc_assert (is_valid != 0);
12005
12006 if (quad)
12007 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12008 else
12009 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12010
12011 return templ;
12012 }
12013
12014 /* Return a string suitable for output of Neon immediate shift operation
12015 (VSHR or VSHL) MNEM. */
12016
12017 char *
12018 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12019 machine_mode mode, int quad,
12020 bool isleftshift)
12021 {
12022 int width, is_valid;
12023 static char templ[40];
12024
12025 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12026 gcc_assert (is_valid != 0);
12027
12028 if (quad)
12029 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12030 else
12031 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12032
12033 return templ;
12034 }
12035
12036 /* Output a sequence of pairwise operations to implement a reduction.
12037 NOTE: We do "too much work" here, because pairwise operations work on two
12038 registers-worth of operands in one go. Unfortunately it does not seem possible
12039 to exploit those extra calculations to do the full operation in fewer steps.
12040 Although all vector elements of the result but the first are ignored, we
12041 actually calculate the same result in each of the elements. An alternative
12042 such as initially loading a vector with zero to use as each of the second
12043 operands would use up an additional register and take an extra instruction,
12044 for no particular gain. */
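/* For example, reducing a 4-element vector { a, b, c, d } with a pairwise
   add as REDUC takes two steps (writing V for OP1 and T for the intermediate
   register): the first emits reduc (t, v, v), giving { a+b, c+d, a+b, c+d },
   and the second emits reduc (op0, t, t), leaving a+b+c+d in every element,
   of which only element 0 is subsequently used.  */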
12045
12046 void
12047 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12048 rtx (*reduc) (rtx, rtx, rtx))
12049 {
12050 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12051 rtx tmpsum = op1;
12052
12053 for (i = parts / 2; i >= 1; i /= 2)
12054 {
12055 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12056 emit_insn (reduc (dest, tmpsum, tmpsum));
12057 tmpsum = dest;
12058 }
12059 }
12060
12061 /* If VALS is a vector constant that can be loaded into a register
12062 using VDUP, generate instructions to do so and return an RTX to
12063 assign to the register. Otherwise return NULL_RTX. */
12064
12065 static rtx
12066 neon_vdup_constant (rtx vals)
12067 {
12068 machine_mode mode = GET_MODE (vals);
12069 machine_mode inner_mode = GET_MODE_INNER (mode);
12070 rtx x;
12071
12072 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12073 return NULL_RTX;
12074
12075 if (!const_vec_duplicate_p (vals, &x))
12076 /* The elements are not all the same. We could handle repeating
12077 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12078 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12079 vdup.i16). */
12080 return NULL_RTX;
12081
12082 /* We can load this constant by using VDUP and a constant in a
12083 single ARM register. This will be cheaper than a vector
12084 load. */
12085
12086 x = copy_to_mode_reg (inner_mode, x);
12087 return gen_rtx_VEC_DUPLICATE (mode, x);
12088 }
12089
12090 /* Generate code to load VALS, which is a PARALLEL containing only
12091 constants (for vec_init) or CONST_VECTOR, efficiently into a
12092 register. Returns an RTX to copy into the register, or NULL_RTX
12093 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12094
12095 rtx
12096 neon_make_constant (rtx vals)
12097 {
12098 machine_mode mode = GET_MODE (vals);
12099 rtx target;
12100 rtx const_vec = NULL_RTX;
12101 int n_elts = GET_MODE_NUNITS (mode);
12102 int n_const = 0;
12103 int i;
12104
12105 if (GET_CODE (vals) == CONST_VECTOR)
12106 const_vec = vals;
12107 else if (GET_CODE (vals) == PARALLEL)
12108 {
12109 /* A CONST_VECTOR must contain only CONST_INTs and
12110 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12111 Only store valid constants in a CONST_VECTOR. */
12112 for (i = 0; i < n_elts; ++i)
12113 {
12114 rtx x = XVECEXP (vals, 0, i);
12115 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12116 n_const++;
12117 }
12118 if (n_const == n_elts)
12119 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12120 }
12121 else
12122 gcc_unreachable ();
12123
12124 if (const_vec != NULL
12125 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12126 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12127 return const_vec;
12128 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12129 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12130 pipeline cycle; creating the constant takes one or two ARM
12131 pipeline cycles. */
12132 return target;
12133 else if (const_vec != NULL_RTX)
12134 /* Load from constant pool. On Cortex-A8 this takes two cycles
12135 (for either double or quad vectors). We can not take advantage
12136 of single-cycle VLD1 because we need a PC-relative addressing
12137 mode. */
12138 return const_vec;
12139 else
12140 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12141 We can not construct an initializer. */
12142 return NULL_RTX;
12143 }
12144
12145 /* Initialize vector TARGET to VALS. */
12146
12147 void
12148 neon_expand_vector_init (rtx target, rtx vals)
12149 {
12150 machine_mode mode = GET_MODE (target);
12151 machine_mode inner_mode = GET_MODE_INNER (mode);
12152 int n_elts = GET_MODE_NUNITS (mode);
12153 int n_var = 0, one_var = -1;
12154 bool all_same = true;
12155 rtx x, mem;
12156 int i;
12157
12158 for (i = 0; i < n_elts; ++i)
12159 {
12160 x = XVECEXP (vals, 0, i);
12161 if (!CONSTANT_P (x))
12162 ++n_var, one_var = i;
12163
12164 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12165 all_same = false;
12166 }
12167
12168 if (n_var == 0)
12169 {
12170 rtx constant = neon_make_constant (vals);
12171 if (constant != NULL_RTX)
12172 {
12173 emit_move_insn (target, constant);
12174 return;
12175 }
12176 }
12177
12178 /* Splat a single non-constant element if we can. */
12179 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12180 {
12181 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12182 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12183 return;
12184 }
12185
12186 /* One field is non-constant. Load constant then overwrite varying
12187 field. This is more efficient than using the stack. */
12188 if (n_var == 1)
12189 {
12190 rtx copy = copy_rtx (vals);
12191 rtx index = GEN_INT (one_var);
12192
12193 /* Load constant part of vector, substitute neighboring value for
12194 varying element. */
12195 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12196 neon_expand_vector_init (target, copy);
12197
12198 /* Insert variable. */
12199 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12200 switch (mode)
12201 {
12202 case V8QImode:
12203 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12204 break;
12205 case V16QImode:
12206 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12207 break;
12208 case V4HImode:
12209 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12210 break;
12211 case V8HImode:
12212 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12213 break;
12214 case V2SImode:
12215 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12216 break;
12217 case V4SImode:
12218 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12219 break;
12220 case V2SFmode:
12221 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12222 break;
12223 case V4SFmode:
12224 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12225 break;
12226 case V2DImode:
12227 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12228 break;
12229 default:
12230 gcc_unreachable ();
12231 }
12232 return;
12233 }
12234
12235 /* Construct the vector in memory one field at a time
12236 and load the whole vector. */
12237 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12238 for (i = 0; i < n_elts; i++)
12239 emit_move_insn (adjust_address_nv (mem, inner_mode,
12240 i * GET_MODE_SIZE (inner_mode)),
12241 XVECEXP (vals, 0, i));
12242 emit_move_insn (target, mem);
12243 }
12244
12245 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise an
12246 error mentioning DESC if it doesn't. EXP indicates the source location,
12247 which includes the inlining history for intrinsics. */
12248
12249 static void
12250 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12251 const_tree exp, const char *desc)
12252 {
12253 HOST_WIDE_INT lane;
12254
12255 gcc_assert (CONST_INT_P (operand));
12256
12257 lane = INTVAL (operand);
12258
12259 if (lane < low || lane >= high)
12260 {
12261 if (exp)
12262 error ("%K%s %wd out of range %wd - %wd",
12263 exp, desc, lane, low, high - 1);
12264 else
12265 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12266 }
12267 }
12268
12269 /* Bounds-check lanes. */
12270
12271 void
12272 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12273 const_tree exp)
12274 {
12275 bounds_check (operand, low, high, exp, "lane");
12276 }
12277
12278 /* Bounds-check constants. */
12279
12280 void
12281 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12282 {
12283 bounds_check (operand, low, high, NULL_TREE, "constant");
12284 }
12285
12286 HOST_WIDE_INT
12287 neon_element_bits (machine_mode mode)
12288 {
12289 return GET_MODE_UNIT_BITSIZE (mode);
12290 }
12291
12292 \f
12293 /* Predicates for `match_operand' and `match_operator'. */
12294
12295 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12296 WB is true if full writeback address modes are allowed and is false
12297 if limited writeback address modes (POST_INC and PRE_DEC) are
12298 allowed. */
12299
12300 int
12301 arm_coproc_mem_operand (rtx op, bool wb)
12302 {
12303 rtx ind;
12304
12305 /* Reject eliminable registers. */
12306 if (! (reload_in_progress || reload_completed || lra_in_progress)
12307 && ( reg_mentioned_p (frame_pointer_rtx, op)
12308 || reg_mentioned_p (arg_pointer_rtx, op)
12309 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12310 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12311 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12312 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12313 return FALSE;
12314
12315 /* Constants are converted into offsets from labels. */
12316 if (!MEM_P (op))
12317 return FALSE;
12318
12319 ind = XEXP (op, 0);
12320
12321 if (reload_completed
12322 && (GET_CODE (ind) == LABEL_REF
12323 || (GET_CODE (ind) == CONST
12324 && GET_CODE (XEXP (ind, 0)) == PLUS
12325 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12326 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12327 return TRUE;
12328
12329 /* Match: (mem (reg)). */
12330 if (REG_P (ind))
12331 return arm_address_register_rtx_p (ind, 0);
12332
12333 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12334 acceptable in any case (subject to verification by
12335 arm_address_register_rtx_p). We need WB to be true to accept
12336 PRE_INC and POST_DEC. */
12337 if (GET_CODE (ind) == POST_INC
12338 || GET_CODE (ind) == PRE_DEC
12339 || (wb
12340 && (GET_CODE (ind) == PRE_INC
12341 || GET_CODE (ind) == POST_DEC)))
12342 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12343
12344 if (wb
12345 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12346 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12347 && GET_CODE (XEXP (ind, 1)) == PLUS
12348 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12349 ind = XEXP (ind, 1);
12350
12351 /* Match:
12352 (plus (reg)
12353 (const)). */
12354 if (GET_CODE (ind) == PLUS
12355 && REG_P (XEXP (ind, 0))
12356 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12357 && CONST_INT_P (XEXP (ind, 1))
12358 && INTVAL (XEXP (ind, 1)) > -1024
12359 && INTVAL (XEXP (ind, 1)) < 1024
12360 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12361 return TRUE;
12362
12363 return FALSE;
12364 }
12365
12366 /* Return TRUE if OP is a memory operand which we can load or store a vector
12367 to/from. TYPE is one of the following values:
12368 0 - Vector load/store (vldr)
12369 1 - Core registers (ldm)
12370 2 - Element/structure loads (vld1)
12371 */
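/* For example, with TYPE == 0 both (mem (post_inc (reg))) and
   (mem (plus (reg) (const_int 8))) are accepted; with TYPE == 2 the
   reg-plus-constant form is rejected, but a register post-increment such as
   (mem (post_modify (reg) (plus (reg) (reg)))) is allowed, matching the
   register-stride addressing of VLDn.  */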
12372 int
12373 neon_vector_mem_operand (rtx op, int type, bool strict)
12374 {
12375 rtx ind;
12376
12377 /* Reject eliminable registers. */
12378 if (strict && ! (reload_in_progress || reload_completed)
12379 && (reg_mentioned_p (frame_pointer_rtx, op)
12380 || reg_mentioned_p (arg_pointer_rtx, op)
12381 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12382 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12383 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12384 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12385 return FALSE;
12386
12387 /* Constants are converted into offsets from labels. */
12388 if (!MEM_P (op))
12389 return FALSE;
12390
12391 ind = XEXP (op, 0);
12392
12393 if (reload_completed
12394 && (GET_CODE (ind) == LABEL_REF
12395 || (GET_CODE (ind) == CONST
12396 && GET_CODE (XEXP (ind, 0)) == PLUS
12397 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12398 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12399 return TRUE;
12400
12401 /* Match: (mem (reg)). */
12402 if (REG_P (ind))
12403 return arm_address_register_rtx_p (ind, 0);
12404
12405 /* Allow post-increment with Neon registers. */
12406 if ((type != 1 && GET_CODE (ind) == POST_INC)
12407 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12408 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12409
12410 /* Allow post-increment by register for VLDn */
12411 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12412 && GET_CODE (XEXP (ind, 1)) == PLUS
12413 && REG_P (XEXP (XEXP (ind, 1), 1)))
12414 return true;
12415
12416 /* Match:
12417 (plus (reg)
12418 (const)). */
12419 if (type == 0
12420 && GET_CODE (ind) == PLUS
12421 && REG_P (XEXP (ind, 0))
12422 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12423 && CONST_INT_P (XEXP (ind, 1))
12424 && INTVAL (XEXP (ind, 1)) > -1024
12425 /* For quad modes, we restrict the constant offset to be slightly less
12426 than what the instruction format permits. We have no such constraint
12427 on double mode offsets. (This must match arm_legitimate_index_p.) */
12428 && (INTVAL (XEXP (ind, 1))
12429 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12430 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12431 return TRUE;
12432
12433 return FALSE;
12434 }
12435
12436 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12437 type. */
12438 int
12439 neon_struct_mem_operand (rtx op)
12440 {
12441 rtx ind;
12442
12443 /* Reject eliminable registers. */
12444 if (! (reload_in_progress || reload_completed)
12445 && ( reg_mentioned_p (frame_pointer_rtx, op)
12446 || reg_mentioned_p (arg_pointer_rtx, op)
12447 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12448 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12449 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12450 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12451 return FALSE;
12452
12453 /* Constants are converted into offsets from labels. */
12454 if (!MEM_P (op))
12455 return FALSE;
12456
12457 ind = XEXP (op, 0);
12458
12459 if (reload_completed
12460 && (GET_CODE (ind) == LABEL_REF
12461 || (GET_CODE (ind) == CONST
12462 && GET_CODE (XEXP (ind, 0)) == PLUS
12463 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12464 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12465 return TRUE;
12466
12467 /* Match: (mem (reg)). */
12468 if (REG_P (ind))
12469 return arm_address_register_rtx_p (ind, 0);
12470
12471 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12472 if (GET_CODE (ind) == POST_INC
12473 || GET_CODE (ind) == PRE_DEC)
12474 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12475
12476 return FALSE;
12477 }
12478
12479 /* Return true if X is a register that will be eliminated later on. */
12480 int
12481 arm_eliminable_register (rtx x)
12482 {
12483 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12484 || REGNO (x) == ARG_POINTER_REGNUM
12485 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12486 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12487 }
12488
12489 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12490 coprocessor registers. Otherwise return NO_REGS. */
12491
12492 enum reg_class
12493 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12494 {
12495 if (mode == HFmode)
12496 {
12497 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12498 return GENERAL_REGS;
12499 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12500 return NO_REGS;
12501 return GENERAL_REGS;
12502 }
12503
12504 /* The neon move patterns handle all legitimate vector and struct
12505 addresses. */
12506 if (TARGET_NEON
12507 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12508 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12509 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12510 || VALID_NEON_STRUCT_MODE (mode)))
12511 return NO_REGS;
12512
12513 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12514 return NO_REGS;
12515
12516 return GENERAL_REGS;
12517 }
12518
12519 /* Values which must be returned in the most-significant end of the return
12520 register. */
12521
12522 static bool
12523 arm_return_in_msb (const_tree valtype)
12524 {
12525 return (TARGET_AAPCS_BASED
12526 && BYTES_BIG_ENDIAN
12527 && (AGGREGATE_TYPE_P (valtype)
12528 || TREE_CODE (valtype) == COMPLEX_TYPE
12529 || FIXED_POINT_TYPE_P (valtype)));
12530 }
12531
12532 /* Return TRUE if X references a SYMBOL_REF. */
12533 int
12534 symbol_mentioned_p (rtx x)
12535 {
12536 const char * fmt;
12537 int i;
12538
12539 if (GET_CODE (x) == SYMBOL_REF)
12540 return 1;
12541
12542 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12543 are constant offsets, not symbols. */
12544 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12545 return 0;
12546
12547 fmt = GET_RTX_FORMAT (GET_CODE (x));
12548
12549 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12550 {
12551 if (fmt[i] == 'E')
12552 {
12553 int j;
12554
12555 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12556 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12557 return 1;
12558 }
12559 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12560 return 1;
12561 }
12562
12563 return 0;
12564 }
12565
12566 /* Return TRUE if X references a LABEL_REF. */
12567 int
12568 label_mentioned_p (rtx x)
12569 {
12570 const char * fmt;
12571 int i;
12572
12573 if (GET_CODE (x) == LABEL_REF)
12574 return 1;
12575
12576 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12577 instruction, but they are constant offsets, not symbols. */
12578 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12579 return 0;
12580
12581 fmt = GET_RTX_FORMAT (GET_CODE (x));
12582 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12583 {
12584 if (fmt[i] == 'E')
12585 {
12586 int j;
12587
12588 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12589 if (label_mentioned_p (XVECEXP (x, i, j)))
12590 return 1;
12591 }
12592 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12593 return 1;
12594 }
12595
12596 return 0;
12597 }
12598
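/* Return TRUE if X is an UNSPEC_TLS reference, possibly wrapped in a
   CONST.  */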
12599 int
12600 tls_mentioned_p (rtx x)
12601 {
12602 switch (GET_CODE (x))
12603 {
12604 case CONST:
12605 return tls_mentioned_p (XEXP (x, 0));
12606
12607 case UNSPEC:
12608 if (XINT (x, 1) == UNSPEC_TLS)
12609 return 1;
12610
12611 /* Fall through. */
12612 default:
12613 return 0;
12614 }
12615 }
12616
12617 /* Must not copy any rtx that uses a pc-relative address.
12618 Also, disallow copying of load-exclusive instructions that
12619 may appear after splitting of compare-and-swap-style operations
12620 so as to prevent those loops from being transformed away from their
12621 canonical forms (see PR 69904). */
12622
12623 static bool
12624 arm_cannot_copy_insn_p (rtx_insn *insn)
12625 {
12626 /* The tls call insn cannot be copied, as it is paired with a data
12627 word. */
12628 if (recog_memoized (insn) == CODE_FOR_tlscall)
12629 return true;
12630
12631 subrtx_iterator::array_type array;
12632 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12633 {
12634 const_rtx x = *iter;
12635 if (GET_CODE (x) == UNSPEC
12636 && (XINT (x, 1) == UNSPEC_PIC_BASE
12637 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12638 return true;
12639 }
12640
12641 rtx set = single_set (insn);
12642 if (set)
12643 {
12644 rtx src = SET_SRC (set);
12645 if (GET_CODE (src) == ZERO_EXTEND)
12646 src = XEXP (src, 0);
12647
12648 /* Catch the load-exclusive and load-acquire operations. */
12649 if (GET_CODE (src) == UNSPEC_VOLATILE
12650 && (XINT (src, 1) == VUNSPEC_LL
12651 || XINT (src, 1) == VUNSPEC_LAX))
12652 return true;
12653 }
12654 return false;
12655 }
12656
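/* Map the code of the min/max rtx X to the comparison under which the
   first operand is selected: SMAX -> GE, SMIN -> LE, UMIN -> LEU,
   UMAX -> GEU.  */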
12657 enum rtx_code
12658 minmax_code (rtx x)
12659 {
12660 enum rtx_code code = GET_CODE (x);
12661
12662 switch (code)
12663 {
12664 case SMAX:
12665 return GE;
12666 case SMIN:
12667 return LE;
12668 case UMIN:
12669 return LEU;
12670 case UMAX:
12671 return GEU;
12672 default:
12673 gcc_unreachable ();
12674 }
12675 }
12676
12677 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
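/* For instance, a low/high bound pair of [0, 255] sets *MASK to 8 and
   *SIGNED_SAT to false (usat #8), while [-128, 127] sets *MASK to 8 and
   *SIGNED_SAT to true (ssat #8).  */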
12678
12679 bool
12680 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12681 int *mask, bool *signed_sat)
12682 {
12683 /* The high bound must be a power of two minus one. */
12684 int log = exact_log2 (INTVAL (hi_bound) + 1);
12685 if (log == -1)
12686 return false;
12687
12688 /* The low bound is either zero (for usat) or one less than the
12689 negation of the high bound (for ssat). */
12690 if (INTVAL (lo_bound) == 0)
12691 {
12692 if (mask)
12693 *mask = log;
12694 if (signed_sat)
12695 *signed_sat = false;
12696
12697 return true;
12698 }
12699
12700 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12701 {
12702 if (mask)
12703 *mask = log + 1;
12704 if (signed_sat)
12705 *signed_sat = true;
12706
12707 return true;
12708 }
12709
12710 return false;
12711 }
12712
12713 /* Return 1 if the memory locations A and B are adjacent. */
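/* Two references such as [Rn, #4] and [Rn, #8] off the same base register
   are considered adjacent, subject to the checks below on volatility,
   eliminable base registers and the load-delay-slot heuristic.  */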
12714 int
12715 adjacent_mem_locations (rtx a, rtx b)
12716 {
12717 /* We don't guarantee to preserve the order of these memory refs. */
12718 if (volatile_refs_p (a) || volatile_refs_p (b))
12719 return 0;
12720
12721 if ((REG_P (XEXP (a, 0))
12722 || (GET_CODE (XEXP (a, 0)) == PLUS
12723 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12724 && (REG_P (XEXP (b, 0))
12725 || (GET_CODE (XEXP (b, 0)) == PLUS
12726 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12727 {
12728 HOST_WIDE_INT val0 = 0, val1 = 0;
12729 rtx reg0, reg1;
12730 int val_diff;
12731
12732 if (GET_CODE (XEXP (a, 0)) == PLUS)
12733 {
12734 reg0 = XEXP (XEXP (a, 0), 0);
12735 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12736 }
12737 else
12738 reg0 = XEXP (a, 0);
12739
12740 if (GET_CODE (XEXP (b, 0)) == PLUS)
12741 {
12742 reg1 = XEXP (XEXP (b, 0), 0);
12743 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12744 }
12745 else
12746 reg1 = XEXP (b, 0);
12747
12748 /* Don't accept any offset that will require multiple
12749 instructions to handle, since this would cause the
12750 arith_adjacentmem pattern to output an overlong sequence. */
12751 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12752 return 0;
12753
12754 /* Don't allow an eliminable register: register elimination can make
12755 the offset too large. */
12756 if (arm_eliminable_register (reg0))
12757 return 0;
12758
12759 val_diff = val1 - val0;
12760
12761 if (arm_ld_sched)
12762 {
12763 /* If the target has load delay slots, then there's no benefit
12764 to using an ldm instruction unless the offset is zero and
12765 we are optimizing for size. */
12766 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12767 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12768 && (val_diff == 4 || val_diff == -4));
12769 }
12770
12771 return ((REGNO (reg0) == REGNO (reg1))
12772 && (val_diff == 4 || val_diff == -4));
12773 }
12774
12775 return 0;
12776 }
12777
12778 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12779 for load operations, false for store operations. CONSECUTIVE is true
12780 if the register numbers in the operation must be consecutive in the register
12781 bank. RETURN_PC is true if the value is to be loaded into the PC.
12782 The pattern we are trying to match for load is:
12783 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12784 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12785 :
12786 :
12787 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12788 ]
12789 where
12790 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12791 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12792 3. If consecutive is TRUE, then for kth register being loaded,
12793 REGNO (R_dk) = REGNO (R_d0) + k.
12794 The pattern for store is similar. */
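/* As a concrete (schematic) illustration, an SImode two-register load such
   as "ldm r0, {r4, r5}" is matched as

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))]).  */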
12795 bool
12796 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12797 bool consecutive, bool return_pc)
12798 {
12799 HOST_WIDE_INT count = XVECLEN (op, 0);
12800 rtx reg, mem, addr;
12801 unsigned regno;
12802 unsigned first_regno;
12803 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12804 rtx elt;
12805 bool addr_reg_in_reglist = false;
12806 bool update = false;
12807 int reg_increment;
12808 int offset_adj;
12809 int regs_per_val;
12810
12811 /* If not in SImode, then registers must be consecutive
12812 (e.g., VLDM instructions for DFmode). */
12813 gcc_assert ((mode == SImode) || consecutive);
12814 /* Setting return_pc for stores is illegal. */
12815 gcc_assert (!return_pc || load);
12816
12817 /* Set up the increments and the regs per val based on the mode. */
12818 reg_increment = GET_MODE_SIZE (mode);
12819 regs_per_val = reg_increment / 4;
12820 offset_adj = return_pc ? 1 : 0;
12821
12822 if (count <= 1
12823 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12824 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12825 return false;
12826
12827 /* Check if this is a write-back. */
12828 elt = XVECEXP (op, 0, offset_adj);
12829 if (GET_CODE (SET_SRC (elt)) == PLUS)
12830 {
12831 i++;
12832 base = 1;
12833 update = true;
12834
12835 /* The offset adjustment must be the number of registers being
12836 popped times the size of a single register. */
12837 if (!REG_P (SET_DEST (elt))
12838 || !REG_P (XEXP (SET_SRC (elt), 0))
12839 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12840 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12841 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12842 ((count - 1 - offset_adj) * reg_increment))
12843 return false;
12844 }
12845
12846 i = i + offset_adj;
12847 base = base + offset_adj;
12848 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12849 success depends on the type: VLDM can do just one reg,
12850 LDM must do at least two. */
12851 if ((count <= i) && (mode == SImode))
12852 return false;
12853
12854 elt = XVECEXP (op, 0, i - 1);
12855 if (GET_CODE (elt) != SET)
12856 return false;
12857
12858 if (load)
12859 {
12860 reg = SET_DEST (elt);
12861 mem = SET_SRC (elt);
12862 }
12863 else
12864 {
12865 reg = SET_SRC (elt);
12866 mem = SET_DEST (elt);
12867 }
12868
12869 if (!REG_P (reg) || !MEM_P (mem))
12870 return false;
12871
12872 regno = REGNO (reg);
12873 first_regno = regno;
12874 addr = XEXP (mem, 0);
12875 if (GET_CODE (addr) == PLUS)
12876 {
12877 if (!CONST_INT_P (XEXP (addr, 1)))
12878 return false;
12879
12880 offset = INTVAL (XEXP (addr, 1));
12881 addr = XEXP (addr, 0);
12882 }
12883
12884 if (!REG_P (addr))
12885 return false;
12886
12887 /* Don't allow SP to be loaded unless it is also the base register. It
12888 guarantees that SP is reset correctly when an LDM instruction
12889 is interrupted. Otherwise, we might end up with a corrupt stack. */
12890 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12891 return false;
12892
12893 for (; i < count; i++)
12894 {
12895 elt = XVECEXP (op, 0, i);
12896 if (GET_CODE (elt) != SET)
12897 return false;
12898
12899 if (load)
12900 {
12901 reg = SET_DEST (elt);
12902 mem = SET_SRC (elt);
12903 }
12904 else
12905 {
12906 reg = SET_SRC (elt);
12907 mem = SET_DEST (elt);
12908 }
12909
12910 if (!REG_P (reg)
12911 || GET_MODE (reg) != mode
12912 || REGNO (reg) <= regno
12913 || (consecutive
12914 && (REGNO (reg) !=
12915 (unsigned int) (first_regno + regs_per_val * (i - base))))
12916 /* Don't allow SP to be loaded unless it is also the base register. It
12917 guarantees that SP is reset correctly when an LDM instruction
12918 is interrupted. Otherwise, we might end up with a corrupt stack. */
12919 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12920 || !MEM_P (mem)
12921 || GET_MODE (mem) != mode
12922 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12923 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12924 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12925 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12926 offset + (i - base) * reg_increment))
12927 && (!REG_P (XEXP (mem, 0))
12928 || offset + (i - base) * reg_increment != 0)))
12929 return false;
12930
12931 regno = REGNO (reg);
12932 if (regno == REGNO (addr))
12933 addr_reg_in_reglist = true;
12934 }
12935
12936 if (load)
12937 {
12938 if (update && addr_reg_in_reglist)
12939 return false;
12940
12941 /* For Thumb-1, the address register is always modified, either by write-back
12942 or by an explicit load. If the pattern does not describe an update,
12943 then the address register must be in the list of loaded registers. */
12944 if (TARGET_THUMB1)
12945 return update || addr_reg_in_reglist;
12946 }
12947
12948 return true;
12949 }
12950
12951 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12952 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12953 instruction. ADD_OFFSET is nonzero if the base address register needs
12954 to be modified with an add instruction before we can use it. */
12955
12956 static bool
12957 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12958 int nops, HOST_WIDE_INT add_offset)
12959 {
12960 /* For the ARM8, ARM9 and StrongARM, two ldr instructions are faster than an ldm
12961 if the offset isn't small enough. The reason 2 ldrs are faster
12962 is because these ARMs are able to do more than one cache access
12963 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12964 whilst the ARM8 has a double bandwidth cache. This means that
12965 these cores can do both an instruction fetch and a data fetch in
12966 a single cycle, so the trick of calculating the address into a
12967 scratch register (one of the result regs) and then doing a load
12968 multiple actually becomes slower (and no smaller in code size).
12969 That is the transformation
12970
12971 ldr rd1, [rbase + offset]
12972 ldr rd2, [rbase + offset + 4]
12973
12974 to
12975
12976 add rd1, rbase, offset
12977 ldmia rd1, {rd1, rd2}
12978
12979 produces worse code -- '3 cycles + any stalls on rd2' instead of
12980 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12981 access per cycle, the first sequence could never complete in less
12982 than 6 cycles, whereas the ldm sequence would only take 5 and
12983 would make better use of sequential accesses if not hitting the
12984 cache.
12985
12986 We cheat here and test 'arm_ld_sched' which we currently know to
12987 only be true for the ARM8, ARM9 and StrongARM. If this ever
12988 changes, then the test below needs to be reworked. */
12989 if (nops == 2 && arm_ld_sched && add_offset != 0)
12990 return false;
12991
12992 /* XScale has load-store double instructions, but they have stricter
12993 alignment requirements than load-store multiple, so we cannot
12994 use them.
12995
12996 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12997 the pipeline until completion.
12998
12999 NREGS CYCLES
13000 1 3
13001 2 4
13002 3 5
13003 4 6
13004
13005 An ldr instruction takes 1-3 cycles, but does not block the
13006 pipeline.
13007
13008 NREGS CYCLES
13009 1 1-3
13010 2 2-6
13011 3 3-9
13012 4 4-12
13013
13014 Best case ldr will always win. However, the more ldr instructions
13015 we issue, the less likely we are to be able to schedule them well.
13016 Using ldr instructions also increases code size.
13017
13018 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13019 for counts of 3 or 4 regs. */
13020 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13021 return false;
13022 return true;
13023 }
13024
13025 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13026 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13027 an array ORDER which describes the sequence to use when accessing the
13028 offsets that produces an ascending order. In this sequence, each
13029 offset must be larger by exactly 4 than the previous one. ORDER[0]
13030 must have been filled in with the lowest offset by the caller.
13031 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13032 we use to verify that ORDER produces an ascending order of registers.
13033 Return true if it was possible to construct such an order, false if
13034 not. */
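/* For example, given NOPS == 3, UNSORTED_OFFSETS == {8, 0, 4} and
   ORDER[0] == 1 (the index of the lowest offset), ORDER is filled in
   as {1, 2, 0}.  With offsets {8, 0, 12} no valid order exists and
   false is returned.  */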
13035
13036 static bool
13037 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13038 int *unsorted_regs)
13039 {
13040 int i;
13041 for (i = 1; i < nops; i++)
13042 {
13043 int j;
13044
13045 order[i] = order[i - 1];
13046 for (j = 0; j < nops; j++)
13047 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13048 {
13049 /* We must find exactly one offset that is higher than the
13050 previous one by 4. */
13051 if (order[i] != order[i - 1])
13052 return false;
13053 order[i] = j;
13054 }
13055 if (order[i] == order[i - 1])
13056 return false;
13057 /* The register numbers must be ascending. */
13058 if (unsorted_regs != NULL
13059 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13060 return false;
13061 }
13062 return true;
13063 }
13064
13065 /* Used to determine in a peephole whether a sequence of load
13066 instructions can be changed into a load-multiple instruction.
13067 NOPS is the number of separate load instructions we are examining. The
13068 first NOPS entries in OPERANDS are the destination registers, the
13069 next NOPS entries are memory operands. If this function is
13070 successful, *BASE is set to the common base register of the memory
13071 accesses; *LOAD_OFFSET is set to the first memory location's offset
13072 from that base register.
13073 REGS is an array filled in with the destination register numbers.
13074 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13075 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13076 the sequence of registers in REGS matches the loads from ascending memory
13077 locations, and the function verifies that the register numbers are
13078 themselves ascending. If CHECK_REGS is false, the register numbers
13079 are stored in the order they are found in the operands. */
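/* The return value is 0 on failure, otherwise a code identifying the
   addressing mode that can be used: 1 for ldmia (lowest offset 0), 2 for
   ldmib (lowest offset 4), 3 for ldmda (highest offset 0), 4 for ldmdb
   (highest offset -4), or 5 if the base address must first be adjusted
   with an add instruction.  */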
13080 static int
13081 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13082 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13083 {
13084 int unsorted_regs[MAX_LDM_STM_OPS];
13085 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13086 int order[MAX_LDM_STM_OPS];
13087 rtx base_reg_rtx = NULL;
13088 int base_reg = -1;
13089 int i, ldm_case;
13090
13091 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13092 easily extended if required. */
13093 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13094
13095 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13096
13097 /* Loop over the operands and check that the memory references are
13098 suitable (i.e. immediate offsets from the same base register). At
13099 the same time, extract the target register, and the memory
13100 offsets. */
13101 for (i = 0; i < nops; i++)
13102 {
13103 rtx reg;
13104 rtx offset;
13105
13106 /* Convert a subreg of a mem into the mem itself. */
13107 if (GET_CODE (operands[nops + i]) == SUBREG)
13108 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13109
13110 gcc_assert (MEM_P (operands[nops + i]));
13111
13112 /* Don't reorder volatile memory references; it doesn't seem worth
13113 looking for the case where the order is ok anyway. */
13114 if (MEM_VOLATILE_P (operands[nops + i]))
13115 return 0;
13116
13117 offset = const0_rtx;
13118
13119 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13120 || (GET_CODE (reg) == SUBREG
13121 && REG_P (reg = SUBREG_REG (reg))))
13122 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13123 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13124 || (GET_CODE (reg) == SUBREG
13125 && REG_P (reg = SUBREG_REG (reg))))
13126 && (CONST_INT_P (offset
13127 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13128 {
13129 if (i == 0)
13130 {
13131 base_reg = REGNO (reg);
13132 base_reg_rtx = reg;
13133 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13134 return 0;
13135 }
13136 else if (base_reg != (int) REGNO (reg))
13137 /* Not addressed from the same base register. */
13138 return 0;
13139
13140 unsorted_regs[i] = (REG_P (operands[i])
13141 ? REGNO (operands[i])
13142 : REGNO (SUBREG_REG (operands[i])));
13143
13144 /* If it isn't an integer register, or if it overwrites the
13145 base register but isn't the last insn in the list, then
13146 we can't do this. */
13147 if (unsorted_regs[i] < 0
13148 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13149 || unsorted_regs[i] > 14
13150 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13151 return 0;
13152
13153 /* Don't allow SP to be loaded unless it is also the base
13154 register. It guarantees that SP is reset correctly when
13155 an LDM instruction is interrupted. Otherwise, we might
13156 end up with a corrupt stack. */
13157 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13158 return 0;
13159
13160 unsorted_offsets[i] = INTVAL (offset);
13161 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13162 order[0] = i;
13163 }
13164 else
13165 /* Not a suitable memory address. */
13166 return 0;
13167 }
13168
13169 /* All the useful information has now been extracted from the
13170 operands into unsorted_regs and unsorted_offsets; additionally,
13171 order[0] has been set to the lowest offset in the list. Sort
13172 the offsets into order, verifying that they are adjacent, and
13173 check that the register numbers are ascending. */
13174 if (!compute_offset_order (nops, unsorted_offsets, order,
13175 check_regs ? unsorted_regs : NULL))
13176 return 0;
13177
13178 if (saved_order)
13179 memcpy (saved_order, order, sizeof order);
13180
13181 if (base)
13182 {
13183 *base = base_reg;
13184
13185 for (i = 0; i < nops; i++)
13186 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13187
13188 *load_offset = unsorted_offsets[order[0]];
13189 }
13190
13191 if (TARGET_THUMB1
13192 && !peep2_reg_dead_p (nops, base_reg_rtx))
13193 return 0;
13194
13195 if (unsorted_offsets[order[0]] == 0)
13196 ldm_case = 1; /* ldmia */
13197 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13198 ldm_case = 2; /* ldmib */
13199 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13200 ldm_case = 3; /* ldmda */
13201 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13202 ldm_case = 4; /* ldmdb */
13203 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13204 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13205 ldm_case = 5;
13206 else
13207 return 0;
13208
13209 if (!multiple_operation_profitable_p (false, nops,
13210 ldm_case == 5
13211 ? unsorted_offsets[order[0]] : 0))
13212 return 0;
13213
13214 return ldm_case;
13215 }
13216
13217 /* Used to determine in a peephole whether a sequence of store instructions can
13218 be changed into a store-multiple instruction.
13219 NOPS is the number of separate store instructions we are examining.
13220 NOPS_TOTAL is the total number of instructions recognized by the peephole
13221 pattern.
13222 The first NOPS entries in OPERANDS are the source registers, the next
13223 NOPS entries are memory operands. If this function is successful, *BASE is
13224 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13225 to the first memory location's offset from that base register. REGS is an
13226 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13227 likewise filled with the corresponding rtx's.
13228 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13229 numbers to an ascending order of stores.
13230 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13231 from ascending memory locations, and the function verifies that the register
13232 numbers are themselves ascending. If CHECK_REGS is false, the register
13233 numbers are stored in the order they are found in the operands. */
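/* The return value is 0 on failure, otherwise a code identifying the
   addressing mode: 1 for stmia (lowest offset 0), 2 for stmib (lowest
   offset 4), 3 for stmda (highest offset 0) or 4 for stmdb (highest
   offset -4).  */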
13234 static int
13235 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13236 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13237 HOST_WIDE_INT *load_offset, bool check_regs)
13238 {
13239 int unsorted_regs[MAX_LDM_STM_OPS];
13240 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13241 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13242 int order[MAX_LDM_STM_OPS];
13243 int base_reg = -1;
13244 rtx base_reg_rtx = NULL;
13245 int i, stm_case;
13246
13247 /* Write-back of the base register is currently only supported for Thumb-1. */
13248 int base_writeback = TARGET_THUMB1;
13249
13250 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13251 easily extended if required. */
13252 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13253
13254 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13255
13256 /* Loop over the operands and check that the memory references are
13257 suitable (i.e. immediate offsets from the same base register). At
13258 the same time, extract the target register, and the memory
13259 offsets. */
13260 for (i = 0; i < nops; i++)
13261 {
13262 rtx reg;
13263 rtx offset;
13264
13265 /* Convert a subreg of a mem into the mem itself. */
13266 if (GET_CODE (operands[nops + i]) == SUBREG)
13267 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13268
13269 gcc_assert (MEM_P (operands[nops + i]));
13270
13271 /* Don't reorder volatile memory references; it doesn't seem worth
13272 looking for the case where the order is ok anyway. */
13273 if (MEM_VOLATILE_P (operands[nops + i]))
13274 return 0;
13275
13276 offset = const0_rtx;
13277
13278 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13279 || (GET_CODE (reg) == SUBREG
13280 && REG_P (reg = SUBREG_REG (reg))))
13281 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13282 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13283 || (GET_CODE (reg) == SUBREG
13284 && REG_P (reg = SUBREG_REG (reg))))
13285 && (CONST_INT_P (offset
13286 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13287 {
13288 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13289 ? operands[i] : SUBREG_REG (operands[i]));
13290 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13291
13292 if (i == 0)
13293 {
13294 base_reg = REGNO (reg);
13295 base_reg_rtx = reg;
13296 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13297 return 0;
13298 }
13299 else if (base_reg != (int) REGNO (reg))
13300 /* Not addressed from the same base register. */
13301 return 0;
13302
13303 /* If it isn't an integer register, then we can't do this. */
13304 if (unsorted_regs[i] < 0
13305 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13306 /* The effects are unpredictable if the base register is
13307 both updated and stored. */
13308 || (base_writeback && unsorted_regs[i] == base_reg)
13309 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13310 || unsorted_regs[i] > 14)
13311 return 0;
13312
13313 unsorted_offsets[i] = INTVAL (offset);
13314 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13315 order[0] = i;
13316 }
13317 else
13318 /* Not a suitable memory address. */
13319 return 0;
13320 }
13321
13322 /* All the useful information has now been extracted from the
13323 operands into unsorted_regs and unsorted_offsets; additionally,
13324 order[0] has been set to the lowest offset in the list. Sort
13325 the offsets into order, verifying that they are adjacent, and
13326 check that the register numbers are ascending. */
13327 if (!compute_offset_order (nops, unsorted_offsets, order,
13328 check_regs ? unsorted_regs : NULL))
13329 return 0;
13330
13331 if (saved_order)
13332 memcpy (saved_order, order, sizeof order);
13333
13334 if (base)
13335 {
13336 *base = base_reg;
13337
13338 for (i = 0; i < nops; i++)
13339 {
13340 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13341 if (reg_rtxs)
13342 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13343 }
13344
13345 *load_offset = unsorted_offsets[order[0]];
13346 }
13347
13348 if (TARGET_THUMB1
13349 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13350 return 0;
13351
13352 if (unsorted_offsets[order[0]] == 0)
13353 stm_case = 1; /* stmia */
13354 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13355 stm_case = 2; /* stmib */
13356 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13357 stm_case = 3; /* stmda */
13358 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13359 stm_case = 4; /* stmdb */
13360 else
13361 return 0;
13362
13363 if (!multiple_operation_profitable_p (false, nops, 0))
13364 return 0;
13365
13366 return stm_case;
13367 }
13368 \f
13369 /* Routines for use in generating RTL. */
13370
13371 /* Generate a load-multiple instruction. COUNT is the number of loads in
13372 the instruction; REGS and MEMS are arrays containing the operands.
13373 BASEREG is the base register to be used in addressing the memory operands.
13374 WBACK_OFFSET is nonzero if the instruction should update the base
13375 register. */
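/* Schematically, with COUNT == 2, REGS == {4, 5}, BASEREG == (reg:SI 0)
   and WBACK_OFFSET == 8, the result is of the form

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   whereas if the multiple operation is judged unprofitable, a plain
   sequence of single loads (plus the base update) is returned instead.  */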
13376
13377 static rtx
13378 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13379 HOST_WIDE_INT wback_offset)
13380 {
13381 int i = 0, j;
13382 rtx result;
13383
13384 if (!multiple_operation_profitable_p (false, count, 0))
13385 {
13386 rtx seq;
13387
13388 start_sequence ();
13389
13390 for (i = 0; i < count; i++)
13391 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13392
13393 if (wback_offset != 0)
13394 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13395
13396 seq = get_insns ();
13397 end_sequence ();
13398
13399 return seq;
13400 }
13401
13402 result = gen_rtx_PARALLEL (VOIDmode,
13403 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13404 if (wback_offset != 0)
13405 {
13406 XVECEXP (result, 0, 0)
13407 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13408 i = 1;
13409 count++;
13410 }
13411
13412 for (j = 0; i < count; i++, j++)
13413 XVECEXP (result, 0, i)
13414 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13415
13416 return result;
13417 }
13418
13419 /* Generate a store-multiple instruction. COUNT is the number of stores in
13420 the instruction; REGS and MEMS are arrays containing the operands.
13421 BASEREG is the base register to be used in addressing the memory operands.
13422 WBACK_OFFSET is nonzero if the instruction should update the base
13423 register. */
13424
13425 static rtx
13426 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13427 HOST_WIDE_INT wback_offset)
13428 {
13429 int i = 0, j;
13430 rtx result;
13431
13432 if (GET_CODE (basereg) == PLUS)
13433 basereg = XEXP (basereg, 0);
13434
13435 if (!multiple_operation_profitable_p (false, count, 0))
13436 {
13437 rtx seq;
13438
13439 start_sequence ();
13440
13441 for (i = 0; i < count; i++)
13442 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13443
13444 if (wback_offset != 0)
13445 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13446
13447 seq = get_insns ();
13448 end_sequence ();
13449
13450 return seq;
13451 }
13452
13453 result = gen_rtx_PARALLEL (VOIDmode,
13454 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13455 if (wback_offset != 0)
13456 {
13457 XVECEXP (result, 0, 0)
13458 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13459 i = 1;
13460 count++;
13461 }
13462
13463 for (j = 0; i < count; i++, j++)
13464 XVECEXP (result, 0, i)
13465 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13466
13467 return result;
13468 }
13469
13470 /* Generate either a load-multiple or a store-multiple instruction. This
13471 function can be used in situations where we can start with a single MEM
13472 rtx and adjust its address upwards.
13473 COUNT is the number of operations in the instruction, not counting a
13474 possible update of the base register. REGS is an array containing the
13475 register operands.
13476 BASEREG is the base register to be used in addressing the memory operands,
13477 which are constructed from BASEMEM.
13478 WRITE_BACK specifies whether the generated instruction should include an
13479 update of the base register.
13480 OFFSETP is used to pass an offset to and from this function; this offset
13481 is not used when constructing the address (instead BASEMEM should have an
13482 appropriate offset in its address); it is used only for setting
13483 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13484
13485 static rtx
13486 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13487 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13488 {
13489 rtx mems[MAX_LDM_STM_OPS];
13490 HOST_WIDE_INT offset = *offsetp;
13491 int i;
13492
13493 gcc_assert (count <= MAX_LDM_STM_OPS);
13494
13495 if (GET_CODE (basereg) == PLUS)
13496 basereg = XEXP (basereg, 0);
13497
13498 for (i = 0; i < count; i++)
13499 {
13500 rtx addr = plus_constant (Pmode, basereg, i * 4);
13501 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13502 offset += 4;
13503 }
13504
13505 if (write_back)
13506 *offsetp = offset;
13507
13508 if (is_load)
13509 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13510 write_back ? 4 * count : 0);
13511 else
13512 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13513 write_back ? 4 * count : 0);
13514 }
13515
13516 rtx
13517 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13518 rtx basemem, HOST_WIDE_INT *offsetp)
13519 {
13520 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13521 offsetp);
13522 }
13523
13524 rtx
13525 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13526 rtx basemem, HOST_WIDE_INT *offsetp)
13527 {
13528 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13529 offsetp);
13530 }
13531
13532 /* Called from a peephole2 expander to turn a sequence of loads into an
13533 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13534 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13535 is true if we can reorder the registers because they are subsequently used
13536 commutatively.
13537 Returns true iff we could generate a new instruction. */
13538
13539 bool
13540 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13541 {
13542 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13543 rtx mems[MAX_LDM_STM_OPS];
13544 int i, j, base_reg;
13545 rtx base_reg_rtx;
13546 HOST_WIDE_INT offset;
13547 int write_back = FALSE;
13548 int ldm_case;
13549 rtx addr;
13550
13551 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13552 &base_reg, &offset, !sort_regs);
13553
13554 if (ldm_case == 0)
13555 return false;
13556
13557 if (sort_regs)
13558 for (i = 0; i < nops - 1; i++)
13559 for (j = i + 1; j < nops; j++)
13560 if (regs[i] > regs[j])
13561 {
13562 int t = regs[i];
13563 regs[i] = regs[j];
13564 regs[j] = t;
13565 }
13566 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13567
13568 if (TARGET_THUMB1)
13569 {
13570 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13571 gcc_assert (ldm_case == 1 || ldm_case == 5);
13572 write_back = TRUE;
13573 }
13574
13575 if (ldm_case == 5)
13576 {
13577 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13578 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13579 offset = 0;
13580 if (!TARGET_THUMB1)
13581 {
13582 base_reg = regs[0];
13583 base_reg_rtx = newbase;
13584 }
13585 }
13586
13587 for (i = 0; i < nops; i++)
13588 {
13589 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13590 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13591 SImode, addr, 0);
13592 }
13593 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13594 write_back ? offset + i * 4 : 0));
13595 return true;
13596 }
13597
13598 /* Called from a peephole2 expander to turn a sequence of stores into an
13599 STM instruction. OPERANDS are the operands found by the peephole matcher;
13600 NOPS indicates how many separate stores we are trying to combine.
13601 Returns true iff we could generate a new instruction. */
13602
13603 bool
13604 gen_stm_seq (rtx *operands, int nops)
13605 {
13606 int i;
13607 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13608 rtx mems[MAX_LDM_STM_OPS];
13609 int base_reg;
13610 rtx base_reg_rtx;
13611 HOST_WIDE_INT offset;
13612 int write_back = FALSE;
13613 int stm_case;
13614 rtx addr;
13615 bool base_reg_dies;
13616
13617 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13618 mem_order, &base_reg, &offset, true);
13619
13620 if (stm_case == 0)
13621 return false;
13622
13623 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13624
13625 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13626 if (TARGET_THUMB1)
13627 {
13628 gcc_assert (base_reg_dies);
13629 write_back = TRUE;
13630 }
13631
13632 if (stm_case == 5)
13633 {
13634 gcc_assert (base_reg_dies);
13635 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13636 offset = 0;
13637 }
13638
13639 addr = plus_constant (Pmode, base_reg_rtx, offset);
13640
13641 for (i = 0; i < nops; i++)
13642 {
13643 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13644 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13645 SImode, addr, 0);
13646 }
13647 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13648 write_back ? offset + i * 4 : 0));
13649 return true;
13650 }
13651
13652 /* Called from a peephole2 expander to turn a sequence of stores that are
13653 preceded by constant loads into an STM instruction. OPERANDS are the
13654 operands found by the peephole matcher; NOPS indicates how many
13655 separate stores we are trying to combine; there are 2 * NOPS
13656 instructions in the peephole.
13657 Returns true iff we could generate a new instruction. */
13658
13659 bool
13660 gen_const_stm_seq (rtx *operands, int nops)
13661 {
13662 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13663 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13664 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13665 rtx mems[MAX_LDM_STM_OPS];
13666 int base_reg;
13667 rtx base_reg_rtx;
13668 HOST_WIDE_INT offset;
13669 int write_back = FALSE;
13670 int stm_case;
13671 rtx addr;
13672 bool base_reg_dies;
13673 int i, j;
13674 HARD_REG_SET allocated;
13675
13676 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13677 mem_order, &base_reg, &offset, false);
13678
13679 if (stm_case == 0)
13680 return false;
13681
13682 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13683
13684 /* If the same register is used more than once, try to find a free
13685 register. */
13686 CLEAR_HARD_REG_SET (allocated);
13687 for (i = 0; i < nops; i++)
13688 {
13689 for (j = i + 1; j < nops; j++)
13690 if (regs[i] == regs[j])
13691 {
13692 rtx t = peep2_find_free_register (0, nops * 2,
13693 TARGET_THUMB1 ? "l" : "r",
13694 SImode, &allocated);
13695 if (t == NULL_RTX)
13696 return false;
13697 reg_rtxs[i] = t;
13698 regs[i] = REGNO (t);
13699 }
13700 }
13701
13702 /* Compute an ordering that maps the register numbers to an ascending
13703 sequence. */
13704 reg_order[0] = 0;
13705 for (i = 0; i < nops; i++)
13706 if (regs[i] < regs[reg_order[0]])
13707 reg_order[0] = i;
13708
13709 for (i = 1; i < nops; i++)
13710 {
13711 int this_order = reg_order[i - 1];
13712 for (j = 0; j < nops; j++)
13713 if (regs[j] > regs[reg_order[i - 1]]
13714 && (this_order == reg_order[i - 1]
13715 || regs[j] < regs[this_order]))
13716 this_order = j;
13717 reg_order[i] = this_order;
13718 }
13719
13720 /* Ensure that registers that must be live after the instruction end
13721 up with the correct value. */
13722 for (i = 0; i < nops; i++)
13723 {
13724 int this_order = reg_order[i];
13725 if ((this_order != mem_order[i]
13726 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13727 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13728 return false;
13729 }
13730
13731 /* Load the constants. */
13732 for (i = 0; i < nops; i++)
13733 {
13734 rtx op = operands[2 * nops + mem_order[i]];
13735 sorted_regs[i] = regs[reg_order[i]];
13736 emit_move_insn (reg_rtxs[reg_order[i]], op);
13737 }
13738
13739 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13740
13741 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13742 if (TARGET_THUMB1)
13743 {
13744 gcc_assert (base_reg_dies);
13745 write_back = TRUE;
13746 }
13747
13748 if (stm_case == 5)
13749 {
13750 gcc_assert (base_reg_dies);
13751 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13752 offset = 0;
13753 }
13754
13755 addr = plus_constant (Pmode, base_reg_rtx, offset);
13756
13757 for (i = 0; i < nops; i++)
13758 {
13759 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13760 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13761 SImode, addr, 0);
13762 }
13763 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13764 write_back ? offset + i * 4 : 0));
13765 return true;
13766 }
13767
13768 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13769 unaligned copies on processors which support unaligned semantics for those
13770 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13771 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13772 An interleave factor of 1 (the minimum) will perform no interleaving.
13773 Load/store multiple are used for aligned addresses where possible. */
13774
13775 static void
13776 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13777 HOST_WIDE_INT length,
13778 unsigned int interleave_factor)
13779 {
13780 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13781 int *regnos = XALLOCAVEC (int, interleave_factor);
13782 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13783 HOST_WIDE_INT i, j;
13784 HOST_WIDE_INT remaining = length, words;
13785 rtx halfword_tmp = NULL, byte_tmp = NULL;
13786 rtx dst, src;
13787 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13788 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13789 HOST_WIDE_INT srcoffset, dstoffset;
13790 HOST_WIDE_INT src_autoinc, dst_autoinc;
13791 rtx mem, addr;
13792
13793 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13794
13795 /* Use hard registers if we have aligned source or destination so we can use
13796 load/store multiple with contiguous registers. */
13797 if (dst_aligned || src_aligned)
13798 for (i = 0; i < interleave_factor; i++)
13799 regs[i] = gen_rtx_REG (SImode, i);
13800 else
13801 for (i = 0; i < interleave_factor; i++)
13802 regs[i] = gen_reg_rtx (SImode);
13803
13804 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13805 src = copy_addr_to_reg (XEXP (srcbase, 0));
13806
13807 srcoffset = dstoffset = 0;
13808
13809 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13810 For copying the last bytes we want to subtract this offset again. */
13811 src_autoinc = dst_autoinc = 0;
13812
13813 for (i = 0; i < interleave_factor; i++)
13814 regnos[i] = i;
13815
13816 /* Copy BLOCK_SIZE_BYTES chunks. */
13817
13818 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13819 {
13820 /* Load words. */
13821 if (src_aligned && interleave_factor > 1)
13822 {
13823 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13824 TRUE, srcbase, &srcoffset));
13825 src_autoinc += UNITS_PER_WORD * interleave_factor;
13826 }
13827 else
13828 {
13829 for (j = 0; j < interleave_factor; j++)
13830 {
13831 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13832 - src_autoinc));
13833 mem = adjust_automodify_address (srcbase, SImode, addr,
13834 srcoffset + j * UNITS_PER_WORD);
13835 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13836 }
13837 srcoffset += block_size_bytes;
13838 }
13839
13840 /* Store words. */
13841 if (dst_aligned && interleave_factor > 1)
13842 {
13843 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13844 TRUE, dstbase, &dstoffset));
13845 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13846 }
13847 else
13848 {
13849 for (j = 0; j < interleave_factor; j++)
13850 {
13851 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13852 - dst_autoinc));
13853 mem = adjust_automodify_address (dstbase, SImode, addr,
13854 dstoffset + j * UNITS_PER_WORD);
13855 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13856 }
13857 dstoffset += block_size_bytes;
13858 }
13859
13860 remaining -= block_size_bytes;
13861 }
13862
13863 /* Copy any whole words left (note these aren't interleaved with any
13864 subsequent halfword/byte load/stores in the interests of simplicity). */
13865
13866 words = remaining / UNITS_PER_WORD;
13867
13868 gcc_assert (words < interleave_factor);
13869
13870 if (src_aligned && words > 1)
13871 {
13872 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13873 &srcoffset));
13874 src_autoinc += UNITS_PER_WORD * words;
13875 }
13876 else
13877 {
13878 for (j = 0; j < words; j++)
13879 {
13880 addr = plus_constant (Pmode, src,
13881 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13882 mem = adjust_automodify_address (srcbase, SImode, addr,
13883 srcoffset + j * UNITS_PER_WORD);
13884 if (src_aligned)
13885 emit_move_insn (regs[j], mem);
13886 else
13887 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13888 }
13889 srcoffset += words * UNITS_PER_WORD;
13890 }
13891
13892 if (dst_aligned && words > 1)
13893 {
13894 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13895 &dstoffset));
13896 dst_autoinc += words * UNITS_PER_WORD;
13897 }
13898 else
13899 {
13900 for (j = 0; j < words; j++)
13901 {
13902 addr = plus_constant (Pmode, dst,
13903 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13904 mem = adjust_automodify_address (dstbase, SImode, addr,
13905 dstoffset + j * UNITS_PER_WORD);
13906 if (dst_aligned)
13907 emit_move_insn (mem, regs[j]);
13908 else
13909 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13910 }
13911 dstoffset += words * UNITS_PER_WORD;
13912 }
13913
13914 remaining -= words * UNITS_PER_WORD;
13915
13916 gcc_assert (remaining < 4);
13917
13918 /* Copy a halfword if necessary. */
13919
13920 if (remaining >= 2)
13921 {
13922 halfword_tmp = gen_reg_rtx (SImode);
13923
13924 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13925 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13926 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13927
13928 /* Either write out immediately, or delay until we've loaded the last
13929 byte, depending on interleave factor. */
13930 if (interleave_factor == 1)
13931 {
13932 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13933 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13934 emit_insn (gen_unaligned_storehi (mem,
13935 gen_lowpart (HImode, halfword_tmp)));
13936 halfword_tmp = NULL;
13937 dstoffset += 2;
13938 }
13939
13940 remaining -= 2;
13941 srcoffset += 2;
13942 }
13943
13944 gcc_assert (remaining < 2);
13945
13946 /* Copy last byte. */
13947
13948 if ((remaining & 1) != 0)
13949 {
13950 byte_tmp = gen_reg_rtx (SImode);
13951
13952 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13953 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13954 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13955
13956 if (interleave_factor == 1)
13957 {
13958 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13959 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13960 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13961 byte_tmp = NULL;
13962 dstoffset++;
13963 }
13964
13965 remaining--;
13966 srcoffset++;
13967 }
13968
13969 /* Store last halfword if we haven't done so already. */
13970
13971 if (halfword_tmp)
13972 {
13973 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13974 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13975 emit_insn (gen_unaligned_storehi (mem,
13976 gen_lowpart (HImode, halfword_tmp)));
13977 dstoffset += 2;
13978 }
13979
13980 /* Likewise for last byte. */
13981
13982 if (byte_tmp)
13983 {
13984 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13985 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13986 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13987 dstoffset++;
13988 }
13989
13990 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13991 }
13992
13993 /* From mips_adjust_block_mem:
13994
13995 Helper function for doing a loop-based block operation on memory
13996 reference MEM. Each iteration of the loop will operate on LENGTH
13997 bytes of MEM.
13998
13999 Create a new base register for use within the loop and point it to
14000 the start of MEM. Create a new memory reference that uses this
14001 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14002
14003 static void
14004 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14005 rtx *loop_mem)
14006 {
14007 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14008
14009 /* Although the new mem does not refer to a known location,
14010 it does keep up to LENGTH bytes of alignment. */
14011 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14012 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14013 }
14014
14015 /* From mips_block_move_loop:
14016
14017 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14018 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14019 the memory regions do not overlap. */
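/* Schematically, the emitted RTL corresponds to

     final_src = src + (LENGTH - LENGTH % BYTES_PER_ITER);
   loop:
     copy BYTES_PER_ITER bytes from src to dest;
     src += BYTES_PER_ITER;
     dest += BYTES_PER_ITER;
     if (src != final_src) goto loop;
     copy the remaining LENGTH % BYTES_PER_ITER bytes;  */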
14020
14021 static void
14022 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14023 unsigned int interleave_factor,
14024 HOST_WIDE_INT bytes_per_iter)
14025 {
14026 rtx src_reg, dest_reg, final_src, test;
14027 HOST_WIDE_INT leftover;
14028
14029 leftover = length % bytes_per_iter;
14030 length -= leftover;
14031
14032 /* Create registers and memory references for use within the loop. */
14033 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14034 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14035
14036 /* Calculate the value that SRC_REG should have after the last iteration of
14037 the loop. */
14038 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14039 0, 0, OPTAB_WIDEN);
14040
14041 /* Emit the start of the loop. */
14042 rtx_code_label *label = gen_label_rtx ();
14043 emit_label (label);
14044
14045 /* Emit the loop body. */
14046 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14047 interleave_factor);
14048
14049 /* Move on to the next block. */
14050 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14051 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14052
14053 /* Emit the loop condition. */
14054 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14055 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14056
14057 /* Mop up any left-over bytes. */
14058 if (leftover)
14059 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14060 }
14061
14062 /* Emit a block move when either the source or destination is unaligned (not
14063 aligned to a four-byte boundary). This may need further tuning depending on
14064 core type, optimize_size setting, etc. */
14065
14066 static int
14067 arm_movmemqi_unaligned (rtx *operands)
14068 {
14069 HOST_WIDE_INT length = INTVAL (operands[2]);
14070
14071 if (optimize_size)
14072 {
14073 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14074 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14075 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14076 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14077 or dst_aligned though: allow more interleaving in those cases since the
14078 resulting code can be smaller. */
14079 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14080 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14081
14082 if (length > 12)
14083 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14084 interleave_factor, bytes_per_iter);
14085 else
14086 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14087 interleave_factor);
14088 }
14089 else
14090 {
14091 /* Note that the loop created by arm_block_move_unaligned_loop may be
14092 subject to loop unrolling, which makes tuning this condition a little
14093 redundant. */
14094 if (length > 32)
14095 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14096 else
14097 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14098 }
14099
14100 return 1;
14101 }
14102
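/* Expand a block copy (as used by the movmemqi pattern).  OPERANDS[0] and
   OPERANDS[1] are the destination and source memory references,
   OPERANDS[2] is the number of bytes to copy and OPERANDS[3] is the
   guaranteed alignment.  Return 1 if the copy was expanded, 0 if it
   cannot be handled here.  */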
14103 int
14104 arm_gen_movmemqi (rtx *operands)
14105 {
14106 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14107 HOST_WIDE_INT srcoffset, dstoffset;
14108 int i;
14109 rtx src, dst, srcbase, dstbase;
14110 rtx part_bytes_reg = NULL;
14111 rtx mem;
14112
14113 if (!CONST_INT_P (operands[2])
14114 || !CONST_INT_P (operands[3])
14115 || INTVAL (operands[2]) > 64)
14116 return 0;
14117
14118 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14119 return arm_movmemqi_unaligned (operands);
14120
14121 if (INTVAL (operands[3]) & 3)
14122 return 0;
14123
14124 dstbase = operands[0];
14125 srcbase = operands[1];
14126
14127 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14128 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14129
14130 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14131 out_words_to_go = INTVAL (operands[2]) / 4;
14132 last_bytes = INTVAL (operands[2]) & 3;
14133 dstoffset = srcoffset = 0;
14134
14135 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14136 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14137
14138 for (i = 0; in_words_to_go >= 2; i+=4)
14139 {
14140 if (in_words_to_go > 4)
14141 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14142 TRUE, srcbase, &srcoffset));
14143 else
14144 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14145 src, FALSE, srcbase,
14146 &srcoffset));
14147
14148 if (out_words_to_go)
14149 {
14150 if (out_words_to_go > 4)
14151 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14152 TRUE, dstbase, &dstoffset));
14153 else if (out_words_to_go != 1)
14154 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14155 out_words_to_go, dst,
14156 (last_bytes == 0
14157 ? FALSE : TRUE),
14158 dstbase, &dstoffset));
14159 else
14160 {
14161 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14162 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14163 if (last_bytes != 0)
14164 {
14165 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14166 dstoffset += 4;
14167 }
14168 }
14169 }
14170
14171 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14172 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14173 }
14174
14175 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14176 if (out_words_to_go)
14177 {
14178 rtx sreg;
14179
14180 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14181 sreg = copy_to_reg (mem);
14182
14183 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14184 emit_move_insn (mem, sreg);
14185 in_words_to_go--;
14186
14187 gcc_assert (!in_words_to_go); /* Sanity check */
14188 }
14189
14190 if (in_words_to_go)
14191 {
14192 gcc_assert (in_words_to_go > 0);
14193
14194 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14195 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14196 }
14197
14198 gcc_assert (!last_bytes || part_bytes_reg);
14199
14200 if (BYTES_BIG_ENDIAN && last_bytes)
14201 {
14202 rtx tmp = gen_reg_rtx (SImode);
14203
14204 /* The bytes we want are in the top end of the word. */
14205 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14206 GEN_INT (8 * (4 - last_bytes))));
14207 part_bytes_reg = tmp;
14208
14209 while (last_bytes)
14210 {
14211 mem = adjust_automodify_address (dstbase, QImode,
14212 plus_constant (Pmode, dst,
14213 last_bytes - 1),
14214 dstoffset + last_bytes - 1);
14215 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14216
14217 if (--last_bytes)
14218 {
14219 tmp = gen_reg_rtx (SImode);
14220 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14221 part_bytes_reg = tmp;
14222 }
14223 }
14224
14225 }
14226 else
14227 {
14228 if (last_bytes > 1)
14229 {
14230 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14231 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14232 last_bytes -= 2;
14233 if (last_bytes)
14234 {
14235 rtx tmp = gen_reg_rtx (SImode);
14236 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14237 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14238 part_bytes_reg = tmp;
14239 dstoffset += 2;
14240 }
14241 }
14242
14243 if (last_bytes)
14244 {
14245 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14246 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14247 }
14248 }
14249
14250 return 1;
14251 }
14252
14253 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14254 MEM by its mode size. */
14255 inline static rtx
14256 next_consecutive_mem (rtx mem)
14257 {
14258 machine_mode mode = GET_MODE (mem);
14259 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14260 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14261
14262 return adjust_automodify_address (mem, mode, addr, offset);
14263 }
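/* Illustrative note (register and offsets here are only an example): given
   an SImode MEM whose address is (plus (reg r3) (const_int 8)), the helper
   above returns an SImode MEM addressing (plus (reg r3) (const_int 12)),
   i.e. the next word in memory, with the MEM attributes advanced by 4.  */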
14264
14265 /* Copy using LDRD/STRD instructions whenever possible.
14266 Returns true upon success. */
14267 bool
14268 gen_movmem_ldrd_strd (rtx *operands)
14269 {
14270 unsigned HOST_WIDE_INT len;
14271 HOST_WIDE_INT align;
14272 rtx src, dst, base;
14273 rtx reg0;
14274 bool src_aligned, dst_aligned;
14275 bool src_volatile, dst_volatile;
14276
14277 gcc_assert (CONST_INT_P (operands[2]));
14278 gcc_assert (CONST_INT_P (operands[3]));
14279
14280 len = UINTVAL (operands[2]);
14281 if (len > 64)
14282 return false;
14283
14284 /* Maximum alignment we can assume for both src and dst buffers. */
14285 align = INTVAL (operands[3]);
14286
14287 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14288 return false;
14289
14290 /* Place src and dst addresses in registers
14291 and update the corresponding mem rtx. */
14292 dst = operands[0];
14293 dst_volatile = MEM_VOLATILE_P (dst);
14294 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14295 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14296 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14297
14298 src = operands[1];
14299 src_volatile = MEM_VOLATILE_P (src);
14300 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14301 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14302 src = adjust_automodify_address (src, VOIDmode, base, 0);
14303
14304 if (!unaligned_access && !(src_aligned && dst_aligned))
14305 return false;
14306
14307 if (src_volatile || dst_volatile)
14308 return false;
14309
14310 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14311 if (!(dst_aligned || src_aligned))
14312 return arm_gen_movmemqi (operands);
14313
14314   /* If either the src or dst is unaligned, we'll be accessing it as pairs
14315      of unaligned SImode accesses.  Otherwise we can generate DImode
14316      ldrd/strd instructions.  */
14317 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14318 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14319
14320 while (len >= 8)
14321 {
14322 len -= 8;
14323 reg0 = gen_reg_rtx (DImode);
14324 rtx low_reg = NULL_RTX;
14325 rtx hi_reg = NULL_RTX;
14326
14327 if (!src_aligned || !dst_aligned)
14328 {
14329 low_reg = gen_lowpart (SImode, reg0);
14330 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14331 }
14332 if (src_aligned)
14333 emit_move_insn (reg0, src);
14334 else
14335 {
14336 emit_insn (gen_unaligned_loadsi (low_reg, src));
14337 src = next_consecutive_mem (src);
14338 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14339 }
14340
14341 if (dst_aligned)
14342 emit_move_insn (dst, reg0);
14343 else
14344 {
14345 emit_insn (gen_unaligned_storesi (dst, low_reg));
14346 dst = next_consecutive_mem (dst);
14347 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14348 }
14349
14350 src = next_consecutive_mem (src);
14351 dst = next_consecutive_mem (dst);
14352 }
14353
14354 gcc_assert (len < 8);
14355 if (len >= 4)
14356 {
14357       /* At least a word, but less than a double-word, to copy.  Copy a word.  */
14358 reg0 = gen_reg_rtx (SImode);
14359 src = adjust_address (src, SImode, 0);
14360 dst = adjust_address (dst, SImode, 0);
14361 if (src_aligned)
14362 emit_move_insn (reg0, src);
14363 else
14364 emit_insn (gen_unaligned_loadsi (reg0, src));
14365
14366 if (dst_aligned)
14367 emit_move_insn (dst, reg0);
14368 else
14369 emit_insn (gen_unaligned_storesi (dst, reg0));
14370
14371 src = next_consecutive_mem (src);
14372 dst = next_consecutive_mem (dst);
14373 len -= 4;
14374 }
14375
14376 if (len == 0)
14377 return true;
14378
14379 /* Copy the remaining bytes. */
14380 if (len >= 2)
14381 {
14382 dst = adjust_address (dst, HImode, 0);
14383 src = adjust_address (src, HImode, 0);
14384 reg0 = gen_reg_rtx (SImode);
14385 if (src_aligned)
14386 emit_insn (gen_zero_extendhisi2 (reg0, src));
14387 else
14388 emit_insn (gen_unaligned_loadhiu (reg0, src));
14389
14390 if (dst_aligned)
14391 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14392 else
14393 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14394
14395 src = next_consecutive_mem (src);
14396 dst = next_consecutive_mem (dst);
14397 if (len == 2)
14398 return true;
14399 }
14400
14401 dst = adjust_address (dst, QImode, 0);
14402 src = adjust_address (src, QImode, 0);
14403 reg0 = gen_reg_rtx (QImode);
14404 emit_move_insn (reg0, src);
14405 emit_move_insn (dst, reg0);
14406 return true;
14407 }
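/* A rough sketch of what the expander above might emit, purely for
   illustration: copying 14 bytes between two word-aligned buffers could end
   up as something like

	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]
	str	r4, [r0, #8]
	ldrh	r4, [r1, #12]
	strh	r4, [r0, #12]

   where the register numbers are chosen by the register allocator and are
   not fixed by this code.  */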
14408
14409 /* Select a dominance comparison mode if possible for a test of the general
14410 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14411 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14412 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14413 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14414 In all cases OP will be either EQ or NE, but we don't need to know which
14415 here. If we are unable to support a dominance comparison we return
14416    here.  If we are unable to support a dominance comparison we return
14417 generate this call. */
14418 machine_mode
14419 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14420 {
14421 enum rtx_code cond1, cond2;
14422 int swapped = 0;
14423
14424 /* Currently we will probably get the wrong result if the individual
14425 comparisons are not simple. This also ensures that it is safe to
14426 reverse a comparison if necessary. */
14427 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14428 != CCmode)
14429 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14430 != CCmode))
14431 return CCmode;
14432
14433 /* The if_then_else variant of this tests the second condition if the
14434 first passes, but is true if the first fails. Reverse the first
14435 condition to get a true "inclusive-or" expression. */
14436 if (cond_or == DOM_CC_NX_OR_Y)
14437 cond1 = reverse_condition (cond1);
14438
14439 /* If the comparisons are not equal, and one doesn't dominate the other,
14440 then we can't do this. */
14441 if (cond1 != cond2
14442 && !comparison_dominates_p (cond1, cond2)
14443 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14444 return CCmode;
14445
14446 if (swapped)
14447 std::swap (cond1, cond2);
14448
14449 switch (cond1)
14450 {
14451 case EQ:
14452 if (cond_or == DOM_CC_X_AND_Y)
14453 return CC_DEQmode;
14454
14455 switch (cond2)
14456 {
14457 case EQ: return CC_DEQmode;
14458 case LE: return CC_DLEmode;
14459 case LEU: return CC_DLEUmode;
14460 case GE: return CC_DGEmode;
14461 case GEU: return CC_DGEUmode;
14462 default: gcc_unreachable ();
14463 }
14464
14465 case LT:
14466 if (cond_or == DOM_CC_X_AND_Y)
14467 return CC_DLTmode;
14468
14469 switch (cond2)
14470 {
14471 case LT:
14472 return CC_DLTmode;
14473 case LE:
14474 return CC_DLEmode;
14475 case NE:
14476 return CC_DNEmode;
14477 default:
14478 gcc_unreachable ();
14479 }
14480
14481 case GT:
14482 if (cond_or == DOM_CC_X_AND_Y)
14483 return CC_DGTmode;
14484
14485 switch (cond2)
14486 {
14487 case GT:
14488 return CC_DGTmode;
14489 case GE:
14490 return CC_DGEmode;
14491 case NE:
14492 return CC_DNEmode;
14493 default:
14494 gcc_unreachable ();
14495 }
14496
14497 case LTU:
14498 if (cond_or == DOM_CC_X_AND_Y)
14499 return CC_DLTUmode;
14500
14501 switch (cond2)
14502 {
14503 case LTU:
14504 return CC_DLTUmode;
14505 case LEU:
14506 return CC_DLEUmode;
14507 case NE:
14508 return CC_DNEmode;
14509 default:
14510 gcc_unreachable ();
14511 }
14512
14513 case GTU:
14514 if (cond_or == DOM_CC_X_AND_Y)
14515 return CC_DGTUmode;
14516
14517 switch (cond2)
14518 {
14519 case GTU:
14520 return CC_DGTUmode;
14521 case GEU:
14522 return CC_DGEUmode;
14523 case NE:
14524 return CC_DNEmode;
14525 default:
14526 gcc_unreachable ();
14527 }
14528
14529 /* The remaining cases only occur when both comparisons are the
14530 same. */
14531 case NE:
14532 gcc_assert (cond1 == cond2);
14533 return CC_DNEmode;
14534
14535 case LE:
14536 gcc_assert (cond1 == cond2);
14537 return CC_DLEmode;
14538
14539 case GE:
14540 gcc_assert (cond1 == cond2);
14541 return CC_DGEmode;
14542
14543 case LEU:
14544 gcc_assert (cond1 == cond2);
14545 return CC_DLEUmode;
14546
14547 case GEU:
14548 gcc_assert (cond1 == cond2);
14549 return CC_DGEUmode;
14550
14551 default:
14552 gcc_unreachable ();
14553 }
14554 }
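/* Worked example for the dominance machinery above (register numbers are
   illustrative only): for a test such as (x == 0 || y == 0), both
   sub-comparisons are EQ and COND_OR is DOM_CC_X_OR_Y, so CC_DEQmode is
   returned and the insn patterns can emit an ARM-state conditional compare
   sequence roughly like

	cmp	r0, #0
	cmpne	r1, #0
	beq	.Ltrue

   where the second compare is only executed if the first one did not
   already succeed.  */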
14555
14556 machine_mode
14557 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14558 {
14559 /* All floating point compares return CCFP if it is an equality
14560 comparison, and CCFPE otherwise. */
14561 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14562 {
14563 switch (op)
14564 {
14565 case EQ:
14566 case NE:
14567 case UNORDERED:
14568 case ORDERED:
14569 case UNLT:
14570 case UNLE:
14571 case UNGT:
14572 case UNGE:
14573 case UNEQ:
14574 case LTGT:
14575 return CCFPmode;
14576
14577 case LT:
14578 case LE:
14579 case GT:
14580 case GE:
14581 return CCFPEmode;
14582
14583 default:
14584 gcc_unreachable ();
14585 }
14586 }
14587
14588 /* A compare with a shifted operand. Because of canonicalization, the
14589 comparison will have to be swapped when we emit the assembler. */
14590 if (GET_MODE (y) == SImode
14591 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14592 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14593 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14594 || GET_CODE (x) == ROTATERT))
14595 return CC_SWPmode;
14596
14597 /* This operation is performed swapped, but since we only rely on the Z
14598 flag we don't need an additional mode. */
14599 if (GET_MODE (y) == SImode
14600 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14601 && GET_CODE (x) == NEG
14602 && (op == EQ || op == NE))
14603 return CC_Zmode;
14604
14605 /* This is a special case that is used by combine to allow a
14606 comparison of a shifted byte load to be split into a zero-extend
14607 followed by a comparison of the shifted integer (only valid for
14608 equalities and unsigned inequalities). */
14609 if (GET_MODE (x) == SImode
14610 && GET_CODE (x) == ASHIFT
14611 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14612 && GET_CODE (XEXP (x, 0)) == SUBREG
14613 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14614 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14615 && (op == EQ || op == NE
14616 || op == GEU || op == GTU || op == LTU || op == LEU)
14617 && CONST_INT_P (y))
14618 return CC_Zmode;
14619
14620 /* A construct for a conditional compare, if the false arm contains
14621 0, then both conditions must be true, otherwise either condition
14622 must be true. Not all conditions are possible, so CCmode is
14623 returned if it can't be done. */
14624 if (GET_CODE (x) == IF_THEN_ELSE
14625 && (XEXP (x, 2) == const0_rtx
14626 || XEXP (x, 2) == const1_rtx)
14627 && COMPARISON_P (XEXP (x, 0))
14628 && COMPARISON_P (XEXP (x, 1)))
14629 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14630 INTVAL (XEXP (x, 2)));
14631
14632 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14633 if (GET_CODE (x) == AND
14634 && (op == EQ || op == NE)
14635 && COMPARISON_P (XEXP (x, 0))
14636 && COMPARISON_P (XEXP (x, 1)))
14637 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14638 DOM_CC_X_AND_Y);
14639
14640 if (GET_CODE (x) == IOR
14641 && (op == EQ || op == NE)
14642 && COMPARISON_P (XEXP (x, 0))
14643 && COMPARISON_P (XEXP (x, 1)))
14644 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14645 DOM_CC_X_OR_Y);
14646
14647 /* An operation (on Thumb) where we want to test for a single bit.
14648 This is done by shifting that bit up into the top bit of a
14649 scratch register; we can then branch on the sign bit. */
14650 if (TARGET_THUMB1
14651 && GET_MODE (x) == SImode
14652 && (op == EQ || op == NE)
14653 && GET_CODE (x) == ZERO_EXTRACT
14654 && XEXP (x, 1) == const1_rtx)
14655 return CC_Nmode;
14656
14657 /* An operation that sets the condition codes as a side-effect, the
14658 V flag is not set correctly, so we can only use comparisons where
14659 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14660 instead.) */
14661 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14662 if (GET_MODE (x) == SImode
14663 && y == const0_rtx
14664 && (op == EQ || op == NE || op == LT || op == GE)
14665 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14666 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14667 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14668 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14669 || GET_CODE (x) == LSHIFTRT
14670 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14671 || GET_CODE (x) == ROTATERT
14672 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14673 return CC_NOOVmode;
14674
14675 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14676 return CC_Zmode;
14677
14678 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14679 && GET_CODE (x) == PLUS
14680 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14681 return CC_Cmode;
14682
14683 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14684 {
14685 switch (op)
14686 {
14687 case EQ:
14688 case NE:
14689 /* A DImode comparison against zero can be implemented by
14690 or'ing the two halves together. */
14691 if (y == const0_rtx)
14692 return CC_Zmode;
14693
14694 /* We can do an equality test in three Thumb instructions. */
14695 if (!TARGET_32BIT)
14696 return CC_Zmode;
14697
14698 /* FALLTHROUGH */
14699
14700 case LTU:
14701 case LEU:
14702 case GTU:
14703 case GEU:
14704 /* DImode unsigned comparisons can be implemented by cmp +
14705 cmpeq without a scratch register. Not worth doing in
14706 Thumb-2. */
14707 if (TARGET_32BIT)
14708 return CC_CZmode;
14709
14710 /* FALLTHROUGH */
14711
14712 case LT:
14713 case LE:
14714 case GT:
14715 case GE:
14716 /* DImode signed and unsigned comparisons can be implemented
14717 by cmp + sbcs with a scratch register, but that does not
14718 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14719 gcc_assert (op != EQ && op != NE);
14720 return CC_NCVmode;
14721
14722 default:
14723 gcc_unreachable ();
14724 }
14725 }
14726
14727 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14728 return GET_MODE (x);
14729
14730 return CCmode;
14731 }
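/* Illustrative note on the DImode cases above (register numbers are an
   assumption): an equality test of a DImode value against zero selects
   CC_Zmode and can be implemented by ORRing the two halves together, e.g.

	orrs	r3, r0, r1

   which sets Z exactly when both halves are zero, while unsigned DImode
   comparisons on 32-bit targets select CC_CZmode and use the cmp + cmpeq
   idiom mentioned in the comment above.  */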
14732
14733 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14734    return the rtx for the CC register in the proper mode.  SCRATCH supplies
14735    the scratch register needed by some DImode comparisons after reload.  */
14736 rtx
14737 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14738 {
14739 machine_mode mode;
14740 rtx cc_reg;
14741 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14742
14743 /* We might have X as a constant, Y as a register because of the predicates
14744 used for cmpdi. If so, force X to a register here. */
14745 if (dimode_comparison && !REG_P (x))
14746 x = force_reg (DImode, x);
14747
14748 mode = SELECT_CC_MODE (code, x, y);
14749 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14750
14751 if (dimode_comparison
14752 && mode != CC_CZmode)
14753 {
14754 rtx clobber, set;
14755
14756 /* To compare two non-zero values for equality, XOR them and
14757 then compare against zero. Not used for ARM mode; there
14758 CC_CZmode is cheaper. */
14759 if (mode == CC_Zmode && y != const0_rtx)
14760 {
14761 gcc_assert (!reload_completed);
14762 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14763 y = const0_rtx;
14764 }
14765
14766 /* A scratch register is required. */
14767 if (reload_completed)
14768 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14769 else
14770 scratch = gen_rtx_SCRATCH (SImode);
14771
14772 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14773 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14774 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14775 }
14776 else
14777 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14778
14779 return cc_reg;
14780 }
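/* A minimal sketch of the CC_Zmode path above for Thumb-1 (assumed register
   assignment, for illustration only): comparing two DImode values held in
   r0/r1 and r2/r3 for equality XORs them and tests the result against zero,
   roughly

	eors	r0, r2
	eors	r1, r3
	orrs	r0, r1

   leaving the Z flag set iff the original values were equal.  */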
14781
14782 /* Generate a sequence of insns that will generate the correct return
14783 address mask depending on the physical architecture that the program
14784 is running on. */
14785 rtx
14786 arm_gen_return_addr_mask (void)
14787 {
14788 rtx reg = gen_reg_rtx (Pmode);
14789
14790 emit_insn (gen_return_addr_mask (reg));
14791 return reg;
14792 }
14793
14794 void
14795 arm_reload_in_hi (rtx *operands)
14796 {
14797 rtx ref = operands[1];
14798 rtx base, scratch;
14799 HOST_WIDE_INT offset = 0;
14800
14801 if (GET_CODE (ref) == SUBREG)
14802 {
14803 offset = SUBREG_BYTE (ref);
14804 ref = SUBREG_REG (ref);
14805 }
14806
14807 if (REG_P (ref))
14808 {
14809 /* We have a pseudo which has been spilt onto the stack; there
14810 are two cases here: the first where there is a simple
14811 stack-slot replacement and a second where the stack-slot is
14812 out of range, or is used as a subreg. */
14813 if (reg_equiv_mem (REGNO (ref)))
14814 {
14815 ref = reg_equiv_mem (REGNO (ref));
14816 base = find_replacement (&XEXP (ref, 0));
14817 }
14818 else
14819 /* The slot is out of range, or was dressed up in a SUBREG. */
14820 base = reg_equiv_address (REGNO (ref));
14821
14822 /* PR 62554: If there is no equivalent memory location then just move
14823 the value as an SImode register move. This happens when the target
14824 architecture variant does not have an HImode register move. */
14825 if (base == NULL)
14826 {
14827 gcc_assert (REG_P (operands[0]));
14828 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14829 gen_rtx_SUBREG (SImode, ref, 0)));
14830 return;
14831 }
14832 }
14833 else
14834 base = find_replacement (&XEXP (ref, 0));
14835
14836 /* Handle the case where the address is too complex to be offset by 1. */
14837 if (GET_CODE (base) == MINUS
14838 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14839 {
14840 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14841
14842 emit_set_insn (base_plus, base);
14843 base = base_plus;
14844 }
14845 else if (GET_CODE (base) == PLUS)
14846 {
14847 /* The addend must be CONST_INT, or we would have dealt with it above. */
14848 HOST_WIDE_INT hi, lo;
14849
14850 offset += INTVAL (XEXP (base, 1));
14851 base = XEXP (base, 0);
14852
14853 /* Rework the address into a legal sequence of insns. */
14854 /* Valid range for lo is -4095 -> 4095 */
14855 lo = (offset >= 0
14856 ? (offset & 0xfff)
14857 : -((-offset) & 0xfff));
14858
14859 /* Corner case, if lo is the max offset then we would be out of range
14860 once we have added the additional 1 below, so bump the msb into the
14861 pre-loading insn(s). */
14862 if (lo == 4095)
14863 lo &= 0x7ff;
14864
14865 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14866 ^ (HOST_WIDE_INT) 0x80000000)
14867 - (HOST_WIDE_INT) 0x80000000);
14868
14869 gcc_assert (hi + lo == offset);
14870
14871 if (hi != 0)
14872 {
14873 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14874
14875 /* Get the base address; addsi3 knows how to handle constants
14876 that require more than one insn. */
14877 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14878 base = base_plus;
14879 offset = lo;
14880 }
14881 }
14882
14883   /* Operands[2] may overlap operands[0] (though it won't overlap
14884      operands[1]); that's why we asked for a DImode reg -- so we can
14885      use the half that does not overlap.  */
14886 if (REGNO (operands[2]) == REGNO (operands[0]))
14887 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14888 else
14889 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14890
14891 emit_insn (gen_zero_extendqisi2 (scratch,
14892 gen_rtx_MEM (QImode,
14893 plus_constant (Pmode, base,
14894 offset))));
14895 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14896 gen_rtx_MEM (QImode,
14897 plus_constant (Pmode, base,
14898 offset + 1))));
14899 if (!BYTES_BIG_ENDIAN)
14900 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14901 gen_rtx_IOR (SImode,
14902 gen_rtx_ASHIFT
14903 (SImode,
14904 gen_rtx_SUBREG (SImode, operands[0], 0),
14905 GEN_INT (8)),
14906 scratch));
14907 else
14908 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14909 gen_rtx_IOR (SImode,
14910 gen_rtx_ASHIFT (SImode, scratch,
14911 GEN_INT (8)),
14912 gen_rtx_SUBREG (SImode, operands[0], 0)));
14913 }
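/* For illustration (little-endian case, register numbers assumed): the
   function above reloads a misaligned halfword by loading the two bytes
   separately and merging them, roughly

	ldrb	r3, [r2]	@ byte at the lower address
	ldrb	r0, [r2, #1]	@ byte at the higher address
	orr	r0, r3, r0, lsl #8

   with the big-endian branch combining the bytes the other way round.  */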
14914
14915 /* Handle storing a half-word to memory during reload by synthesizing as two
14916 byte stores. Take care not to clobber the input values until after we
14917 have moved them somewhere safe. This code assumes that if the DImode
14918 scratch in operands[2] overlaps either the input value or output address
14919 in some way, then that value must die in this insn (we absolutely need
14920 two scratch registers for some corner cases). */
14921 void
14922 arm_reload_out_hi (rtx *operands)
14923 {
14924 rtx ref = operands[0];
14925 rtx outval = operands[1];
14926 rtx base, scratch;
14927 HOST_WIDE_INT offset = 0;
14928
14929 if (GET_CODE (ref) == SUBREG)
14930 {
14931 offset = SUBREG_BYTE (ref);
14932 ref = SUBREG_REG (ref);
14933 }
14934
14935 if (REG_P (ref))
14936 {
14937 /* We have a pseudo which has been spilt onto the stack; there
14938 are two cases here: the first where there is a simple
14939 stack-slot replacement and a second where the stack-slot is
14940 out of range, or is used as a subreg. */
14941 if (reg_equiv_mem (REGNO (ref)))
14942 {
14943 ref = reg_equiv_mem (REGNO (ref));
14944 base = find_replacement (&XEXP (ref, 0));
14945 }
14946 else
14947 /* The slot is out of range, or was dressed up in a SUBREG. */
14948 base = reg_equiv_address (REGNO (ref));
14949
14950 /* PR 62254: If there is no equivalent memory location then just move
14951 the value as an SImode register move. This happens when the target
14952 architecture variant does not have an HImode register move. */
14953 if (base == NULL)
14954 {
14955 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14956
14957 if (REG_P (outval))
14958 {
14959 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14960 gen_rtx_SUBREG (SImode, outval, 0)));
14961 }
14962 else /* SUBREG_P (outval) */
14963 {
14964 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14965 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14966 SUBREG_REG (outval)));
14967 else
14968 /* FIXME: Handle other cases ? */
14969 gcc_unreachable ();
14970 }
14971 return;
14972 }
14973 }
14974 else
14975 base = find_replacement (&XEXP (ref, 0));
14976
14977 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14978
14979 /* Handle the case where the address is too complex to be offset by 1. */
14980 if (GET_CODE (base) == MINUS
14981 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14982 {
14983 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14984
14985 /* Be careful not to destroy OUTVAL. */
14986 if (reg_overlap_mentioned_p (base_plus, outval))
14987 {
14988 /* Updating base_plus might destroy outval, see if we can
14989 swap the scratch and base_plus. */
14990 if (!reg_overlap_mentioned_p (scratch, outval))
14991 std::swap (scratch, base_plus);
14992 else
14993 {
14994 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14995
14996 /* Be conservative and copy OUTVAL into the scratch now,
14997 this should only be necessary if outval is a subreg
14998 of something larger than a word. */
14999 /* XXX Might this clobber base? I can't see how it can,
15000 since scratch is known to overlap with OUTVAL, and
15001 must be wider than a word. */
15002 emit_insn (gen_movhi (scratch_hi, outval));
15003 outval = scratch_hi;
15004 }
15005 }
15006
15007 emit_set_insn (base_plus, base);
15008 base = base_plus;
15009 }
15010 else if (GET_CODE (base) == PLUS)
15011 {
15012 /* The addend must be CONST_INT, or we would have dealt with it above. */
15013 HOST_WIDE_INT hi, lo;
15014
15015 offset += INTVAL (XEXP (base, 1));
15016 base = XEXP (base, 0);
15017
15018 /* Rework the address into a legal sequence of insns. */
15019 /* Valid range for lo is -4095 -> 4095 */
15020 lo = (offset >= 0
15021 ? (offset & 0xfff)
15022 : -((-offset) & 0xfff));
15023
15024 /* Corner case, if lo is the max offset then we would be out of range
15025 once we have added the additional 1 below, so bump the msb into the
15026 pre-loading insn(s). */
15027 if (lo == 4095)
15028 lo &= 0x7ff;
15029
15030 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15031 ^ (HOST_WIDE_INT) 0x80000000)
15032 - (HOST_WIDE_INT) 0x80000000);
15033
15034 gcc_assert (hi + lo == offset);
15035
15036 if (hi != 0)
15037 {
15038 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15039
15040 /* Be careful not to destroy OUTVAL. */
15041 if (reg_overlap_mentioned_p (base_plus, outval))
15042 {
15043 /* Updating base_plus might destroy outval, see if we
15044 can swap the scratch and base_plus. */
15045 if (!reg_overlap_mentioned_p (scratch, outval))
15046 std::swap (scratch, base_plus);
15047 else
15048 {
15049 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15050
15051 /* Be conservative and copy outval into scratch now,
15052 this should only be necessary if outval is a
15053 subreg of something larger than a word. */
15054 /* XXX Might this clobber base? I can't see how it
15055 can, since scratch is known to overlap with
15056 outval. */
15057 emit_insn (gen_movhi (scratch_hi, outval));
15058 outval = scratch_hi;
15059 }
15060 }
15061
15062 /* Get the base address; addsi3 knows how to handle constants
15063 that require more than one insn. */
15064 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15065 base = base_plus;
15066 offset = lo;
15067 }
15068 }
15069
15070 if (BYTES_BIG_ENDIAN)
15071 {
15072 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15073 plus_constant (Pmode, base,
15074 offset + 1)),
15075 gen_lowpart (QImode, outval)));
15076 emit_insn (gen_lshrsi3 (scratch,
15077 gen_rtx_SUBREG (SImode, outval, 0),
15078 GEN_INT (8)));
15079 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15080 offset)),
15081 gen_lowpart (QImode, scratch)));
15082 }
15083 else
15084 {
15085 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15086 offset)),
15087 gen_lowpart (QImode, outval)));
15088 emit_insn (gen_lshrsi3 (scratch,
15089 gen_rtx_SUBREG (SImode, outval, 0),
15090 GEN_INT (8)));
15091 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15092 plus_constant (Pmode, base,
15093 offset + 1)),
15094 gen_lowpart (QImode, scratch)));
15095 }
15096 }
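/* For illustration (little-endian case, register numbers assumed): the
   store counterpart above splits the halfword into two byte stores,
   roughly

	strb	r1, [r2]	@ low byte of OUTVAL
	lsr	r3, r1, #8
	strb	r3, [r2, #1]	@ high byte of OUTVAL

   taking care, as described above, not to clobber OUTVAL before both
   bytes have been written.  */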
15097
15098 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15099 (padded to the size of a word) should be passed in a register. */
15100
15101 static bool
15102 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15103 {
15104 if (TARGET_AAPCS_BASED)
15105 return must_pass_in_stack_var_size (mode, type);
15106 else
15107 return must_pass_in_stack_var_size_or_pad (mode, type);
15108 }
15109
15110
15111 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15112 Return true if an argument passed on the stack should be padded upwards,
15113 i.e. if the least-significant byte has useful data.
15114 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15115 aggregate types are placed in the lowest memory address. */
15116
15117 bool
15118 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15119 {
15120 if (!TARGET_AAPCS_BASED)
15121 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15122
15123 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15124 return false;
15125
15126 return true;
15127 }
15128
15129
15130 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15131 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15132 register has useful data, and return the opposite if the most
15133 significant byte does. */
15134
15135 bool
15136 arm_pad_reg_upward (machine_mode mode,
15137 tree type, int first ATTRIBUTE_UNUSED)
15138 {
15139 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15140 {
15141 /* For AAPCS, small aggregates, small fixed-point types,
15142 and small complex types are always padded upwards. */
15143 if (type)
15144 {
15145 if ((AGGREGATE_TYPE_P (type)
15146 || TREE_CODE (type) == COMPLEX_TYPE
15147 || FIXED_POINT_TYPE_P (type))
15148 && int_size_in_bytes (type) <= 4)
15149 return true;
15150 }
15151 else
15152 {
15153 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15154 && GET_MODE_SIZE (mode) <= 4)
15155 return true;
15156 }
15157 }
15158
15159 /* Otherwise, use default padding. */
15160 return !BYTES_BIG_ENDIAN;
15161 }
15162
15163 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15164 assuming that the address in the base register is word aligned. */
15165 bool
15166 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15167 {
15168 HOST_WIDE_INT max_offset;
15169
15170   /* The offset must be a multiple of 4 in Thumb-2 mode.  */
15171 if (TARGET_THUMB2 && ((offset & 3) != 0))
15172 return false;
15173
15174 if (TARGET_THUMB2)
15175 max_offset = 1020;
15176 else if (TARGET_ARM)
15177 max_offset = 255;
15178 else
15179 return false;
15180
15181 return ((offset <= max_offset) && (offset >= -max_offset));
15182 }
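/* Illustrative values for the check above: with a word-aligned base, an
   offset of 1020 is accepted in Thumb-2 (a multiple of 4 within +/-1020),
   an offset of 2 is rejected in Thumb-2 (not a multiple of 4), and an
   offset of 256 is rejected in ARM state (outside +/-255).  */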
15183
15184 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15185 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15186 Assumes that the address in the base register RN is word aligned. Pattern
15187    guarantees that both memory accesses use the same base register,
15188    the offsets are constants within range, and the gap between the offsets is 4.
15189    Once reload is complete, check that the registers are legal.  WBACK indicates
15190    whether the address is updated.  LOAD indicates whether the memory access is a load or a store.  */
15191 bool
15192 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15193 bool wback, bool load)
15194 {
15195 unsigned int t, t2, n;
15196
15197 if (!reload_completed)
15198 return true;
15199
15200 if (!offset_ok_for_ldrd_strd (offset))
15201 return false;
15202
15203 t = REGNO (rt);
15204 t2 = REGNO (rt2);
15205 n = REGNO (rn);
15206
15207 if ((TARGET_THUMB2)
15208 && ((wback && (n == t || n == t2))
15209 || (t == SP_REGNUM)
15210 || (t == PC_REGNUM)
15211 || (t2 == SP_REGNUM)
15212 || (t2 == PC_REGNUM)
15213 || (!load && (n == PC_REGNUM))
15214 || (load && (t == t2))
15215 /* Triggers Cortex-M3 LDRD errata. */
15216 || (!wback && load && fix_cm3_ldrd && (n == t))))
15217 return false;
15218
15219 if ((TARGET_ARM)
15220 && ((wback && (n == t || n == t2))
15221 || (t2 == PC_REGNUM)
15222 || (t % 2 != 0) /* First destination register is not even. */
15223 || (t2 != t + 1)
15224	   /* PC can be used as a base register (for offset addressing only),
15225	      but it is deprecated.  */
15226 || (n == PC_REGNUM)))
15227 return false;
15228
15229 return true;
15230 }
15231
15232 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15233 operand MEM's address contains an immediate offset from the base
15234 register and has no side effects, in which case it sets BASE and
15235 OFFSET accordingly. */
15236 static bool
15237 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15238 {
15239 rtx addr;
15240
15241 gcc_assert (base != NULL && offset != NULL);
15242
15243 /* TODO: Handle more general memory operand patterns, such as
15244 PRE_DEC and PRE_INC. */
15245
15246 if (side_effects_p (mem))
15247 return false;
15248
15249 /* Can't deal with subregs. */
15250 if (GET_CODE (mem) == SUBREG)
15251 return false;
15252
15253 gcc_assert (MEM_P (mem));
15254
15255 *offset = const0_rtx;
15256
15257 addr = XEXP (mem, 0);
15258
15259 /* If addr isn't valid for DImode, then we can't handle it. */
15260 if (!arm_legitimate_address_p (DImode, addr,
15261 reload_in_progress || reload_completed))
15262 return false;
15263
15264 if (REG_P (addr))
15265 {
15266 *base = addr;
15267 return true;
15268 }
15269 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15270 {
15271 *base = XEXP (addr, 0);
15272 *offset = XEXP (addr, 1);
15273 return (REG_P (*base) && CONST_INT_P (*offset));
15274 }
15275
15276 return false;
15277 }
15278
15279 /* Called from a peephole2 to replace two word-size accesses with a
15280 single LDRD/STRD instruction. Returns true iff we can generate a
15281 new instruction sequence. That is, both accesses use the same base
15282 register and the gap between constant offsets is 4. This function
15283 may reorder its operands to match ldrd/strd RTL templates.
15284 OPERANDS are the operands found by the peephole matcher;
15285 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15286    corresponding memory operands.  LOAD indicates whether the access
15287    is a load or a store.  CONST_STORE indicates a store of constant
15288    integer values held in OPERANDS[4,5]; the pattern is then assumed
15289    to be four insns long, for the purpose of checking dead registers.
15290 COMMUTE indicates that register operands may be reordered. */
15291 bool
15292 gen_operands_ldrd_strd (rtx *operands, bool load,
15293 bool const_store, bool commute)
15294 {
15295 int nops = 2;
15296 HOST_WIDE_INT offsets[2], offset;
15297 rtx base = NULL_RTX;
15298 rtx cur_base, cur_offset, tmp;
15299 int i, gap;
15300 HARD_REG_SET regset;
15301
15302 gcc_assert (!const_store || !load);
15303 /* Check that the memory references are immediate offsets from the
15304 same base register. Extract the base register, the destination
15305 registers, and the corresponding memory offsets. */
15306 for (i = 0; i < nops; i++)
15307 {
15308 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15309 return false;
15310
15311 if (i == 0)
15312 base = cur_base;
15313 else if (REGNO (base) != REGNO (cur_base))
15314 return false;
15315
15316 offsets[i] = INTVAL (cur_offset);
15317 if (GET_CODE (operands[i]) == SUBREG)
15318 {
15319 tmp = SUBREG_REG (operands[i]);
15320 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15321 operands[i] = tmp;
15322 }
15323 }
15324
15325 /* Make sure there is no dependency between the individual loads. */
15326 if (load && REGNO (operands[0]) == REGNO (base))
15327 return false; /* RAW */
15328
15329 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15330 return false; /* WAW */
15331
15332 /* If the same input register is used in both stores
15333 when storing different constants, try to find a free register.
15334 For example, the code
15335 mov r0, 0
15336 str r0, [r2]
15337 mov r0, 1
15338 str r0, [r2, #4]
15339 can be transformed into
15340 mov r1, 0
15341 mov r0, 1
15342 strd r1, r0, [r2]
15343 in Thumb mode assuming that r1 is free.
15344 For ARM mode do the same but only if the starting register
15345 can be made to be even. */
15346 if (const_store
15347 && REGNO (operands[0]) == REGNO (operands[1])
15348 && INTVAL (operands[4]) != INTVAL (operands[5]))
15349 {
15350 if (TARGET_THUMB2)
15351 {
15352 CLEAR_HARD_REG_SET (regset);
15353 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15354 if (tmp == NULL_RTX)
15355 return false;
15356
15357	  /* Use the new register in the first load to ensure that
15358	     if the original input register is not dead after the peephole,
15359	     it will still hold the correct constant value.  */
15360 operands[0] = tmp;
15361 }
15362 else if (TARGET_ARM)
15363 {
15364 int regno = REGNO (operands[0]);
15365 if (!peep2_reg_dead_p (4, operands[0]))
15366 {
15367 /* When the input register is even and is not dead after the
15368 pattern, it has to hold the second constant but we cannot
15369 form a legal STRD in ARM mode with this register as the second
15370 register. */
15371 if (regno % 2 == 0)
15372 return false;
15373
15374 /* Is regno-1 free? */
15375 SET_HARD_REG_SET (regset);
15376 CLEAR_HARD_REG_BIT(regset, regno - 1);
15377 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15378 if (tmp == NULL_RTX)
15379 return false;
15380
15381 operands[0] = tmp;
15382 }
15383 else
15384 {
15385 /* Find a DImode register. */
15386 CLEAR_HARD_REG_SET (regset);
15387 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15388 if (tmp != NULL_RTX)
15389 {
15390 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15391 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15392 }
15393 else
15394 {
15395 /* Can we use the input register to form a DI register? */
15396 SET_HARD_REG_SET (regset);
15397 CLEAR_HARD_REG_BIT(regset,
15398 regno % 2 == 0 ? regno + 1 : regno - 1);
15399 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15400 if (tmp == NULL_RTX)
15401 return false;
15402 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15403 }
15404 }
15405
15406 gcc_assert (operands[0] != NULL_RTX);
15407 gcc_assert (operands[1] != NULL_RTX);
15408 gcc_assert (REGNO (operands[0]) % 2 == 0);
15409 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15410 }
15411 }
15412
15413 /* Make sure the instructions are ordered with lower memory access first. */
15414 if (offsets[0] > offsets[1])
15415 {
15416 gap = offsets[0] - offsets[1];
15417 offset = offsets[1];
15418
15419 /* Swap the instructions such that lower memory is accessed first. */
15420 std::swap (operands[0], operands[1]);
15421 std::swap (operands[2], operands[3]);
15422 if (const_store)
15423 std::swap (operands[4], operands[5]);
15424 }
15425 else
15426 {
15427 gap = offsets[1] - offsets[0];
15428 offset = offsets[0];
15429 }
15430
15431 /* Make sure accesses are to consecutive memory locations. */
15432 if (gap != 4)
15433 return false;
15434
15435 /* Make sure we generate legal instructions. */
15436 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15437 false, load))
15438 return true;
15439
15440   /* In Thumb state, where register choice is almost unconstrained, there
15441      is little hope of fixing it up.  */
15442 if (TARGET_THUMB2)
15443 return false;
15444
15445 if (load && commute)
15446 {
15447 /* Try reordering registers. */
15448 std::swap (operands[0], operands[1]);
15449 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15450 false, load))
15451 return true;
15452 }
15453
15454 if (const_store)
15455 {
15456 /* If input registers are dead after this pattern, they can be
15457 reordered or replaced by other registers that are free in the
15458 current pattern. */
15459 if (!peep2_reg_dead_p (4, operands[0])
15460 || !peep2_reg_dead_p (4, operands[1]))
15461 return false;
15462
15463 /* Try to reorder the input registers. */
15464 /* For example, the code
15465 mov r0, 0
15466 mov r1, 1
15467 str r1, [r2]
15468 str r0, [r2, #4]
15469 can be transformed into
15470 mov r1, 0
15471 mov r0, 1
15472 strd r0, [r2]
15473 */
15474 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15475 false, false))
15476 {
15477 std::swap (operands[0], operands[1]);
15478 return true;
15479 }
15480
15481 /* Try to find a free DI register. */
15482 CLEAR_HARD_REG_SET (regset);
15483 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15484 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15485 while (true)
15486 {
15487 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15488 if (tmp == NULL_RTX)
15489 return false;
15490
15491 /* DREG must be an even-numbered register in DImode.
15492 Split it into SI registers. */
15493 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15494 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15495 gcc_assert (operands[0] != NULL_RTX);
15496 gcc_assert (operands[1] != NULL_RTX);
15497 gcc_assert (REGNO (operands[0]) % 2 == 0);
15498 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15499
15500 return (operands_ok_ldrd_strd (operands[0], operands[1],
15501 base, offset,
15502 false, load));
15503 }
15504 }
15505
15506 return false;
15507 }
15508
15509
15510
15511 \f
15512 /* Print a symbolic form of X to the debug file, F. */
15513 static void
15514 arm_print_value (FILE *f, rtx x)
15515 {
15516 switch (GET_CODE (x))
15517 {
15518 case CONST_INT:
15519 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15520 return;
15521
15522 case CONST_DOUBLE:
15523 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15524 return;
15525
15526 case CONST_VECTOR:
15527 {
15528 int i;
15529
15530 fprintf (f, "<");
15531 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15532 {
15533 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15534 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15535 fputc (',', f);
15536 }
15537 fprintf (f, ">");
15538 }
15539 return;
15540
15541 case CONST_STRING:
15542 fprintf (f, "\"%s\"", XSTR (x, 0));
15543 return;
15544
15545 case SYMBOL_REF:
15546 fprintf (f, "`%s'", XSTR (x, 0));
15547 return;
15548
15549 case LABEL_REF:
15550 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15551 return;
15552
15553 case CONST:
15554 arm_print_value (f, XEXP (x, 0));
15555 return;
15556
15557 case PLUS:
15558 arm_print_value (f, XEXP (x, 0));
15559 fprintf (f, "+");
15560 arm_print_value (f, XEXP (x, 1));
15561 return;
15562
15563 case PC:
15564 fprintf (f, "pc");
15565 return;
15566
15567 default:
15568 fprintf (f, "????");
15569 return;
15570 }
15571 }
15572 \f
15573 /* Routines for manipulation of the constant pool. */
15574
15575 /* Arm instructions cannot load a large constant directly into a
15576 register; they have to come from a pc relative load. The constant
15577 must therefore be placed in the addressable range of the pc
15578 relative load. Depending on the precise pc relative load
15579 instruction the range is somewhere between 256 bytes and 4k. This
15580 means that we often have to dump a constant inside a function, and
15581 generate code to branch around it.
15582
15583 It is important to minimize this, since the branches will slow
15584 things down and make the code larger.
15585
15586 Normally we can hide the table after an existing unconditional
15587 branch so that there is no interruption of the flow, but in the
15588 worst case the code looks like this:
15589
15590 ldr rn, L1
15591 ...
15592 b L2
15593 align
15594 L1: .long value
15595 L2:
15596 ...
15597
15598 ldr rn, L3
15599 ...
15600 b L4
15601 align
15602 L3: .long value
15603 L4:
15604 ...
15605
15606 We fix this by performing a scan after scheduling, which notices
15607 which instructions need to have their operands fetched from the
15608 constant table and builds the table.
15609
15610 The algorithm starts by building a table of all the constants that
15611 need fixing up and all the natural barriers in the function (places
15612 where a constant table can be dropped without breaking the flow).
15613 For each fixup we note how far the pc-relative replacement will be
15614 able to reach and the offset of the instruction into the function.
15615
15616 Having built the table we then group the fixes together to form
15617 tables that are as large as possible (subject to addressing
15618 constraints) and emit each table of constants after the last
15619 barrier that is within range of all the instructions in the group.
15620 If a group does not contain a barrier, then we forcibly create one
15621 by inserting a jump instruction into the flow. Once the table has
15622 been inserted, the insns are then modified to reference the
15623 relevant entry in the pool.
15624
15625 Possible enhancements to the algorithm (not implemented) are:
15626
15627 1) For some processors and object formats, there may be benefit in
15628 aligning the pools to the start of cache lines; this alignment
15629 would need to be taken into account when calculating addressability
15630 of a pool. */
15631
15632 /* These typedefs are located at the start of this file, so that
15633 they can be used in the prototypes there. This comment is to
15634 remind readers of that fact so that the following structures
15635 can be understood more easily.
15636
15637 typedef struct minipool_node Mnode;
15638 typedef struct minipool_fixup Mfix; */
15639
15640 struct minipool_node
15641 {
15642 /* Doubly linked chain of entries. */
15643 Mnode * next;
15644 Mnode * prev;
15645   /* The maximum offset into the code at which this entry can be placed.  While
15646      pushing fixes for forward references, all entries are sorted in order
15647      of increasing max_address.  */
15648 HOST_WIDE_INT max_address;
15649 /* Similarly for an entry inserted for a backwards ref. */
15650 HOST_WIDE_INT min_address;
15651 /* The number of fixes referencing this entry. This can become zero
15652 if we "unpush" an entry. In this case we ignore the entry when we
15653 come to emit the code. */
15654 int refcount;
15655 /* The offset from the start of the minipool. */
15656 HOST_WIDE_INT offset;
15657 /* The value in table. */
15658 rtx value;
15659 /* The mode of value. */
15660 machine_mode mode;
15661   /* The size of the value.  With iWMMXt enabled,
15662      sizes > 4 also imply an alignment of 8 bytes.  */
15663 int fix_size;
15664 };
15665
15666 struct minipool_fixup
15667 {
15668 Mfix * next;
15669 rtx_insn * insn;
15670 HOST_WIDE_INT address;
15671 rtx * loc;
15672 machine_mode mode;
15673 int fix_size;
15674 rtx value;
15675 Mnode * minipool;
15676 HOST_WIDE_INT forwards;
15677 HOST_WIDE_INT backwards;
15678 };
15679
15680 /* Fixes less than a word need padding out to a word boundary. */
15681 #define MINIPOOL_FIX_SIZE(mode) \
15682 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
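/* For example, an HImode fix (2 bytes) is counted as 4 bytes in the pool,
   while a DImode or DFmode fix keeps its natural size of 8 bytes (and,
   with ARM_DOUBLEWORD_ALIGN, is also placed with 8-byte alignment by the
   code below).  */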
15683
15684 static Mnode * minipool_vector_head;
15685 static Mnode * minipool_vector_tail;
15686 static rtx_code_label *minipool_vector_label;
15687 static int minipool_pad;
15688
15689 /* The linked list of all minipool fixes required for this function. */
15690 Mfix * minipool_fix_head;
15691 Mfix * minipool_fix_tail;
15692 /* The fix entry for the current minipool, once it has been placed. */
15693 Mfix * minipool_barrier;
15694
15695 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15696 #define JUMP_TABLES_IN_TEXT_SECTION 0
15697 #endif
15698
15699 static HOST_WIDE_INT
15700 get_jump_table_size (rtx_jump_table_data *insn)
15701 {
15702   /* ADDR_VECs only take room if read-only data goes into the text
15703      section.  */
15704 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15705 {
15706 rtx body = PATTERN (insn);
15707 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15708 HOST_WIDE_INT size;
15709 HOST_WIDE_INT modesize;
15710
15711 modesize = GET_MODE_SIZE (GET_MODE (body));
15712 size = modesize * XVECLEN (body, elt);
15713 switch (modesize)
15714 {
15715 case 1:
15716 /* Round up size of TBB table to a halfword boundary. */
15717 size = (size + 1) & ~HOST_WIDE_INT_1;
15718 break;
15719 case 2:
15720 /* No padding necessary for TBH. */
15721 break;
15722 case 4:
15723 /* Add two bytes for alignment on Thumb. */
15724 if (TARGET_THUMB)
15725 size += 2;
15726 break;
15727 default:
15728 gcc_unreachable ();
15729 }
15730 return size;
15731 }
15732
15733 return 0;
15734 }
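/* Worked example: a TBB-style table (modesize 1) with five entries
   occupies 5 bytes and is rounded up to 6 so that it ends on a halfword
   boundary, while a Thumb ADDR_VEC of 4-byte entries gets 2 extra bytes
   for alignment; none of this matters unless the table lives in the text
   section.  */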
15735
15736 /* Return the maximum amount of padding that will be inserted before
15737 label LABEL. */
15738
15739 static HOST_WIDE_INT
15740 get_label_padding (rtx label)
15741 {
15742 HOST_WIDE_INT align, min_insn_size;
15743
15744 align = 1 << label_to_alignment (label);
15745 min_insn_size = TARGET_THUMB ? 2 : 4;
15746 return align > min_insn_size ? align - min_insn_size : 0;
15747 }
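/* For instance, a label aligned to 8 bytes on a Thumb target (minimum insn
   size 2) may be preceded by up to 8 - 2 = 6 bytes of padding, whereas a
   4-byte aligned label on an ARM target needs none.  */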
15748
15749 /* Move a minipool fix MP from its current location to before MAX_MP.
15750 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15751 constraints may need updating. */
15752 static Mnode *
15753 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15754 HOST_WIDE_INT max_address)
15755 {
15756 /* The code below assumes these are different. */
15757 gcc_assert (mp != max_mp);
15758
15759 if (max_mp == NULL)
15760 {
15761 if (max_address < mp->max_address)
15762 mp->max_address = max_address;
15763 }
15764 else
15765 {
15766 if (max_address > max_mp->max_address - mp->fix_size)
15767 mp->max_address = max_mp->max_address - mp->fix_size;
15768 else
15769 mp->max_address = max_address;
15770
15771 /* Unlink MP from its current position. Since max_mp is non-null,
15772 mp->prev must be non-null. */
15773 mp->prev->next = mp->next;
15774 if (mp->next != NULL)
15775 mp->next->prev = mp->prev;
15776 else
15777 minipool_vector_tail = mp->prev;
15778
15779 /* Re-insert it before MAX_MP. */
15780 mp->next = max_mp;
15781 mp->prev = max_mp->prev;
15782 max_mp->prev = mp;
15783
15784 if (mp->prev != NULL)
15785 mp->prev->next = mp;
15786 else
15787 minipool_vector_head = mp;
15788 }
15789
15790 /* Save the new entry. */
15791 max_mp = mp;
15792
15793 /* Scan over the preceding entries and adjust their addresses as
15794 required. */
15795 while (mp->prev != NULL
15796 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15797 {
15798 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15799 mp = mp->prev;
15800 }
15801
15802 return max_mp;
15803 }
15804
15805 /* Add a constant to the minipool for a forward reference. Returns the
15806 node added or NULL if the constant will not fit in this pool. */
15807 static Mnode *
15808 add_minipool_forward_ref (Mfix *fix)
15809 {
15810 /* If set, max_mp is the first pool_entry that has a lower
15811 constraint than the one we are trying to add. */
15812 Mnode * max_mp = NULL;
15813 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15814 Mnode * mp;
15815
15816 /* If the minipool starts before the end of FIX->INSN then this FIX
15817      cannot be placed into the current pool.  Furthermore, adding the
15818 new constant pool entry may cause the pool to start FIX_SIZE bytes
15819 earlier. */
15820 if (minipool_vector_head &&
15821 (fix->address + get_attr_length (fix->insn)
15822 >= minipool_vector_head->max_address - fix->fix_size))
15823 return NULL;
15824
15825 /* Scan the pool to see if a constant with the same value has
15826 already been added. While we are doing this, also note the
15827 location where we must insert the constant if it doesn't already
15828 exist. */
15829 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15830 {
15831 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15832 && fix->mode == mp->mode
15833 && (!LABEL_P (fix->value)
15834 || (CODE_LABEL_NUMBER (fix->value)
15835 == CODE_LABEL_NUMBER (mp->value)))
15836 && rtx_equal_p (fix->value, mp->value))
15837 {
15838 /* More than one fix references this entry. */
15839 mp->refcount++;
15840 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15841 }
15842
15843 /* Note the insertion point if necessary. */
15844 if (max_mp == NULL
15845 && mp->max_address > max_address)
15846 max_mp = mp;
15847
15848	  /* If we are inserting an 8-byte aligned quantity and
15849 we have not already found an insertion point, then
15850 make sure that all such 8-byte aligned quantities are
15851 placed at the start of the pool. */
15852 if (ARM_DOUBLEWORD_ALIGN
15853 && max_mp == NULL
15854 && fix->fix_size >= 8
15855 && mp->fix_size < 8)
15856 {
15857 max_mp = mp;
15858 max_address = mp->max_address;
15859 }
15860 }
15861
15862 /* The value is not currently in the minipool, so we need to create
15863 a new entry for it. If MAX_MP is NULL, the entry will be put on
15864 the end of the list since the placement is less constrained than
15865 any existing entry. Otherwise, we insert the new fix before
15866 MAX_MP and, if necessary, adjust the constraints on the other
15867 entries. */
15868 mp = XNEW (Mnode);
15869 mp->fix_size = fix->fix_size;
15870 mp->mode = fix->mode;
15871 mp->value = fix->value;
15872 mp->refcount = 1;
15873 /* Not yet required for a backwards ref. */
15874 mp->min_address = -65536;
15875
15876 if (max_mp == NULL)
15877 {
15878 mp->max_address = max_address;
15879 mp->next = NULL;
15880 mp->prev = minipool_vector_tail;
15881
15882 if (mp->prev == NULL)
15883 {
15884 minipool_vector_head = mp;
15885 minipool_vector_label = gen_label_rtx ();
15886 }
15887 else
15888 mp->prev->next = mp;
15889
15890 minipool_vector_tail = mp;
15891 }
15892 else
15893 {
15894 if (max_address > max_mp->max_address - mp->fix_size)
15895 mp->max_address = max_mp->max_address - mp->fix_size;
15896 else
15897 mp->max_address = max_address;
15898
15899 mp->next = max_mp;
15900 mp->prev = max_mp->prev;
15901 max_mp->prev = mp;
15902 if (mp->prev != NULL)
15903 mp->prev->next = mp;
15904 else
15905 minipool_vector_head = mp;
15906 }
15907
15908 /* Save the new entry. */
15909 max_mp = mp;
15910
15911 /* Scan over the preceding entries and adjust their addresses as
15912 required. */
15913 while (mp->prev != NULL
15914 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15915 {
15916 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15917 mp = mp->prev;
15918 }
15919
15920 return max_mp;
15921 }
15922
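/* Counterpart of move_minipool_fix_forward_ref for backward references:
   move MP to just after MIN_MP.  If MIN_MP is NULL, then MP doesn't need
   moving, but its minimum address constraint may still need raising to
   MIN_ADDRESS.  The offsets and minimum addresses of the following
   entries are then recomputed.  */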
15923 static Mnode *
15924 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15925 HOST_WIDE_INT min_address)
15926 {
15927 HOST_WIDE_INT offset;
15928
15929 /* The code below assumes these are different. */
15930 gcc_assert (mp != min_mp);
15931
15932 if (min_mp == NULL)
15933 {
15934 if (min_address > mp->min_address)
15935 mp->min_address = min_address;
15936 }
15937 else
15938 {
15939 /* We will adjust this below if it is too loose. */
15940 mp->min_address = min_address;
15941
15942 /* Unlink MP from its current position. Since min_mp is non-null,
15943 mp->next must be non-null. */
15944 mp->next->prev = mp->prev;
15945 if (mp->prev != NULL)
15946 mp->prev->next = mp->next;
15947 else
15948 minipool_vector_head = mp->next;
15949
15950 /* Reinsert it after MIN_MP. */
15951 mp->prev = min_mp;
15952 mp->next = min_mp->next;
15953 min_mp->next = mp;
15954 if (mp->next != NULL)
15955 mp->next->prev = mp;
15956 else
15957 minipool_vector_tail = mp;
15958 }
15959
15960 min_mp = mp;
15961
15962 offset = 0;
15963 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15964 {
15965 mp->offset = offset;
15966 if (mp->refcount > 0)
15967 offset += mp->fix_size;
15968
15969 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15970 mp->next->min_address = mp->min_address + mp->fix_size;
15971 }
15972
15973 return min_mp;
15974 }
15975
15976 /* Add a constant to the minipool for a backward reference. Returns the
15977 node added or NULL if the constant will not fit in this pool.
15978
15979 Note that the code for insertion for a backwards reference can be
15980 somewhat confusing because the calculated offsets for each fix do
15981    not take into account the size of the pool (which is still under
15982    construction).  */
15983 static Mnode *
15984 add_minipool_backward_ref (Mfix *fix)
15985 {
15986 /* If set, min_mp is the last pool_entry that has a lower constraint
15987 than the one we are trying to add. */
15988 Mnode *min_mp = NULL;
15989 /* This can be negative, since it is only a constraint. */
15990 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15991 Mnode *mp;
15992
15993 /* If we can't reach the current pool from this insn, or if we can't
15994 insert this entry at the end of the pool without pushing other
15995 fixes out of range, then we don't try. This ensures that we
15996 can't fail later on. */
15997 if (min_address >= minipool_barrier->address
15998 || (minipool_vector_tail->min_address + fix->fix_size
15999 >= minipool_barrier->address))
16000 return NULL;
16001
16002 /* Scan the pool to see if a constant with the same value has
16003 already been added. While we are doing this, also note the
16004 location where we must insert the constant if it doesn't already
16005 exist. */
16006 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16007 {
16008 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16009 && fix->mode == mp->mode
16010 && (!LABEL_P (fix->value)
16011 || (CODE_LABEL_NUMBER (fix->value)
16012 == CODE_LABEL_NUMBER (mp->value)))
16013 && rtx_equal_p (fix->value, mp->value)
16014 /* Check that there is enough slack to move this entry to the
16015 end of the table (this is conservative). */
16016 && (mp->max_address
16017 > (minipool_barrier->address
16018 + minipool_vector_tail->offset
16019 + minipool_vector_tail->fix_size)))
16020 {
16021 mp->refcount++;
16022 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16023 }
16024
16025 if (min_mp != NULL)
16026 mp->min_address += fix->fix_size;
16027 else
16028 {
16029 /* Note the insertion point if necessary. */
16030 if (mp->min_address < min_address)
16031 {
16032	      /* For now, we do not allow the insertion of nodes requiring
16033		 8-byte alignment anywhere but at the start of the pool.  */
16034 if (ARM_DOUBLEWORD_ALIGN
16035 && fix->fix_size >= 8 && mp->fix_size < 8)
16036 return NULL;
16037 else
16038 min_mp = mp;
16039 }
16040 else if (mp->max_address
16041 < minipool_barrier->address + mp->offset + fix->fix_size)
16042 {
16043 /* Inserting before this entry would push the fix beyond
16044 its maximum address (which can happen if we have
16045 re-located a forwards fix); force the new fix to come
16046 after it. */
16047 if (ARM_DOUBLEWORD_ALIGN
16048 && fix->fix_size >= 8 && mp->fix_size < 8)
16049 return NULL;
16050 else
16051 {
16052 min_mp = mp;
16053 min_address = mp->min_address + fix->fix_size;
16054 }
16055 }
16056 /* Do not insert a non-8-byte aligned quantity before 8-byte
16057 aligned quantities. */
16058 else if (ARM_DOUBLEWORD_ALIGN
16059 && fix->fix_size < 8
16060 && mp->fix_size >= 8)
16061 {
16062 min_mp = mp;
16063 min_address = mp->min_address + fix->fix_size;
16064 }
16065 }
16066 }
16067
16068 /* We need to create a new entry. */
16069 mp = XNEW (Mnode);
16070 mp->fix_size = fix->fix_size;
16071 mp->mode = fix->mode;
16072 mp->value = fix->value;
16073 mp->refcount = 1;
16074 mp->max_address = minipool_barrier->address + 65536;
16075
16076 mp->min_address = min_address;
16077
16078 if (min_mp == NULL)
16079 {
16080 mp->prev = NULL;
16081 mp->next = minipool_vector_head;
16082
16083 if (mp->next == NULL)
16084 {
16085 minipool_vector_tail = mp;
16086 minipool_vector_label = gen_label_rtx ();
16087 }
16088 else
16089 mp->next->prev = mp;
16090
16091 minipool_vector_head = mp;
16092 }
16093 else
16094 {
16095 mp->next = min_mp->next;
16096 mp->prev = min_mp;
16097 min_mp->next = mp;
16098
16099 if (mp->next != NULL)
16100 mp->next->prev = mp;
16101 else
16102 minipool_vector_tail = mp;
16103 }
16104
16105 /* Save the new entry. */
16106 min_mp = mp;
16107
16108 if (mp->prev)
16109 mp = mp->prev;
16110 else
16111 mp->offset = 0;
16112
16113 /* Scan over the following entries and adjust their offsets. */
16114 while (mp->next != NULL)
16115 {
16116 if (mp->next->min_address < mp->min_address + mp->fix_size)
16117 mp->next->min_address = mp->min_address + mp->fix_size;
16118
16119 if (mp->refcount)
16120 mp->next->offset = mp->offset + mp->fix_size;
16121 else
16122 mp->next->offset = mp->offset;
16123
16124 mp = mp->next;
16125 }
16126
16127 return min_mp;
16128 }
16129
16130 static void
16131 assign_minipool_offsets (Mfix *barrier)
16132 {
16133 HOST_WIDE_INT offset = 0;
16134 Mnode *mp;
16135
16136 minipool_barrier = barrier;
16137
16138 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16139 {
16140 mp->offset = offset;
16141
16142 if (mp->refcount > 0)
16143 offset += mp->fix_size;
16144 }
16145 }
16146
16147 /* Output the literal table. */
16148 static void
16149 dump_minipool (rtx_insn *scan)
16150 {
16151 Mnode * mp;
16152 Mnode * nmp;
16153 int align64 = 0;
16154
16155 if (ARM_DOUBLEWORD_ALIGN)
16156 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16157 if (mp->refcount > 0 && mp->fix_size >= 8)
16158 {
16159 align64 = 1;
16160 break;
16161 }
16162
16163 if (dump_file)
16164 fprintf (dump_file,
16165 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16166 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16167
16168 scan = emit_label_after (gen_label_rtx (), scan);
16169 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16170 scan = emit_label_after (minipool_vector_label, scan);
16171
16172 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16173 {
16174 if (mp->refcount > 0)
16175 {
16176 if (dump_file)
16177 {
16178 fprintf (dump_file,
16179 ";; Offset %u, min %ld, max %ld ",
16180 (unsigned) mp->offset, (unsigned long) mp->min_address,
16181 (unsigned long) mp->max_address);
16182 arm_print_value (dump_file, mp->value);
16183 fputc ('\n', dump_file);
16184 }
16185
16186 rtx val = copy_rtx (mp->value);
16187
16188 switch (GET_MODE_SIZE (mp->mode))
16189 {
16190 #ifdef HAVE_consttable_1
16191 case 1:
16192 scan = emit_insn_after (gen_consttable_1 (val), scan);
16193 break;
16194
16195 #endif
16196 #ifdef HAVE_consttable_2
16197 case 2:
16198 scan = emit_insn_after (gen_consttable_2 (val), scan);
16199 break;
16200
16201 #endif
16202 #ifdef HAVE_consttable_4
16203 case 4:
16204 scan = emit_insn_after (gen_consttable_4 (val), scan);
16205 break;
16206
16207 #endif
16208 #ifdef HAVE_consttable_8
16209 case 8:
16210 scan = emit_insn_after (gen_consttable_8 (val), scan);
16211 break;
16212
16213 #endif
16214 #ifdef HAVE_consttable_16
16215 case 16:
16216 scan = emit_insn_after (gen_consttable_16 (val), scan);
16217 break;
16218
16219 #endif
16220 default:
16221 gcc_unreachable ();
16222 }
16223 }
16224
16225 nmp = mp->next;
16226 free (mp);
16227 }
16228
16229 minipool_vector_head = minipool_vector_tail = NULL;
16230 scan = emit_insn_after (gen_consttable_end (), scan);
16231 scan = emit_barrier_after (scan);
16232 }
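/* Sketch of the emitted layout (illustrative only): after SCAN we get a
   fresh label, an alignment request of 4 bytes (8 when any live entry needs
   doubleword alignment), minipool_vector_label itself, then one
   consttable_{1,2,4,8,16} entry per constant with a non-zero refcount, in
   offset order, terminated by a consttable_end and a barrier. */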
16233
16234 /* Return the cost of forcibly inserting a barrier after INSN. */
16235 static int
16236 arm_barrier_cost (rtx_insn *insn)
16237 {
16238 /* Basing the location of the pool on the loop depth is preferable,
16239 but at the moment, the basic block information seems to be
16240 corrupted by this stage of the compilation. */
16241 int base_cost = 50;
16242 rtx_insn *next = next_nonnote_insn (insn);
16243
16244 if (next != NULL && LABEL_P (next))
16245 base_cost -= 20;
16246
16247 switch (GET_CODE (insn))
16248 {
16249 case CODE_LABEL:
16250 /* It will always be better to place the table before the label, rather
16251 than after it. */
16252 return 50;
16253
16254 case INSN:
16255 case CALL_INSN:
16256 return base_cost;
16257
16258 case JUMP_INSN:
16259 return base_cost - 10;
16260
16261 default:
16262 return base_cost + 10;
16263 }
16264 }
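/* Worked example (illustrative only): an ordinary INSN or CALL_INSN costs
   the base of 50, a JUMP_INSN costs 40, and anything unusual costs 60; a
   CODE_LABEL always costs 50. If the following non-note insn is a label,
   20 is subtracted from the base first, so a JUMP_INSN followed by a label
   costs only 20 and is the preferred place to drop a barrier. */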
16265
16266 /* Find the best place in the insn stream in the range
16267 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16268 Create the barrier by inserting a jump and add a new fix entry for
16269 it. */
16270 static Mfix *
16271 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16272 {
16273 HOST_WIDE_INT count = 0;
16274 rtx_barrier *barrier;
16275 rtx_insn *from = fix->insn;
16276 /* The instruction after which we will insert the jump. */
16277 rtx_insn *selected = NULL;
16278 int selected_cost;
16279 /* The address at which the jump instruction will be placed. */
16280 HOST_WIDE_INT selected_address;
16281 Mfix * new_fix;
16282 HOST_WIDE_INT max_count = max_address - fix->address;
16283 rtx_code_label *label = gen_label_rtx ();
16284
16285 selected_cost = arm_barrier_cost (from);
16286 selected_address = fix->address;
16287
16288 while (from && count < max_count)
16289 {
16290 rtx_jump_table_data *tmp;
16291 int new_cost;
16292
16293 /* This code shouldn't have been called if there was a natural barrier
16294 within range. */
16295 gcc_assert (!BARRIER_P (from));
16296
16297 /* Count the length of this insn. This must stay in sync with the
16298 code that pushes minipool fixes. */
16299 if (LABEL_P (from))
16300 count += get_label_padding (from);
16301 else
16302 count += get_attr_length (from);
16303
16304 /* If there is a jump table, add its length. */
16305 if (tablejump_p (from, NULL, &tmp))
16306 {
16307 count += get_jump_table_size (tmp);
16308
16309 /* Jump tables aren't in a basic block, so base the cost on
16310 the dispatch insn. If we select this location, we will
16311 still put the pool after the table. */
16312 new_cost = arm_barrier_cost (from);
16313
16314 if (count < max_count
16315 && (!selected || new_cost <= selected_cost))
16316 {
16317 selected = tmp;
16318 selected_cost = new_cost;
16319 selected_address = fix->address + count;
16320 }
16321
16322 /* Continue after the dispatch table. */
16323 from = NEXT_INSN (tmp);
16324 continue;
16325 }
16326
16327 new_cost = arm_barrier_cost (from);
16328
16329 if (count < max_count
16330 && (!selected || new_cost <= selected_cost))
16331 {
16332 selected = from;
16333 selected_cost = new_cost;
16334 selected_address = fix->address + count;
16335 }
16336
16337 from = NEXT_INSN (from);
16338 }
16339
16340 /* Make sure that we found a place to insert the jump. */
16341 gcc_assert (selected);
16342
16343 /* Make sure we do not split a call and its corresponding
16344 CALL_ARG_LOCATION note. */
16345 if (CALL_P (selected))
16346 {
16347 rtx_insn *next = NEXT_INSN (selected);
16348 if (next && NOTE_P (next)
16349 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16350 selected = next;
16351 }
16352
16353 /* Create a new JUMP_INSN that branches around a barrier. */
16354 from = emit_jump_insn_after (gen_jump (label), selected);
16355 JUMP_LABEL (from) = label;
16356 barrier = emit_barrier_after (from);
16357 emit_label_after (label, barrier);
16358
16359 /* Create a minipool barrier entry for the new barrier. */
16360 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16361 new_fix->insn = barrier;
16362 new_fix->address = selected_address;
16363 new_fix->next = fix->next;
16364 fix->next = new_fix;
16365
16366 return new_fix;
16367 }
16368
16369 /* Record that there is a natural barrier in the insn stream at
16370 ADDRESS. */
16371 static void
16372 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16373 {
16374 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16375
16376 fix->insn = insn;
16377 fix->address = address;
16378
16379 fix->next = NULL;
16380 if (minipool_fix_head != NULL)
16381 minipool_fix_tail->next = fix;
16382 else
16383 minipool_fix_head = fix;
16384
16385 minipool_fix_tail = fix;
16386 }
16387
16388 /* Record INSN, which will need fixing up to load a value from the
16389 minipool. ADDRESS is the offset of the insn since the start of the
16390 function; LOC is a pointer to the part of the insn which requires
16391 fixing; VALUE is the constant that must be loaded, which is of type
16392 MODE. */
16393 static void
16394 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16395 machine_mode mode, rtx value)
16396 {
16397 gcc_assert (!arm_disable_literal_pool);
16398 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16399
16400 fix->insn = insn;
16401 fix->address = address;
16402 fix->loc = loc;
16403 fix->mode = mode;
16404 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16405 fix->value = value;
16406 fix->forwards = get_attr_pool_range (insn);
16407 fix->backwards = get_attr_neg_pool_range (insn);
16408 fix->minipool = NULL;
16409
16410 /* If an insn doesn't have a range defined for it, then it isn't
16411 expecting to be reworked by this code. Better to stop now than
16412 to generate duff assembly code. */
16413 gcc_assert (fix->forwards || fix->backwards);
16414
16415 /* If an entry requires 8-byte alignment then assume all constant pools
16416 require 4 bytes of padding. Trying to do this later on a per-pool
16417 basis is awkward because existing pool entries have to be modified. */
16418 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16419 minipool_pad = 4;
16420
16421 if (dump_file)
16422 {
16423 fprintf (dump_file,
16424 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16425 GET_MODE_NAME (mode),
16426 INSN_UID (insn), (unsigned long) address,
16427 -1 * (long)fix->backwards, (long)fix->forwards);
16428 arm_print_value (dump_file, fix->value);
16429 fprintf (dump_file, "\n");
16430 }
16431
16432 /* Add it to the chain of fixes. */
16433 fix->next = NULL;
16434
16435 if (minipool_fix_head != NULL)
16436 minipool_fix_tail->next = fix;
16437 else
16438 minipool_fix_head = fix;
16439
16440 minipool_fix_tail = fix;
16441 }
16442
16443 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16444 Returns the number of insns needed, or 99 if we always want to synthesize
16445 the value. */
16446 int
16447 arm_max_const_double_inline_cost ()
16448 {
16449 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16450 }
16451
16452 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16453 Returns the number of insns needed, or 99 if we don't know how to
16454 do it. */
16455 int
16456 arm_const_double_inline_cost (rtx val)
16457 {
16458 rtx lowpart, highpart;
16459 machine_mode mode;
16460
16461 mode = GET_MODE (val);
16462
16463 if (mode == VOIDmode)
16464 mode = DImode;
16465
16466 gcc_assert (GET_MODE_SIZE (mode) == 8);
16467
16468 lowpart = gen_lowpart (SImode, val);
16469 highpart = gen_highpart_mode (SImode, mode, val);
16470
16471 gcc_assert (CONST_INT_P (lowpart));
16472 gcc_assert (CONST_INT_P (highpart));
16473
16474 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16475 NULL_RTX, NULL_RTX, 0, 0)
16476 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16477 NULL_RTX, NULL_RTX, 0, 0));
16478 }
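/* Worked example (illustrative only; exact costs come from
   arm_gen_constant): for the DImode constant 0x00000001ff000000 the low
   word is 0xff000000 and the high word is 0x00000001. Each is a valid ARM
   immediate, so each half costs a single insn and the total returned is 2.
   A value such as 0x0001000100010001 needs more insns per half and so
   returns a larger cost. */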
16479
16480 /* Cost of loading a SImode constant. */
16481 static inline int
16482 arm_const_inline_cost (enum rtx_code code, rtx val)
16483 {
16484 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16485 NULL_RTX, NULL_RTX, 1, 0);
16486 }
16487
16488 /* Return true if it is worthwhile to split a 64-bit constant into two
16489 32-bit operations. This is the case if optimizing for size, or
16490 if we have load delay slots, or if one 32-bit part can be done with
16491 a single data operation. */
16492 bool
16493 arm_const_double_by_parts (rtx val)
16494 {
16495 machine_mode mode = GET_MODE (val);
16496 rtx part;
16497
16498 if (optimize_size || arm_ld_sched)
16499 return true;
16500
16501 if (mode == VOIDmode)
16502 mode = DImode;
16503
16504 part = gen_highpart_mode (SImode, mode, val);
16505
16506 gcc_assert (CONST_INT_P (part));
16507
16508 if (const_ok_for_arm (INTVAL (part))
16509 || const_ok_for_arm (~INTVAL (part)))
16510 return true;
16511
16512 part = gen_lowpart (SImode, val);
16513
16514 gcc_assert (CONST_INT_P (part));
16515
16516 if (const_ok_for_arm (INTVAL (part))
16517 || const_ok_for_arm (~INTVAL (part)))
16518 return true;
16519
16520 return false;
16521 }
16522
16523 /* Return true if it is possible to inline both the high and low parts
16524 of a 64-bit constant into 32-bit data processing instructions. */
16525 bool
16526 arm_const_double_by_immediates (rtx val)
16527 {
16528 machine_mode mode = GET_MODE (val);
16529 rtx part;
16530
16531 if (mode == VOIDmode)
16532 mode = DImode;
16533
16534 part = gen_highpart_mode (SImode, mode, val);
16535
16536 gcc_assert (CONST_INT_P (part));
16537
16538 if (!const_ok_for_arm (INTVAL (part)))
16539 return false;
16540
16541 part = gen_lowpart (SImode, val);
16542
16543 gcc_assert (CONST_INT_P (part));
16544
16545 if (!const_ok_for_arm (INTVAL (part)))
16546 return false;
16547
16548 return true;
16549 }
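/* Worked example (illustrative only): a valid ARM data-processing immediate
   is an 8-bit value rotated right by an even amount, so 0xff000000 and
   0x0000ff00 qualify while 0x00000101 does not. Hence a DImode value of
   0x0000ff00000000ff can be built entirely from immediates, whereas
   0x0000ff0000000101 cannot and has to be synthesized or loaded from the
   pool. */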
16550
16551 /* Scan INSN and note any of its operands that need fixing.
16552 If DO_PUSHES is false we do not actually push any of the fixups
16553 needed. */
16554 static void
16555 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16556 {
16557 int opno;
16558
16559 extract_constrain_insn (insn);
16560
16561 if (recog_data.n_alternatives == 0)
16562 return;
16563
16564 /* Fill in recog_op_alt with information about the constraints of
16565 this insn. */
16566 preprocess_constraints (insn);
16567
16568 const operand_alternative *op_alt = which_op_alt ();
16569 for (opno = 0; opno < recog_data.n_operands; opno++)
16570 {
16571 /* Things we need to fix can only occur in inputs. */
16572 if (recog_data.operand_type[opno] != OP_IN)
16573 continue;
16574
16575 /* If this alternative is a memory reference, then any mention
16576 of constants in this alternative is really to fool reload
16577 into allowing us to accept one there. We need to fix them up
16578 now so that we output the right code. */
16579 if (op_alt[opno].memory_ok)
16580 {
16581 rtx op = recog_data.operand[opno];
16582
16583 if (CONSTANT_P (op))
16584 {
16585 if (do_pushes)
16586 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16587 recog_data.operand_mode[opno], op);
16588 }
16589 else if (MEM_P (op)
16590 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16591 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16592 {
16593 if (do_pushes)
16594 {
16595 rtx cop = avoid_constant_pool_reference (op);
16596
16597 /* Casting the address of something to a mode narrower
16598 than a word can cause avoid_constant_pool_reference()
16599 to return the pool reference itself. That's no good to
16600 us here. Let's just hope that we can use the
16601 constant pool value directly. */
16602 if (op == cop)
16603 cop = get_pool_constant (XEXP (op, 0));
16604
16605 push_minipool_fix (insn, address,
16606 recog_data.operand_loc[opno],
16607 recog_data.operand_mode[opno], cop);
16608 }
16609
16610 }
16611 }
16612 }
16613
16614 return;
16615 }
16616
16617 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16618 and unions in the context of ARMv8-M Security Extensions. It is used as a
16619 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16620 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16621 or four masks, depending on whether it is being computed for a
16622 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16623 respectively. The tree for the type of the argument or a field within an
16624 argument is passed in ARG_TYPE, the current register this argument or field
16625 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16626 argument or field starts at is passed in STARTING_BIT and the last used bit
16627 is kept in LAST_USED_BIT which is also updated accordingly. */
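/* Worked example (illustrative only, assuming the usual 16-bit alignment of
   'short'): for an argument of type
       struct { char a; short b; }
   passed in r0, field 'a' occupies bits 0-7 and field 'b' bits 16-31, so
   bits 8-15 are padding. The function records 0x0000ff00 in
   padding_bits_to_clear[0], marks r0 as used in the returned mask, and
   advances *regno past r0. */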
16628
16629 static unsigned HOST_WIDE_INT
16630 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16631 uint32_t * padding_bits_to_clear,
16632 unsigned starting_bit, int * last_used_bit)
16633
16634 {
16635 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16636
16637 if (TREE_CODE (arg_type) == RECORD_TYPE)
16638 {
16639 unsigned current_bit = starting_bit;
16640 tree field;
16641 long int offset, size;
16642
16643
16644 field = TYPE_FIELDS (arg_type);
16645 while (field)
16646 {
16647 /* The offset within a structure is always an offset from
16648 the start of that structure. Make sure we take that into account
16649 in the calculation of the register-based offset that we use here. */
16650 offset = starting_bit;
16651 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16652 offset %= 32;
16653
16654 /* This is the actual size of the field, for bitfields this is the
16655 bitfield width and not the container size. */
16656 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16657
16658 if (*last_used_bit != offset)
16659 {
16660 if (offset < *last_used_bit)
16661 {
16662 /* This field's offset is before the 'last_used_bit', which
16663 means this field goes in the next register. So we need to
16664 pad the rest of the current register and increase the
16665 register number. */
16666 uint32_t mask;
16667 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16668 mask++;
16669
16670 padding_bits_to_clear[*regno] |= mask;
16671 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16672 (*regno)++;
16673 }
16674 else
16675 {
16676 /* Otherwise we pad the bits between the last field's end and
16677 the start of the new field. */
16678 uint32_t mask;
16679
16680 mask = ((uint32_t)-1) >> (32 - offset);
16681 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16682 padding_bits_to_clear[*regno] |= mask;
16683 }
16684 current_bit = offset;
16685 }
16686
16687 /* Calculate further padding bits for inner structs/unions too. */
16688 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16689 {
16690 *last_used_bit = current_bit;
16691 not_to_clear_reg_mask
16692 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16693 padding_bits_to_clear, offset,
16694 last_used_bit);
16695 }
16696 else
16697 {
16698 /* Update 'current_bit' with this field's size. If the
16699 'current_bit' lies in a subsequent register, update 'regno' and
16700 reset 'current_bit' to point to the current bit in that new
16701 register. */
16702 current_bit += size;
16703 while (current_bit >= 32)
16704 {
16705 current_bit-=32;
16706 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16707 (*regno)++;
16708 }
16709 *last_used_bit = current_bit;
16710 }
16711
16712 field = TREE_CHAIN (field);
16713 }
16714 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16715 }
16716 else if (TREE_CODE (arg_type) == UNION_TYPE)
16717 {
16718 tree field, field_t;
16719 int i, regno_t, field_size;
16720 int max_reg = -1;
16721 int max_bit = -1;
16722 uint32_t mask;
16723 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16724 = {-1, -1, -1, -1};
16725
16726 /* To compute the padding bits in a union we only consider bits as
16727 padding bits if, for every field in the union, they are either padding
16728 bits or fall outside that field's size. */
16729 field = TYPE_FIELDS (arg_type);
16730 while (field)
16731 {
16732 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16733 = {0U, 0U, 0U, 0U};
16734 int last_used_bit_t = *last_used_bit;
16735 regno_t = *regno;
16736 field_t = TREE_TYPE (field);
16737
16738 /* If the field's type is either a record or a union make sure to
16739 compute their padding bits too. */
16740 if (RECORD_OR_UNION_TYPE_P (field_t))
16741 not_to_clear_reg_mask
16742 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16743 &padding_bits_to_clear_t[0],
16744 starting_bit, &last_used_bit_t);
16745 else
16746 {
16747 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16748 regno_t = (field_size / 32) + *regno;
16749 last_used_bit_t = (starting_bit + field_size) % 32;
16750 }
16751
16752 for (i = *regno; i < regno_t; i++)
16753 {
16754 /* For all but the last register used by this field, only keep the
16755 padding bits that were padding bits in this field. */
16756 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16757 }
16758
16759 /* For the last register, keep all padding bits that were padding
16760 bits in this field and any padding bits that are still valid
16761 as padding bits but fall outside of this field's size. */
16762 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16763 padding_bits_to_clear_res[regno_t]
16764 &= padding_bits_to_clear_t[regno_t] | mask;
16765
16766 /* Update the maximum size of the fields in terms of registers used
16767 ('max_reg') and the 'last_used_bit' in said register. */
16768 if (max_reg < regno_t)
16769 {
16770 max_reg = regno_t;
16771 max_bit = last_used_bit_t;
16772 }
16773 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16774 max_bit = last_used_bit_t;
16775
16776 field = TREE_CHAIN (field);
16777 }
16778
16779 /* Update the current padding_bits_to_clear using the intersection of the
16780 padding bits of all the fields. */
16781 for (i=*regno; i < max_reg; i++)
16782 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16783
16784 /* Do not keep trailing padding bits; we do not know yet whether this
16785 is the end of the argument. */
16786 mask = ((uint32_t) 1 << max_bit) - 1;
16787 padding_bits_to_clear[max_reg]
16788 |= padding_bits_to_clear_res[max_reg] & mask;
16789
16790 *regno = max_reg;
16791 *last_used_bit = max_bit;
16792 }
16793 else
16794 /* This function should only be used for structs and unions. */
16795 gcc_unreachable ();
16796
16797 return not_to_clear_reg_mask;
16798 }
16799
16800 /* In the context of ARMv8-M Security Extensions, this function is used for both
16801 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16802 registers are used when returning or passing arguments, which is then
16803 returned as a mask. It will also compute a mask to indicate padding/unused
16804 bits for each of these registers, and passes this through the
16805 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16806 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16807 the starting register used to pass this argument or return value is passed
16808 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16809 for struct and union types. */
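/* Worked example (illustrative only): for a scalar DImode argument starting
   in r0, ARM_NUM_REGS gives 2, so bits 0 and 1 of the returned mask are set
   (r0 and r1 must survive the clearing) and no padding bits are recorded.
   For the struct { char a; short b; } example above, bit 0 is set and
   padding_bits_to_clear[0] ends up as 0x0000ff00; registers that carry no
   part of the argument are left out of the mask and so do get cleared. */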
16810
16811 static unsigned HOST_WIDE_INT
16812 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16813 uint32_t * padding_bits_to_clear)
16814
16815 {
16816 int last_used_bit = 0;
16817 unsigned HOST_WIDE_INT not_to_clear_mask;
16818
16819 if (RECORD_OR_UNION_TYPE_P (arg_type))
16820 {
16821 not_to_clear_mask
16822 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16823 padding_bits_to_clear, 0,
16824 &last_used_bit);
16825
16826
16827 /* If the 'last_used_bit' is not zero, that means we are still using a
16828 part of the last 'regno'. In such cases we must clear the trailing
16829 bits. Otherwise we are not using regno and we should mark it as to
16830 be cleared. */
16831 if (last_used_bit != 0)
16832 padding_bits_to_clear[regno]
16833 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16834 else
16835 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16836 }
16837 else
16838 {
16839 not_to_clear_mask = 0;
16840 /* We are not dealing with structs or unions, so these arguments may be
16841 passed in floating-point registers too. In some cases a BLKmode is
16842 used when returning or passing arguments in multiple VFP registers. */
16843 if (GET_MODE (arg_rtx) == BLKmode)
16844 {
16845 int i, arg_regs;
16846 rtx reg;
16847
16848 /* This should really only occur when dealing with the hard-float
16849 ABI. */
16850 gcc_assert (TARGET_HARD_FLOAT_ABI);
16851
16852 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16853 {
16854 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16855 gcc_assert (REG_P (reg));
16856
16857 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16858
16859 /* If we are dealing with DF mode, make sure we don't
16860 clear either of the registers it addresses. */
16861 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16862 if (arg_regs > 1)
16863 {
16864 unsigned HOST_WIDE_INT mask;
16865 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16866 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16867 not_to_clear_mask |= mask;
16868 }
16869 }
16870 }
16871 else
16872 {
16873 /* Otherwise we can rely on the MODE to determine how many registers
16874 are being used by this argument. */
16875 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16876 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16877 if (arg_regs > 1)
16878 {
16879 unsigned HOST_WIDE_INT
16880 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16881 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16882 not_to_clear_mask |= mask;
16883 }
16884 }
16885 }
16886
16887 return not_to_clear_mask;
16888 }
16889
16890 /* Clear caller-saved registers not used to pass arguments before a
16891 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16892 registers is done in the __gnu_cmse_nonsecure_call libcall.
16893 See libgcc/config/arm/cmse_nonsecure_call.S. */
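/* Illustrative source-level sketch (the names here are only an example):
   with -mcmse, a call through a non-secure function pointer such as
       typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn (int);
       ns_fn *callee;
       ...
       callee (x);
   reaches this pass as a call wrapped in UNSPEC_NONSECURE_MEM. Only r0
   carries an argument here, so the remaining caller-saved argument
   registers (typically r1-r3 and, for -mfloat-abi=hard, d0-d7) are zeroed
   before the call, and any padding bits within r0 are masked as computed
   above. */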
16894
16895 static void
16896 cmse_nonsecure_call_clear_caller_saved (void)
16897 {
16898 basic_block bb;
16899
16900 FOR_EACH_BB_FN (bb, cfun)
16901 {
16902 rtx_insn *insn;
16903
16904 FOR_BB_INSNS (bb, insn)
16905 {
16906 uint64_t to_clear_mask, float_mask;
16907 rtx_insn *seq;
16908 rtx pat, call, unspec, reg, cleared_reg, tmp;
16909 unsigned int regno, maxregno;
16910 rtx address;
16911 CUMULATIVE_ARGS args_so_far_v;
16912 cumulative_args_t args_so_far;
16913 tree arg_type, fntype;
16914 bool using_r4, first_param = true;
16915 function_args_iterator args_iter;
16916 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16917 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16918
16919 if (!NONDEBUG_INSN_P (insn))
16920 continue;
16921
16922 if (!CALL_P (insn))
16923 continue;
16924
16925 pat = PATTERN (insn);
16926 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16927 call = XVECEXP (pat, 0, 0);
16928
16929 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16930 if (GET_CODE (call) == SET)
16931 call = SET_SRC (call);
16932
16933 /* Check if it is a cmse_nonsecure_call. */
16934 unspec = XEXP (call, 0);
16935 if (GET_CODE (unspec) != UNSPEC
16936 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16937 continue;
16938
16939 /* Determine the caller-saved registers we need to clear. */
16940 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16941 maxregno = NUM_ARG_REGS - 1;
16942 /* Only look at the caller-saved floating point registers in case of
16943 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16944 lazy store and loads which clear both caller- and callee-saved
16945 registers. */
16946 if (TARGET_HARD_FLOAT_ABI)
16947 {
16948 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16949 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16950 to_clear_mask |= float_mask;
16951 maxregno = D7_VFP_REGNUM;
16952 }
16953
16954 /* Make sure the register used to hold the function address is not
16955 cleared. */
16956 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16957 gcc_assert (MEM_P (address));
16958 gcc_assert (REG_P (XEXP (address, 0)));
16959 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16960
16961 /* Set basic block of call insn so that df rescan is performed on
16962 insns inserted here. */
16963 set_block_for_insn (insn, bb);
16964 df_set_flags (DF_DEFER_INSN_RESCAN);
16965 start_sequence ();
16966
16967 /* Make sure the scheduler doesn't schedule other insns beyond
16968 here. */
16969 emit_insn (gen_blockage ());
16970
16971 /* Walk through all arguments and clear registers
16972 appropriately. */
16973 fntype = TREE_TYPE (MEM_EXPR (address));
16974 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16975 NULL_TREE);
16976 args_so_far = pack_cumulative_args (&args_so_far_v);
16977 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16978 {
16979 rtx arg_rtx;
16980 machine_mode arg_mode = TYPE_MODE (arg_type);
16981
16982 if (VOID_TYPE_P (arg_type))
16983 continue;
16984
16985 if (!first_param)
16986 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16987 true);
16988
16989 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16990 true);
16991 gcc_assert (REG_P (arg_rtx));
16992 to_clear_mask
16993 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16994 REGNO (arg_rtx),
16995 padding_bits_to_clear_ptr);
16996
16997 first_param = false;
16998 }
16999
17000 /* Clear padding bits where needed. */
17001 cleared_reg = XEXP (address, 0);
17002 reg = gen_rtx_REG (SImode, IP_REGNUM);
17003 using_r4 = false;
17004 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17005 {
17006 if (padding_bits_to_clear[regno] == 0)
17007 continue;
17008
17009 /* If this is a Thumb-1 target, copy the address of the function
17010 we are calling from 'r4' into 'ip' such that we can use r4 to
17011 clear the unused bits in the arguments. */
17012 if (TARGET_THUMB1 && !using_r4)
17013 {
17014 using_r4 = true;
17015 reg = cleared_reg;
17016 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17017 reg);
17018 }
17019
17020 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17021 emit_move_insn (reg, tmp);
17022 /* Also fill the top half of the negated
17023 padding_bits_to_clear. */
17024 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17025 {
17026 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17027 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17028 GEN_INT (16),
17029 GEN_INT (16)),
17030 tmp));
17031 }
17032
17033 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17034 gen_rtx_REG (SImode, regno),
17035 reg));
17036
17037 }
17038 if (using_r4)
17039 emit_move_insn (cleared_reg,
17040 gen_rtx_REG (SImode, IP_REGNUM));
17041
17042 /* We use right shift and left shift to clear the LSB of the address
17043 we jump to instead of using bic, to avoid having to use an extra
17044 register on Thumb-1. */
17045 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17046 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17047 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17048 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17049
17050 /* Clear all registers that could leak information before doing a
17051 non-secure call. */
17052 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17053 {
17054 if (!(to_clear_mask & (1LL << regno)))
17055 continue;
17056
17057 /* If regno is an even vfp register and its successor is also to
17058 be cleared, use vmov. */
17059 if (IS_VFP_REGNUM (regno))
17060 {
17061 if (TARGET_VFP_DOUBLE
17062 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17063 && to_clear_mask & (1LL << (regno + 1)))
17064 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17065 CONST0_RTX (DFmode));
17066 else
17067 emit_move_insn (gen_rtx_REG (SFmode, regno),
17068 CONST0_RTX (SFmode));
17069 }
17070 else
17071 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17072 }
17073
17074 seq = get_insns ();
17075 end_sequence ();
17076 emit_insn_before (seq, insn);
17077
17078 }
17079 }
17080 }
17081
17082 /* Rewrite move insn into subtract of 0 if the condition codes will
17083 be useful in next conditional jump insn. */
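/* Illustrative sketch (conceptual assembly only): given a Thumb-1 sequence
   of the shape
       mov  r2, r1
       ...
       cmp  r2, #0
       beq  .L1
   where the intervening insns neither set the condition codes nor clobber
   the compared register, the move is rewritten as a subtract of zero
   (subs r2, r1, #0). That insn sets the condition codes itself, so the
   zero comparison feeding the cbranchsi4 insn can reuse them instead of
   needing a separate compare. */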
17084
17085 static void
17086 thumb1_reorg (void)
17087 {
17088 basic_block bb;
17089
17090 FOR_EACH_BB_FN (bb, cfun)
17091 {
17092 rtx dest, src;
17093 rtx cmp, op0, op1, set = NULL;
17094 rtx_insn *prev, *insn = BB_END (bb);
17095 bool insn_clobbered = false;
17096
17097 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17098 insn = PREV_INSN (insn);
17099
17100 /* Find the last cbranchsi4_insn in basic block BB. */
17101 if (insn == BB_HEAD (bb)
17102 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17103 continue;
17104
17105 /* Get the register with which we are comparing. */
17106 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17107 op0 = XEXP (cmp, 0);
17108 op1 = XEXP (cmp, 1);
17109
17110 /* Check that comparison is against ZERO. */
17111 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17112 continue;
17113
17114 /* Find the first flag setting insn before INSN in basic block BB. */
17115 gcc_assert (insn != BB_HEAD (bb));
17116 for (prev = PREV_INSN (insn);
17117 (!insn_clobbered
17118 && prev != BB_HEAD (bb)
17119 && (NOTE_P (prev)
17120 || DEBUG_INSN_P (prev)
17121 || ((set = single_set (prev)) != NULL
17122 && get_attr_conds (prev) == CONDS_NOCOND)));
17123 prev = PREV_INSN (prev))
17124 {
17125 if (reg_set_p (op0, prev))
17126 insn_clobbered = true;
17127 }
17128
17129 /* Skip if op0 is clobbered by insn other than prev. */
17130 if (insn_clobbered)
17131 continue;
17132
17133 if (!set)
17134 continue;
17135
17136 dest = SET_DEST (set);
17137 src = SET_SRC (set);
17138 if (!low_register_operand (dest, SImode)
17139 || !low_register_operand (src, SImode))
17140 continue;
17141
17142 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17143 in INSN. Both src and dest of the move insn are checked. */
17144 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17145 {
17146 dest = copy_rtx (dest);
17147 src = copy_rtx (src);
17148 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17149 PATTERN (prev) = gen_rtx_SET (dest, src);
17150 INSN_CODE (prev) = -1;
17151 /* Set test register in INSN to dest. */
17152 XEXP (cmp, 0) = copy_rtx (dest);
17153 INSN_CODE (insn) = -1;
17154 }
17155 }
17156 }
17157
17158 /* Convert instructions to their cc-clobbering variant if possible, since
17159 that allows us to use smaller encodings. */
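/* Illustrative sketch: in Thumb-2, "and r0, r0, r1" needs a 32-bit
   encoding, while the flag-setting form "ands r0, r1" has a 16-bit one.
   When the walk below finds such a SET and the condition codes are dead at
   that point, it adds a CC_REGNUM clobber to the pattern (swapping
   commutative operands when that is what makes the 16-bit form match) so
   that the shorter encoding can be used. */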
17160
17161 static void
17162 thumb2_reorg (void)
17163 {
17164 basic_block bb;
17165 regset_head live;
17166
17167 INIT_REG_SET (&live);
17168
17169 /* We are freeing block_for_insn in the toplev to keep compatibility
17170 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17171 compute_bb_for_insn ();
17172 df_analyze ();
17173
17174 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17175
17176 FOR_EACH_BB_FN (bb, cfun)
17177 {
17178 if ((current_tune->disparage_flag_setting_t16_encodings
17179 == tune_params::DISPARAGE_FLAGS_ALL)
17180 && optimize_bb_for_speed_p (bb))
17181 continue;
17182
17183 rtx_insn *insn;
17184 Convert_Action action = SKIP;
17185 Convert_Action action_for_partial_flag_setting
17186 = ((current_tune->disparage_flag_setting_t16_encodings
17187 != tune_params::DISPARAGE_FLAGS_NEITHER)
17188 && optimize_bb_for_speed_p (bb))
17189 ? SKIP : CONV;
17190
17191 COPY_REG_SET (&live, DF_LR_OUT (bb));
17192 df_simulate_initialize_backwards (bb, &live);
17193 FOR_BB_INSNS_REVERSE (bb, insn)
17194 {
17195 if (NONJUMP_INSN_P (insn)
17196 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17197 && GET_CODE (PATTERN (insn)) == SET)
17198 {
17199 action = SKIP;
17200 rtx pat = PATTERN (insn);
17201 rtx dst = XEXP (pat, 0);
17202 rtx src = XEXP (pat, 1);
17203 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17204
17205 if (UNARY_P (src) || BINARY_P (src))
17206 op0 = XEXP (src, 0);
17207
17208 if (BINARY_P (src))
17209 op1 = XEXP (src, 1);
17210
17211 if (low_register_operand (dst, SImode))
17212 {
17213 switch (GET_CODE (src))
17214 {
17215 case PLUS:
17216 /* Adding two registers and storing the result
17217 in the first source is already a 16-bit
17218 operation. */
17219 if (rtx_equal_p (dst, op0)
17220 && register_operand (op1, SImode))
17221 break;
17222
17223 if (low_register_operand (op0, SImode))
17224 {
17225 /* ADDS <Rd>,<Rn>,<Rm> */
17226 if (low_register_operand (op1, SImode))
17227 action = CONV;
17228 /* ADDS <Rdn>,#<imm8> */
17229 /* SUBS <Rdn>,#<imm8> */
17230 else if (rtx_equal_p (dst, op0)
17231 && CONST_INT_P (op1)
17232 && IN_RANGE (INTVAL (op1), -255, 255))
17233 action = CONV;
17234 /* ADDS <Rd>,<Rn>,#<imm3> */
17235 /* SUBS <Rd>,<Rn>,#<imm3> */
17236 else if (CONST_INT_P (op1)
17237 && IN_RANGE (INTVAL (op1), -7, 7))
17238 action = CONV;
17239 }
17240 /* ADCS <Rd>, <Rn> */
17241 else if (GET_CODE (XEXP (src, 0)) == PLUS
17242 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17243 && low_register_operand (XEXP (XEXP (src, 0), 1),
17244 SImode)
17245 && COMPARISON_P (op1)
17246 && cc_register (XEXP (op1, 0), VOIDmode)
17247 && maybe_get_arm_condition_code (op1) == ARM_CS
17248 && XEXP (op1, 1) == const0_rtx)
17249 action = CONV;
17250 break;
17251
17252 case MINUS:
17253 /* RSBS <Rd>,<Rn>,#0
17254 Not handled here: see NEG below. */
17255 /* SUBS <Rd>,<Rn>,#<imm3>
17256 SUBS <Rdn>,#<imm8>
17257 Not handled here: see PLUS above. */
17258 /* SUBS <Rd>,<Rn>,<Rm> */
17259 if (low_register_operand (op0, SImode)
17260 && low_register_operand (op1, SImode))
17261 action = CONV;
17262 break;
17263
17264 case MULT:
17265 /* MULS <Rdm>,<Rn>,<Rdm>
17266 As an exception to the rule, this is only used
17267 when optimizing for size since MULS is slow on all
17268 known implementations. We do not even want to use
17269 MULS in cold code, if optimizing for speed, so we
17270 test the global flag here. */
17271 if (!optimize_size)
17272 break;
17273 /* Fall through. */
17274 case AND:
17275 case IOR:
17276 case XOR:
17277 /* ANDS <Rdn>,<Rm> */
17278 if (rtx_equal_p (dst, op0)
17279 && low_register_operand (op1, SImode))
17280 action = action_for_partial_flag_setting;
17281 else if (rtx_equal_p (dst, op1)
17282 && low_register_operand (op0, SImode))
17283 action = action_for_partial_flag_setting == SKIP
17284 ? SKIP : SWAP_CONV;
17285 break;
17286
17287 case ASHIFTRT:
17288 case ASHIFT:
17289 case LSHIFTRT:
17290 /* ASRS <Rdn>,<Rm> */
17291 /* LSRS <Rdn>,<Rm> */
17292 /* LSLS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst, op0)
17294 && low_register_operand (op1, SImode))
17295 action = action_for_partial_flag_setting;
17296 /* ASRS <Rd>,<Rm>,#<imm5> */
17297 /* LSRS <Rd>,<Rm>,#<imm5> */
17298 /* LSLS <Rd>,<Rm>,#<imm5> */
17299 else if (low_register_operand (op0, SImode)
17300 && CONST_INT_P (op1)
17301 && IN_RANGE (INTVAL (op1), 0, 31))
17302 action = action_for_partial_flag_setting;
17303 break;
17304
17305 case ROTATERT:
17306 /* RORS <Rdn>,<Rm> */
17307 if (rtx_equal_p (dst, op0)
17308 && low_register_operand (op1, SImode))
17309 action = action_for_partial_flag_setting;
17310 break;
17311
17312 case NOT:
17313 /* MVNS <Rd>,<Rm> */
17314 if (low_register_operand (op0, SImode))
17315 action = action_for_partial_flag_setting;
17316 break;
17317
17318 case NEG:
17319 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17320 if (low_register_operand (op0, SImode))
17321 action = CONV;
17322 break;
17323
17324 case CONST_INT:
17325 /* MOVS <Rd>,#<imm8> */
17326 if (CONST_INT_P (src)
17327 && IN_RANGE (INTVAL (src), 0, 255))
17328 action = action_for_partial_flag_setting;
17329 break;
17330
17331 case REG:
17332 /* MOVS and MOV<c> with registers have different
17333 encodings, so are not relevant here. */
17334 break;
17335
17336 default:
17337 break;
17338 }
17339 }
17340
17341 if (action != SKIP)
17342 {
17343 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17344 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17345 rtvec vec;
17346
17347 if (action == SWAP_CONV)
17348 {
17349 src = copy_rtx (src);
17350 XEXP (src, 0) = op1;
17351 XEXP (src, 1) = op0;
17352 pat = gen_rtx_SET (dst, src);
17353 vec = gen_rtvec (2, pat, clobber);
17354 }
17355 else /* action == CONV */
17356 vec = gen_rtvec (2, pat, clobber);
17357
17358 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17359 INSN_CODE (insn) = -1;
17360 }
17361 }
17362
17363 if (NONDEBUG_INSN_P (insn))
17364 df_simulate_one_insn_backwards (bb, insn, &live);
17365 }
17366 }
17367
17368 CLEAR_REG_SET (&live);
17369 }
17370
17371 /* GCC puts the pool in the wrong place for ARM, since we can only
17372 load addresses a limited distance around the pc. We do some
17373 special munging to move the constant pool values to the correct
17374 point in the code. */
17375 static void
17376 arm_reorg (void)
17377 {
17378 rtx_insn *insn;
17379 HOST_WIDE_INT address = 0;
17380 Mfix * fix;
17381
17382 if (use_cmse)
17383 cmse_nonsecure_call_clear_caller_saved ();
17384 if (TARGET_THUMB1)
17385 thumb1_reorg ();
17386 else if (TARGET_THUMB2)
17387 thumb2_reorg ();
17388
17389 /* Ensure all insns that must be split have been split at this point.
17390 Otherwise, the pool placement code below may compute incorrect
17391 insn lengths. Note that when optimizing, all insns have already
17392 been split at this point. */
17393 if (!optimize)
17394 split_all_insns_noflow ();
17395
17396 /* If literal pools are disabled, make sure we do not attempt to create
17397 one even though it should no longer be necessary to create any. */
17398 if (arm_disable_literal_pool)
17399 return;
17400
17401 minipool_fix_head = minipool_fix_tail = NULL;
17402
17403 /* The first insn must always be a note, or the code below won't
17404 scan it properly. */
17405 insn = get_insns ();
17406 gcc_assert (NOTE_P (insn));
17407 minipool_pad = 0;
17408
17409 /* Scan all the insns and record the operands that will need fixing. */
17410 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17411 {
17412 if (BARRIER_P (insn))
17413 push_minipool_barrier (insn, address);
17414 else if (INSN_P (insn))
17415 {
17416 rtx_jump_table_data *table;
17417
17418 note_invalid_constants (insn, address, true);
17419 address += get_attr_length (insn);
17420
17421 /* If the insn is a vector jump, add the size of the table
17422 and skip the table. */
17423 if (tablejump_p (insn, NULL, &table))
17424 {
17425 address += get_jump_table_size (table);
17426 insn = table;
17427 }
17428 }
17429 else if (LABEL_P (insn))
17430 /* Add the worst-case padding due to alignment. We don't add
17431 the _current_ padding because the minipool insertions
17432 themselves might change it. */
17433 address += get_label_padding (insn);
17434 }
17435
17436 fix = minipool_fix_head;
17437
17438 /* Now scan the fixups and perform the required changes. */
17439 while (fix)
17440 {
17441 Mfix * ftmp;
17442 Mfix * fdel;
17443 Mfix * last_added_fix;
17444 Mfix * last_barrier = NULL;
17445 Mfix * this_fix;
17446
17447 /* Skip any further barriers before the next fix. */
17448 while (fix && BARRIER_P (fix->insn))
17449 fix = fix->next;
17450
17451 /* No more fixes. */
17452 if (fix == NULL)
17453 break;
17454
17455 last_added_fix = NULL;
17456
17457 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17458 {
17459 if (BARRIER_P (ftmp->insn))
17460 {
17461 if (ftmp->address >= minipool_vector_head->max_address)
17462 break;
17463
17464 last_barrier = ftmp;
17465 }
17466 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17467 break;
17468
17469 last_added_fix = ftmp; /* Keep track of the last fix added. */
17470 }
17471
17472 /* If we found a barrier, drop back to that; any fixes that we
17473 could have reached but come after the barrier will now go in
17474 the next mini-pool. */
17475 if (last_barrier != NULL)
17476 {
17477 /* Reduce the refcount for those fixes that won't go into this
17478 pool after all. */
17479 for (fdel = last_barrier->next;
17480 fdel && fdel != ftmp;
17481 fdel = fdel->next)
17482 {
17483 fdel->minipool->refcount--;
17484 fdel->minipool = NULL;
17485 }
17486
17487 ftmp = last_barrier;
17488 }
17489 else
17490 {
17491 /* ftmp is the first fix that we can't fit into this pool and
17492 there are no natural barriers that we could use. Insert a
17493 new barrier in the code somewhere between the previous
17494 fix and this one, and arrange to jump around it. */
17495 HOST_WIDE_INT max_address;
17496
17497 /* The last item on the list of fixes must be a barrier, so
17498 we can never run off the end of the list of fixes without
17499 last_barrier being set. */
17500 gcc_assert (ftmp);
17501
17502 max_address = minipool_vector_head->max_address;
17503 /* Check that there isn't another fix that is in range that
17504 we couldn't fit into this pool because the pool was
17505 already too large: we need to put the pool before such an
17506 instruction. The pool itself may come just after the
17507 fix because create_fix_barrier also allows space for a
17508 jump instruction. */
17509 if (ftmp->address < max_address)
17510 max_address = ftmp->address + 1;
17511
17512 last_barrier = create_fix_barrier (last_added_fix, max_address);
17513 }
17514
17515 assign_minipool_offsets (last_barrier);
17516
17517 while (ftmp)
17518 {
17519 if (!BARRIER_P (ftmp->insn)
17520 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17521 == NULL))
17522 break;
17523
17524 ftmp = ftmp->next;
17525 }
17526
17527 /* Scan over the fixes we have identified for this pool, fixing them
17528 up and adding the constants to the pool itself. */
17529 for (this_fix = fix; this_fix && ftmp != this_fix;
17530 this_fix = this_fix->next)
17531 if (!BARRIER_P (this_fix->insn))
17532 {
17533 rtx addr
17534 = plus_constant (Pmode,
17535 gen_rtx_LABEL_REF (VOIDmode,
17536 minipool_vector_label),
17537 this_fix->minipool->offset);
17538 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17539 }
17540
17541 dump_minipool (last_barrier->insn);
17542 fix = ftmp;
17543 }
17544
17545 /* From now on we must synthesize any constants that we can't handle
17546 directly. This can happen if the RTL gets split during final
17547 instruction generation. */
17548 cfun->machine->after_arm_reorg = 1;
17549
17550 /* Free the minipool memory. */
17551 obstack_free (&minipool_obstack, minipool_startobj);
17552 }
17553 \f
17554 /* Routines to output assembly language. */
17555
17556 /* Return string representation of passed in real value. */
17557 static const char *
17558 fp_const_from_val (REAL_VALUE_TYPE *r)
17559 {
17560 if (!fp_consts_inited)
17561 init_fp_table ();
17562
17563 gcc_assert (real_equal (r, &value_fp0));
17564 return "0";
17565 }
17566
17567 /* OPERANDS[0] is the entire list of insns that constitute pop,
17568 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17569 is in the list, UPDATE is true iff the list contains explicit
17570 update of base register. */
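/* For example (illustrative only): popping { r4, r5, pc } with the stack
   pointer as base register and an explicit update produces
   "pop {r4, r5, pc}", while the same registers with a non-SP base and an
   update produce "ldmia rN!, {r4, r5, pc}". When returning from an
   interrupt, POP is not used and a "^" is appended to the register list. */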
17571 void
17572 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17573 bool update)
17574 {
17575 int i;
17576 char pattern[100];
17577 int offset;
17578 const char *conditional;
17579 int num_saves = XVECLEN (operands[0], 0);
17580 unsigned int regno;
17581 unsigned int regno_base = REGNO (operands[1]);
17582 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17583
17584 offset = 0;
17585 offset += update ? 1 : 0;
17586 offset += return_pc ? 1 : 0;
17587
17588 /* Is the base register in the list? */
17589 for (i = offset; i < num_saves; i++)
17590 {
17591 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17592 /* If SP is in the list, then the base register must be SP. */
17593 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17594 /* If base register is in the list, there must be no explicit update. */
17595 if (regno == regno_base)
17596 gcc_assert (!update);
17597 }
17598
17599 conditional = reverse ? "%?%D0" : "%?%d0";
17600 /* Can't use POP if returning from an interrupt. */
17601 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17602 sprintf (pattern, "pop%s\t{", conditional);
17603 else
17604 {
17605 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17606 It's just a convention; their semantics are identical. */
17607 if (regno_base == SP_REGNUM)
17608 sprintf (pattern, "ldmfd%s\t", conditional);
17609 else if (update)
17610 sprintf (pattern, "ldmia%s\t", conditional);
17611 else
17612 sprintf (pattern, "ldm%s\t", conditional);
17613
17614 strcat (pattern, reg_names[regno_base]);
17615 if (update)
17616 strcat (pattern, "!, {");
17617 else
17618 strcat (pattern, ", {");
17619 }
17620
17621 /* Output the first destination register. */
17622 strcat (pattern,
17623 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17624
17625 /* Output the rest of the destination registers. */
17626 for (i = offset + 1; i < num_saves; i++)
17627 {
17628 strcat (pattern, ", ");
17629 strcat (pattern,
17630 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17631 }
17632
17633 strcat (pattern, "}");
17634
17635 if (interrupt_p && return_pc)
17636 strcat (pattern, "^");
17637
17638 output_asm_insn (pattern, &cond);
17639 }
17640
17641
17642 /* Output the assembly for a store multiple. */
17643
17644 const char *
17645 vfp_output_vstmd (rtx * operands)
17646 {
17647 char pattern[100];
17648 int p;
17649 int base;
17650 int i;
17651 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17652 ? XEXP (operands[0], 0)
17653 : XEXP (XEXP (operands[0], 0), 0);
17654 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17655
17656 if (push_p)
17657 strcpy (pattern, "vpush%?.64\t{%P1");
17658 else
17659 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17660
17661 p = strlen (pattern);
17662
17663 gcc_assert (REG_P (operands[1]));
17664
17665 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17666 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17667 {
17668 p += sprintf (&pattern[p], ", d%d", base + i);
17669 }
17670 strcpy (&pattern[p], "}");
17671
17672 output_asm_insn (pattern, operands);
17673 return "";
17674 }
17675
17676
17677 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17678 number of bytes pushed. */
17679
17680 static int
17681 vfp_emit_fstmd (int base_reg, int count)
17682 {
17683 rtx par;
17684 rtx dwarf;
17685 rtx tmp, reg;
17686 int i;
17687
17688 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17689 register pairs are stored by a store multiple insn. We avoid this
17690 by pushing an extra pair. */
17691 if (count == 2 && !arm_arch6)
17692 {
17693 if (base_reg == LAST_VFP_REGNUM - 3)
17694 base_reg -= 2;
17695 count++;
17696 }
17697
17698 /* FSTMD may not store more than 16 doubleword registers at once. Split
17699 larger stores into multiple parts (up to a maximum of two, in
17700 practice). */
17701 if (count > 16)
17702 {
17703 int saved;
17704 /* NOTE: base_reg is an internal register number, so each D register
17705 counts as 2. */
17706 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17707 saved += vfp_emit_fstmd (base_reg, 16);
17708 return saved;
17709 }
17710
17711 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17712 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17713
17714 reg = gen_rtx_REG (DFmode, base_reg);
17715 base_reg += 2;
17716
17717 XVECEXP (par, 0, 0)
17718 = gen_rtx_SET (gen_frame_mem
17719 (BLKmode,
17720 gen_rtx_PRE_MODIFY (Pmode,
17721 stack_pointer_rtx,
17722 plus_constant
17723 (Pmode, stack_pointer_rtx,
17724 - (count * 8)))
17725 ),
17726 gen_rtx_UNSPEC (BLKmode,
17727 gen_rtvec (1, reg),
17728 UNSPEC_PUSH_MULT));
17729
17730 tmp = gen_rtx_SET (stack_pointer_rtx,
17731 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17732 RTX_FRAME_RELATED_P (tmp) = 1;
17733 XVECEXP (dwarf, 0, 0) = tmp;
17734
17735 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17736 RTX_FRAME_RELATED_P (tmp) = 1;
17737 XVECEXP (dwarf, 0, 1) = tmp;
17738
17739 for (i = 1; i < count; i++)
17740 {
17741 reg = gen_rtx_REG (DFmode, base_reg);
17742 base_reg += 2;
17743 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17744
17745 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17746 plus_constant (Pmode,
17747 stack_pointer_rtx,
17748 i * 8)),
17749 reg);
17750 RTX_FRAME_RELATED_P (tmp) = 1;
17751 XVECEXP (dwarf, 0, i + 1) = tmp;
17752 }
17753
17754 par = emit_insn (par);
17755 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17756 RTX_FRAME_RELATED_P (par) = 1;
17757
17758 return count * 8;
17759 }
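/* Worked example (illustrative only): asking for 3 register pairs starting
   at d8 builds a PARALLEL that stores d8-d10 below the current stack
   pointer with a single pre-decrementing store-multiple, attaches a
   REG_FRAME_RELATED_EXPR note describing the individual stores for the
   unwinder, and returns 24 (3 * 8 bytes). With only 2 pairs and no
   arm_arch6, the ARM10 workaround above bumps the count to 3 first. */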
17760
17761 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17762 has the cmse_nonsecure_call attribute, and false otherwise. */
17763
17764 bool
17765 detect_cmse_nonsecure_call (tree addr)
17766 {
17767 if (!addr)
17768 return FALSE;
17769
17770 tree fntype = TREE_TYPE (addr);
17771 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17772 TYPE_ATTRIBUTES (fntype)))
17773 return TRUE;
17774 return FALSE;
17775 }
17776
17777
17778 /* Emit a call instruction with pattern PAT. ADDR is the address of
17779 the call target. */
17780
17781 void
17782 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17783 {
17784 rtx insn;
17785
17786 insn = emit_call_insn (pat);
17787
17788 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17789 If the call might use such an entry, add a use of the PIC register
17790 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17791 if (TARGET_VXWORKS_RTP
17792 && flag_pic
17793 && !sibcall
17794 && GET_CODE (addr) == SYMBOL_REF
17795 && (SYMBOL_REF_DECL (addr)
17796 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17797 : !SYMBOL_REF_LOCAL_P (addr)))
17798 {
17799 require_pic_register ();
17800 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17801 }
17802
17803 if (TARGET_AAPCS_BASED)
17804 {
17805 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17806 linker. We need to add an IP clobber to allow setting
17807 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17808 is not needed since it's a fixed register. */
17809 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17810 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17811 }
17812 }
17813
17814 /* Output a 'call' insn. */
17815 const char *
17816 output_call (rtx *operands)
17817 {
17818 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17819
17820 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17821 if (REGNO (operands[0]) == LR_REGNUM)
17822 {
17823 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17824 output_asm_insn ("mov%?\t%0, %|lr", operands);
17825 }
17826
17827 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17828
17829 if (TARGET_INTERWORK || arm_arch4t)
17830 output_asm_insn ("bx%?\t%0", operands);
17831 else
17832 output_asm_insn ("mov%?\t%|pc, %0", operands);
17833
17834 return "";
17835 }
17836
17837 /* Output a move from arm registers to arm registers of a long double.
17838 OPERANDS[0] is the destination.
17839 OPERANDS[1] is the source. */
17840 const char *
17841 output_mov_long_double_arm_from_arm (rtx *operands)
17842 {
17843 /* We have to be careful here because the two might overlap. */
17844 int dest_start = REGNO (operands[0]);
17845 int src_start = REGNO (operands[1]);
17846 rtx ops[2];
17847 int i;
17848
17849 if (dest_start < src_start)
17850 {
17851 for (i = 0; i < 3; i++)
17852 {
17853 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17854 ops[1] = gen_rtx_REG (SImode, src_start + i);
17855 output_asm_insn ("mov%?\t%0, %1", ops);
17856 }
17857 }
17858 else
17859 {
17860 for (i = 2; i >= 0; i--)
17861 {
17862 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17863 ops[1] = gen_rtx_REG (SImode, src_start + i);
17864 output_asm_insn ("mov%?\t%0, %1", ops);
17865 }
17866 }
17867
17868 return "";
17869 }
17870
17871 void
17872 arm_emit_movpair (rtx dest, rtx src)
17873 {
17874 /* If the src is an immediate, simplify it. */
17875 if (CONST_INT_P (src))
17876 {
17877 HOST_WIDE_INT val = INTVAL (src);
17878 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17879 if ((val >> 16) & 0x0000ffff)
17880 {
17881 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17882 GEN_INT (16)),
17883 GEN_INT ((val >> 16) & 0x0000ffff));
17884 rtx_insn *insn = get_last_insn ();
17885 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17886 }
17887 return;
17888 }
17889 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17890 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17891 rtx_insn *insn = get_last_insn ();
17892 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17893 }
17894
17895 /* Output a move between double words. It must be REG<-MEM
17896 or MEM<-REG. */
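/* For instance (an illustrative sketch), a DImode load whose address is
   a plain base register is typically emitted as "ldrd rN, [rB]" when
   ldrd is available and otherwise as "ldmia rB, {rN, rN+1}".  */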
17897 const char *
17898 output_move_double (rtx *operands, bool emit, int *count)
17899 {
17900 enum rtx_code code0 = GET_CODE (operands[0]);
17901 enum rtx_code code1 = GET_CODE (operands[1]);
17902 rtx otherops[3];
17903 if (count)
17904 *count = 1;
17905
17906 /* The only case when this might happen is when
17907 you are looking at the length of a DImode instruction
17908 that has an invalid constant in it. */
17909 if (code0 == REG && code1 != MEM)
17910 {
17911 gcc_assert (!emit);
17912 *count = 2;
17913 return "";
17914 }
17915
17916 if (code0 == REG)
17917 {
17918 unsigned int reg0 = REGNO (operands[0]);
17919
17920 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17921
17922 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17923
17924 switch (GET_CODE (XEXP (operands[1], 0)))
17925 {
17926 case REG:
17927
17928 if (emit)
17929 {
17930 if (TARGET_LDRD
17931 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17932 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17933 else
17934 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17935 }
17936 break;
17937
17938 case PRE_INC:
17939 gcc_assert (TARGET_LDRD);
17940 if (emit)
17941 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17942 break;
17943
17944 case PRE_DEC:
17945 if (emit)
17946 {
17947 if (TARGET_LDRD)
17948 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17949 else
17950 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17951 }
17952 break;
17953
17954 case POST_INC:
17955 if (emit)
17956 {
17957 if (TARGET_LDRD)
17958 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17959 else
17960 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17961 }
17962 break;
17963
17964 case POST_DEC:
17965 gcc_assert (TARGET_LDRD);
17966 if (emit)
17967 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17968 break;
17969
17970 case PRE_MODIFY:
17971 case POST_MODIFY:
17972 /* Autoincrement addressing modes should never have overlapping
17973 base and destination registers, and overlapping index registers
17974 are already prohibited, so this doesn't need to worry about
17975 fix_cm3_ldrd. */
17976 otherops[0] = operands[0];
17977 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17978 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17979
17980 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17981 {
17982 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17983 {
17984 /* Registers overlap so split out the increment. */
17985 if (emit)
17986 {
17987 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17988 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17989 }
17990 if (count)
17991 *count = 2;
17992 }
17993 else
17994 {
17995 /* Use a single insn if we can.
17996 FIXME: IWMMXT allows offsets larger than ldrd can
17997 handle, fix these up with a pair of ldr. */
17998 if (TARGET_THUMB2
17999 || !CONST_INT_P (otherops[2])
18000 || (INTVAL (otherops[2]) > -256
18001 && INTVAL (otherops[2]) < 256))
18002 {
18003 if (emit)
18004 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18005 }
18006 else
18007 {
18008 if (emit)
18009 {
18010 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18011 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18012 }
18013 if (count)
18014 *count = 2;
18015
18016 }
18017 }
18018 }
18019 else
18020 {
18021 /* Use a single insn if we can.
18022 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18023 fix these up with a pair of ldr. */
18024 if (TARGET_THUMB2
18025 || !CONST_INT_P (otherops[2])
18026 || (INTVAL (otherops[2]) > -256
18027 && INTVAL (otherops[2]) < 256))
18028 {
18029 if (emit)
18030 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18031 }
18032 else
18033 {
18034 if (emit)
18035 {
18036 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18037 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18038 }
18039 if (count)
18040 *count = 2;
18041 }
18042 }
18043 break;
18044
18045 case LABEL_REF:
18046 case CONST:
18047 /* We might be able to use ldrd %0, %1 here. However the range is
18048 different to ldr/adr, and it is broken on some ARMv7-M
18049 implementations. */
18050 /* Use the second register of the pair to avoid problematic
18051 overlap. */
18052 otherops[1] = operands[1];
18053 if (emit)
18054 output_asm_insn ("adr%?\t%0, %1", otherops);
18055 operands[1] = otherops[0];
18056 if (emit)
18057 {
18058 if (TARGET_LDRD)
18059 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18060 else
18061 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18062 }
18063
18064 if (count)
18065 *count = 2;
18066 break;
18067
18068 /* ??? This needs checking for thumb2. */
18069 default:
18070 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18071 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18072 {
18073 otherops[0] = operands[0];
18074 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18075 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18076
18077 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18078 {
18079 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18080 {
18081 switch ((int) INTVAL (otherops[2]))
18082 {
18083 case -8:
18084 if (emit)
18085 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18086 return "";
18087 case -4:
18088 if (TARGET_THUMB2)
18089 break;
18090 if (emit)
18091 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18092 return "";
18093 case 4:
18094 if (TARGET_THUMB2)
18095 break;
18096 if (emit)
18097 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18098 return "";
18099 }
18100 }
18101 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18102 operands[1] = otherops[0];
18103 if (TARGET_LDRD
18104 && (REG_P (otherops[2])
18105 || TARGET_THUMB2
18106 || (CONST_INT_P (otherops[2])
18107 && INTVAL (otherops[2]) > -256
18108 && INTVAL (otherops[2]) < 256)))
18109 {
18110 if (reg_overlap_mentioned_p (operands[0],
18111 otherops[2]))
18112 {
18113 /* Swap base and index registers over to
18114 avoid a conflict. */
18115 std::swap (otherops[1], otherops[2]);
18116 }
18117 /* If both registers conflict, it will usually
18118 have been fixed by a splitter. */
18119 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18120 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18121 {
18122 if (emit)
18123 {
18124 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18125 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18126 }
18127 if (count)
18128 *count = 2;
18129 }
18130 else
18131 {
18132 otherops[0] = operands[0];
18133 if (emit)
18134 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18135 }
18136 return "";
18137 }
18138
18139 if (CONST_INT_P (otherops[2]))
18140 {
18141 if (emit)
18142 {
18143 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18144 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18145 else
18146 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18147 }
18148 }
18149 else
18150 {
18151 if (emit)
18152 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18153 }
18154 }
18155 else
18156 {
18157 if (emit)
18158 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18159 }
18160
18161 if (count)
18162 *count = 2;
18163
18164 if (TARGET_LDRD)
18165 return "ldrd%?\t%0, [%1]";
18166
18167 return "ldmia%?\t%1, %M0";
18168 }
18169 else
18170 {
18171 otherops[1] = adjust_address (operands[1], SImode, 4);
18172 /* Take care of overlapping base/data reg. */
18173 if (reg_mentioned_p (operands[0], operands[1]))
18174 {
18175 if (emit)
18176 {
18177 output_asm_insn ("ldr%?\t%0, %1", otherops);
18178 output_asm_insn ("ldr%?\t%0, %1", operands);
18179 }
18180 if (count)
18181 *count = 2;
18182
18183 }
18184 else
18185 {
18186 if (emit)
18187 {
18188 output_asm_insn ("ldr%?\t%0, %1", operands);
18189 output_asm_insn ("ldr%?\t%0, %1", otherops);
18190 }
18191 if (count)
18192 *count = 2;
18193 }
18194 }
18195 }
18196 }
18197 else
18198 {
18199 /* Constraints should ensure this. */
18200 gcc_assert (code0 == MEM && code1 == REG);
18201 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18202 || (TARGET_ARM && TARGET_LDRD));
18203
18204 switch (GET_CODE (XEXP (operands[0], 0)))
18205 {
18206 case REG:
18207 if (emit)
18208 {
18209 if (TARGET_LDRD)
18210 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18211 else
18212 output_asm_insn ("stm%?\t%m0, %M1", operands);
18213 }
18214 break;
18215
18216 case PRE_INC:
18217 gcc_assert (TARGET_LDRD);
18218 if (emit)
18219 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18220 break;
18221
18222 case PRE_DEC:
18223 if (emit)
18224 {
18225 if (TARGET_LDRD)
18226 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18227 else
18228 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18229 }
18230 break;
18231
18232 case POST_INC:
18233 if (emit)
18234 {
18235 if (TARGET_LDRD)
18236 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18237 else
18238 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18239 }
18240 break;
18241
18242 case POST_DEC:
18243 gcc_assert (TARGET_LDRD);
18244 if (emit)
18245 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18246 break;
18247
18248 case PRE_MODIFY:
18249 case POST_MODIFY:
18250 otherops[0] = operands[1];
18251 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18252 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18253
18254 /* IWMMXT allows offsets larger than ldrd can handle,
18255 fix these up with a pair of ldr. */
18256 if (!TARGET_THUMB2
18257 && CONST_INT_P (otherops[2])
18258 && (INTVAL(otherops[2]) <= -256
18259 || INTVAL(otherops[2]) >= 256))
18260 {
18261 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18262 {
18263 if (emit)
18264 {
18265 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18266 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18267 }
18268 if (count)
18269 *count = 2;
18270 }
18271 else
18272 {
18273 if (emit)
18274 {
18275 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18276 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18277 }
18278 if (count)
18279 *count = 2;
18280 }
18281 }
18282 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18283 {
18284 if (emit)
18285 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18286 }
18287 else
18288 {
18289 if (emit)
18290 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18291 }
18292 break;
18293
18294 case PLUS:
18295 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18296 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18297 {
18298 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18299 {
18300 case -8:
18301 if (emit)
18302 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18303 return "";
18304
18305 case -4:
18306 if (TARGET_THUMB2)
18307 break;
18308 if (emit)
18309 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18310 return "";
18311
18312 case 4:
18313 if (TARGET_THUMB2)
18314 break;
18315 if (emit)
18316 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18317 return "";
18318 }
18319 }
18320 if (TARGET_LDRD
18321 && (REG_P (otherops[2])
18322 || TARGET_THUMB2
18323 || (CONST_INT_P (otherops[2])
18324 && INTVAL (otherops[2]) > -256
18325 && INTVAL (otherops[2]) < 256)))
18326 {
18327 otherops[0] = operands[1];
18328 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18329 if (emit)
18330 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18331 return "";
18332 }
18333 /* Fall through */
18334
18335 default:
18336 otherops[0] = adjust_address (operands[0], SImode, 4);
18337 otherops[1] = operands[1];
18338 if (emit)
18339 {
18340 output_asm_insn ("str%?\t%1, %0", operands);
18341 output_asm_insn ("str%?\t%H1, %0", otherops);
18342 }
18343 if (count)
18344 *count = 2;
18345 }
18346 }
18347
18348 return "";
18349 }
18350
18351 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18352 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18353
18354 const char *
18355 output_move_quad (rtx *operands)
18356 {
18357 if (REG_P (operands[0]))
18358 {
18359 /* Load, or reg->reg move. */
18360
18361 if (MEM_P (operands[1]))
18362 {
18363 switch (GET_CODE (XEXP (operands[1], 0)))
18364 {
18365 case REG:
18366 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18367 break;
18368
18369 case LABEL_REF:
18370 case CONST:
18371 output_asm_insn ("adr%?\t%0, %1", operands);
18372 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18373 break;
18374
18375 default:
18376 gcc_unreachable ();
18377 }
18378 }
18379 else
18380 {
18381 rtx ops[2];
18382 int dest, src, i;
18383
18384 gcc_assert (REG_P (operands[1]));
18385
18386 dest = REGNO (operands[0]);
18387 src = REGNO (operands[1]);
18388
18389 /* This seems pretty dumb, but hopefully GCC won't try to do it
18390 very often. */
18391 if (dest < src)
18392 for (i = 0; i < 4; i++)
18393 {
18394 ops[0] = gen_rtx_REG (SImode, dest + i);
18395 ops[1] = gen_rtx_REG (SImode, src + i);
18396 output_asm_insn ("mov%?\t%0, %1", ops);
18397 }
18398 else
18399 for (i = 3; i >= 0; i--)
18400 {
18401 ops[0] = gen_rtx_REG (SImode, dest + i);
18402 ops[1] = gen_rtx_REG (SImode, src + i);
18403 output_asm_insn ("mov%?\t%0, %1", ops);
18404 }
18405 }
18406 }
18407 else
18408 {
18409 gcc_assert (MEM_P (operands[0]));
18410 gcc_assert (REG_P (operands[1]));
18411 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18412
18413 switch (GET_CODE (XEXP (operands[0], 0)))
18414 {
18415 case REG:
18416 output_asm_insn ("stm%?\t%m0, %M1", operands);
18417 break;
18418
18419 default:
18420 gcc_unreachable ();
18421 }
18422 }
18423
18424 return "";
18425 }
18426
18427 /* Output a VFP load or store instruction. */
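/* For example (illustrative only), a DFmode load whose address is a
   plain register falls through to the default template and is printed
   as "vldr%?.64\t%P0, %1", i.e. something like "vldr.64 d0, [r4]".  */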
18428
18429 const char *
18430 output_move_vfp (rtx *operands)
18431 {
18432 rtx reg, mem, addr, ops[2];
18433 int load = REG_P (operands[0]);
18434 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18435 int sp = (!TARGET_VFP_FP16INST
18436 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18437 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18438 const char *templ;
18439 char buff[50];
18440 machine_mode mode;
18441
18442 reg = operands[!load];
18443 mem = operands[load];
18444
18445 mode = GET_MODE (reg);
18446
18447 gcc_assert (REG_P (reg));
18448 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18449 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18450 || mode == SFmode
18451 || mode == DFmode
18452 || mode == HImode
18453 || mode == SImode
18454 || mode == DImode
18455 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18456 gcc_assert (MEM_P (mem));
18457
18458 addr = XEXP (mem, 0);
18459
18460 switch (GET_CODE (addr))
18461 {
18462 case PRE_DEC:
18463 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18464 ops[0] = XEXP (addr, 0);
18465 ops[1] = reg;
18466 break;
18467
18468 case POST_INC:
18469 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18470 ops[0] = XEXP (addr, 0);
18471 ops[1] = reg;
18472 break;
18473
18474 default:
18475 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18476 ops[0] = reg;
18477 ops[1] = mem;
18478 break;
18479 }
18480
18481 sprintf (buff, templ,
18482 load ? "ld" : "st",
18483 dp ? "64" : sp ? "32" : "16",
18484 dp ? "P" : "",
18485 integer_p ? "\t%@ int" : "");
18486 output_asm_insn (buff, ops);
18487
18488 return "";
18489 }
18490
18491 /* Output a Neon double-word or quad-word load or store, or a load
18492 or store for larger structure modes.
18493
18494 WARNING: The ordering of elements is weird in big-endian mode,
18495 because the EABI requires that vectors stored in memory appear
18496 as though they were stored by a VSTM instruction.
18497 GCC RTL defines element ordering based on in-memory order.
18498 This can be different from the architectural ordering of elements
18499 within a NEON register. The intrinsics defined in arm_neon.h use the
18500 NEON register element ordering, not the GCC RTL element ordering.
18501
18502 For example, the in-memory ordering of a big-endian quadword
18503 vector with 16-bit elements when stored from register pair {d0,d1}
18504 will be (lowest address first, d0[N] is NEON register element N):
18505
18506 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18507
18508 When necessary, quadword registers (dN, dN+1) are moved to ARM
18509 registers from rN in the order:
18510
18511 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18512
18513 So that STM/LDM can be used on vectors in ARM registers, and the
18514 same memory layout will result as if VSTM/VLDM were used.
18515
18516 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18517 possible, which allows use of appropriate alignment tags.
18518 Note that the choice of "64" is independent of the actual vector
18519 element size; this size simply ensures that the behavior is
18520 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18521
18522 Due to limitations of those instructions, use of VST1.64/VLD1.64
18523 is not possible if:
18524 - the address contains PRE_DEC, or
18525 - the mode refers to more than 4 double-word registers
18526
18527 In those cases, it would be possible to replace VSTM/VLDM by a
18528 sequence of instructions; this is not currently implemented since
18529 this is not certain to actually improve performance. */
18530
18531 const char *
18532 output_move_neon (rtx *operands)
18533 {
18534 rtx reg, mem, addr, ops[2];
18535 int regno, nregs, load = REG_P (operands[0]);
18536 const char *templ;
18537 char buff[50];
18538 machine_mode mode;
18539
18540 reg = operands[!load];
18541 mem = operands[load];
18542
18543 mode = GET_MODE (reg);
18544
18545 gcc_assert (REG_P (reg));
18546 regno = REGNO (reg);
18547 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18548 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18549 || NEON_REGNO_OK_FOR_QUAD (regno));
18550 gcc_assert (VALID_NEON_DREG_MODE (mode)
18551 || VALID_NEON_QREG_MODE (mode)
18552 || VALID_NEON_STRUCT_MODE (mode));
18553 gcc_assert (MEM_P (mem));
18554
18555 addr = XEXP (mem, 0);
18556
18557 /* Strip off const from addresses like (const (plus (...))). */
18558 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18559 addr = XEXP (addr, 0);
18560
18561 switch (GET_CODE (addr))
18562 {
18563 case POST_INC:
18564 /* We have to use vldm / vstm for too-large modes. */
18565 if (nregs > 4)
18566 {
18567 templ = "v%smia%%?\t%%0!, %%h1";
18568 ops[0] = XEXP (addr, 0);
18569 }
18570 else
18571 {
18572 templ = "v%s1.64\t%%h1, %%A0";
18573 ops[0] = mem;
18574 }
18575 ops[1] = reg;
18576 break;
18577
18578 case PRE_DEC:
18579 /* We have to use vldm / vstm in this case, since there is no
18580 pre-decrement form of the vld1 / vst1 instructions. */
18581 templ = "v%smdb%%?\t%%0!, %%h1";
18582 ops[0] = XEXP (addr, 0);
18583 ops[1] = reg;
18584 break;
18585
18586 case POST_MODIFY:
18587 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18588 gcc_unreachable ();
18589
18590 case REG:
18591 /* We have to use vldm / vstm for too-large modes. */
18592 if (nregs > 1)
18593 {
18594 if (nregs > 4)
18595 templ = "v%smia%%?\t%%m0, %%h1";
18596 else
18597 templ = "v%s1.64\t%%h1, %%A0";
18598
18599 ops[0] = mem;
18600 ops[1] = reg;
18601 break;
18602 }
18603 /* Fall through. */
18604 case LABEL_REF:
18605 case PLUS:
18606 {
18607 int i;
18608 int overlap = -1;
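	/* Move one doubleword at a time, but postpone any piece whose
	   destination overlaps the base address register so that the
	   address is not clobbered before the remaining accesses; at
	   most one piece can overlap, which the assert below checks.  */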
18609 for (i = 0; i < nregs; i++)
18610 {
18611 /* We're only using DImode here because it's a convenient size. */
18612 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18613 ops[1] = adjust_address (mem, DImode, 8 * i);
18614 if (reg_overlap_mentioned_p (ops[0], mem))
18615 {
18616 gcc_assert (overlap == -1);
18617 overlap = i;
18618 }
18619 else
18620 {
18621 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18622 output_asm_insn (buff, ops);
18623 }
18624 }
18625 if (overlap != -1)
18626 {
18627 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18628 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18629 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18630 output_asm_insn (buff, ops);
18631 }
18632
18633 return "";
18634 }
18635
18636 default:
18637 gcc_unreachable ();
18638 }
18639
18640 sprintf (buff, templ, load ? "ld" : "st");
18641 output_asm_insn (buff, ops);
18642
18643 return "";
18644 }
18645
18646 /* Compute and return the length of neon_mov<mode>, where <mode> is
18647 one of VSTRUCT modes: EI, OI, CI or XI. */
18648 int
18649 arm_attr_length_move_neon (rtx_insn *insn)
18650 {
18651 rtx reg, mem, addr;
18652 int load;
18653 machine_mode mode;
18654
18655 extract_insn_cached (insn);
18656
18657 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18658 {
18659 mode = GET_MODE (recog_data.operand[0]);
18660 switch (mode)
18661 {
18662 case EImode:
18663 case OImode:
18664 return 8;
18665 case CImode:
18666 return 12;
18667 case XImode:
18668 return 16;
18669 default:
18670 gcc_unreachable ();
18671 }
18672 }
18673
18674 load = REG_P (recog_data.operand[0]);
18675 reg = recog_data.operand[!load];
18676 mem = recog_data.operand[load];
18677
18678 gcc_assert (MEM_P (mem));
18679
18680 mode = GET_MODE (reg);
18681 addr = XEXP (mem, 0);
18682
18683 /* Strip off const from addresses like (const (plus (...))). */
18684 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18685 addr = XEXP (addr, 0);
18686
18687 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18688 {
18689 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18690 return insns * 4;
18691 }
18692 else
18693 return 4;
18694 }
18695
18696 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18697 return zero. */
18698
18699 int
18700 arm_address_offset_is_imm (rtx_insn *insn)
18701 {
18702 rtx mem, addr;
18703
18704 extract_insn_cached (insn);
18705
18706 if (REG_P (recog_data.operand[0]))
18707 return 0;
18708
18709 mem = recog_data.operand[0];
18710
18711 gcc_assert (MEM_P (mem));
18712
18713 addr = XEXP (mem, 0);
18714
18715 if (REG_P (addr)
18716 || (GET_CODE (addr) == PLUS
18717 && REG_P (XEXP (addr, 0))
18718 && CONST_INT_P (XEXP (addr, 1))))
18719 return 1;
18720 else
18721 return 0;
18722 }
18723
18724 /* Output an ADD r, s, #n where n may be too big for one instruction.
18725 If n is zero and the source and destination registers are the same, output nothing. */
18726 const char *
18727 output_add_immediate (rtx *operands)
18728 {
18729 HOST_WIDE_INT n = INTVAL (operands[2]);
18730
18731 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18732 {
18733 if (n < 0)
18734 output_multi_immediate (operands,
18735 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18736 -n);
18737 else
18738 output_multi_immediate (operands,
18739 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18740 n);
18741 }
18742
18743 return "";
18744 }
18745
18746 /* Output a multiple immediate operation.
18747 OPERANDS is the vector of operands referred to in the output patterns.
18748 INSTR1 is the output pattern to use for the first constant.
18749 INSTR2 is the output pattern to use for subsequent constants.
18750 IMMED_OP is the index of the constant slot in OPERANDS.
18751 N is the constant value. */
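/* As an illustrative example: for N = 0x12345 the loop below emits three
   instructions whose immediates are 0x45, 0x2300 and 0x10000, each an
   8-bit value at an even bit position and therefore encodable as an ARM
   rotated immediate.  */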
18752 static const char *
18753 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18754 int immed_op, HOST_WIDE_INT n)
18755 {
18756 #if HOST_BITS_PER_WIDE_INT > 32
18757 n &= 0xffffffff;
18758 #endif
18759
18760 if (n == 0)
18761 {
18762 /* Quick and easy output. */
18763 operands[immed_op] = const0_rtx;
18764 output_asm_insn (instr1, operands);
18765 }
18766 else
18767 {
18768 int i;
18769 const char * instr = instr1;
18770
18771 /* Note that n is never zero here (which would give no output). */
18772 for (i = 0; i < 32; i += 2)
18773 {
18774 if (n & (3 << i))
18775 {
18776 operands[immed_op] = GEN_INT (n & (255 << i));
18777 output_asm_insn (instr, operands);
18778 instr = instr2;
18779 i += 6;
18780 }
18781 }
18782 }
18783
18784 return "";
18785 }
18786
18787 /* Return the name of a shifter operation. */
18788 static const char *
18789 arm_shift_nmem(enum rtx_code code)
18790 {
18791 switch (code)
18792 {
18793 case ASHIFT:
18794 return ARM_LSL_NAME;
18795
18796 case ASHIFTRT:
18797 return "asr";
18798
18799 case LSHIFTRT:
18800 return "lsr";
18801
18802 case ROTATERT:
18803 return "ror";
18804
18805 default:
18806 abort();
18807 }
18808 }
18809
18810 /* Return the appropriate ARM instruction for the operation code.
18811 The returned result should not be overwritten. OP is the rtx of the
18812 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18813 was shifted. */
18814 const char *
18815 arithmetic_instr (rtx op, int shift_first_arg)
18816 {
18817 switch (GET_CODE (op))
18818 {
18819 case PLUS:
18820 return "add";
18821
18822 case MINUS:
18823 return shift_first_arg ? "rsb" : "sub";
18824
18825 case IOR:
18826 return "orr";
18827
18828 case XOR:
18829 return "eor";
18830
18831 case AND:
18832 return "and";
18833
18834 case ASHIFT:
18835 case ASHIFTRT:
18836 case LSHIFTRT:
18837 case ROTATERT:
18838 return arm_shift_nmem(GET_CODE(op));
18839
18840 default:
18841 gcc_unreachable ();
18842 }
18843 }
18844
18845 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18846 for the operation code. The returned result should not be overwritten.
18847 OP is the rtx code of the shift.
18848 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18849 constant shift amount otherwise. */
18850 static const char *
18851 shift_op (rtx op, HOST_WIDE_INT *amountp)
18852 {
18853 const char * mnem;
18854 enum rtx_code code = GET_CODE (op);
18855
18856 switch (code)
18857 {
18858 case ROTATE:
18859 if (!CONST_INT_P (XEXP (op, 1)))
18860 {
18861 output_operand_lossage ("invalid shift operand");
18862 return NULL;
18863 }
18864
18865 code = ROTATERT;
18866 *amountp = 32 - INTVAL (XEXP (op, 1));
18867 mnem = "ror";
18868 break;
18869
18870 case ASHIFT:
18871 case ASHIFTRT:
18872 case LSHIFTRT:
18873 case ROTATERT:
18874 mnem = arm_shift_nmem(code);
18875 if (CONST_INT_P (XEXP (op, 1)))
18876 {
18877 *amountp = INTVAL (XEXP (op, 1));
18878 }
18879 else if (REG_P (XEXP (op, 1)))
18880 {
18881 *amountp = -1;
18882 return mnem;
18883 }
18884 else
18885 {
18886 output_operand_lossage ("invalid shift operand");
18887 return NULL;
18888 }
18889 break;
18890
18891 case MULT:
18892 /* We never have to worry about the amount being other than a
18893 power of 2, since this case can never be reloaded from a reg. */
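      /* E.g. a (mult x 8) operand is printed as the shift "lsl #3".  */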
18894 if (!CONST_INT_P (XEXP (op, 1)))
18895 {
18896 output_operand_lossage ("invalid shift operand");
18897 return NULL;
18898 }
18899
18900 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18901
18902 /* Amount must be a power of two. */
18903 if (*amountp & (*amountp - 1))
18904 {
18905 output_operand_lossage ("invalid shift operand");
18906 return NULL;
18907 }
18908
18909 *amountp = exact_log2 (*amountp);
18910 gcc_assert (IN_RANGE (*amountp, 0, 31));
18911 return ARM_LSL_NAME;
18912
18913 default:
18914 output_operand_lossage ("invalid shift operand");
18915 return NULL;
18916 }
18917
18918 /* This is not 100% correct, but follows from the desire to merge
18919 multiplication by a power of 2 with the recognizer for a
18920 shift. >=32 is not a valid shift for "lsl", so we must try to
18921 output a shift that produces the correct arithmetical result.
18922 Using lsr #32 is identical except for the fact that the carry bit
18923 is not set correctly if we set the flags; but we never use the
18924 carry bit from such an operation, so we can ignore that. */
18925 if (code == ROTATERT)
18926 /* Rotate is just modulo 32. */
18927 *amountp &= 31;
18928 else if (*amountp != (*amountp & 31))
18929 {
18930 if (code == ASHIFT)
18931 mnem = "lsr";
18932 *amountp = 32;
18933 }
18934
18935 /* Shifts of 0 are no-ops. */
18936 if (*amountp == 0)
18937 return NULL;
18938
18939 return mnem;
18940 }
18941
18942 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18943 because /bin/as is horribly restrictive. The judgement about
18944 whether or not each character is 'printable' (and can be output as
18945 is) or not (and must be printed with an octal escape) must be made
18946 with reference to the *host* character set -- the situation is
18947 similar to that discussed in the comments above pp_c_char in
18948 c-pretty-print.c. */
18949
18950 #define MAX_ASCII_LEN 51
18951
18952 void
18953 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18954 {
18955 int i;
18956 int len_so_far = 0;
18957
18958 fputs ("\t.ascii\t\"", stream);
18959
18960 for (i = 0; i < len; i++)
18961 {
18962 int c = p[i];
18963
18964 if (len_so_far >= MAX_ASCII_LEN)
18965 {
18966 fputs ("\"\n\t.ascii\t\"", stream);
18967 len_so_far = 0;
18968 }
18969
18970 if (ISPRINT (c))
18971 {
18972 if (c == '\\' || c == '\"')
18973 {
18974 putc ('\\', stream);
18975 len_so_far++;
18976 }
18977 putc (c, stream);
18978 len_so_far++;
18979 }
18980 else
18981 {
18982 fprintf (stream, "\\%03o", c);
18983 len_so_far += 4;
18984 }
18985 }
18986
18987 fputs ("\"\n", stream);
18988 }
18989 \f
18990 /* Whether a register is callee saved or not. This is necessary because high
18991 registers are marked as caller saved when optimizing for size on Thumb-1
18992 targets despite being callee saved in order to avoid using them. */
18993 #define callee_saved_reg_p(reg) \
18994 (!call_used_regs[reg] \
18995 || (TARGET_THUMB1 && optimize_size \
18996 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18997
18998 /* Compute the register save mask for registers 0 through 12
18999 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19000
19001 static unsigned long
19002 arm_compute_save_reg0_reg12_mask (void)
19003 {
19004 unsigned long func_type = arm_current_func_type ();
19005 unsigned long save_reg_mask = 0;
19006 unsigned int reg;
19007
19008 if (IS_INTERRUPT (func_type))
19009 {
19010 unsigned int max_reg;
19011 /* Interrupt functions must not corrupt any registers,
19012 even call clobbered ones. If this is a leaf function
19013 we can just examine the registers used by the RTL, but
19014 otherwise we have to assume that whatever function is
19015 called might clobber anything, and so we have to save
19016 all the call-clobbered registers as well. */
19017 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19018 /* FIQ handlers have registers r8 - r12 banked, so
19019 we only need to check r0 - r7.  Normal ISRs only
19020 bank r14 and r15, so we must check up to r12.
19021 r13 is the stack pointer which is always preserved,
19022 so we do not need to consider it here. */
19023 max_reg = 7;
19024 else
19025 max_reg = 12;
19026
19027 for (reg = 0; reg <= max_reg; reg++)
19028 if (df_regs_ever_live_p (reg)
19029 || (! crtl->is_leaf && call_used_regs[reg]))
19030 save_reg_mask |= (1 << reg);
19031
19032 /* Also save the pic base register if necessary. */
19033 if (flag_pic
19034 && !TARGET_SINGLE_PIC_BASE
19035 && arm_pic_register != INVALID_REGNUM
19036 && crtl->uses_pic_offset_table)
19037 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19038 }
19039 else if (IS_VOLATILE(func_type))
19040 {
19041 /* For noreturn functions we historically omitted register saves
19042 altogether. However this really messes up debugging. As a
19043 compromise save just the frame pointers. Combined with the link
19044 register saved elsewhere this should be sufficient to get
19045 a backtrace. */
19046 if (frame_pointer_needed)
19047 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19050 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19051 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19052 }
19053 else
19054 {
19055 /* In the normal case we only need to save those registers
19056 which are call saved and which are used by this function. */
19057 for (reg = 0; reg <= 11; reg++)
19058 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19059 save_reg_mask |= (1 << reg);
19060
19061 /* Handle the frame pointer as a special case. */
19062 if (frame_pointer_needed)
19063 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19064
19065 /* If we aren't loading the PIC register,
19066 don't stack it even though it may be live. */
19067 if (flag_pic
19068 && !TARGET_SINGLE_PIC_BASE
19069 && arm_pic_register != INVALID_REGNUM
19070 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19071 || crtl->uses_pic_offset_table))
19072 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19073
19074 /* The prologue will copy SP into R0, so save it. */
19075 if (IS_STACKALIGN (func_type))
19076 save_reg_mask |= 1;
19077 }
19078
19079 /* Save registers so the exception handler can modify them. */
19080 if (crtl->calls_eh_return)
19081 {
19082 unsigned int i;
19083
19084 for (i = 0; ; i++)
19085 {
19086 reg = EH_RETURN_DATA_REGNO (i);
19087 if (reg == INVALID_REGNUM)
19088 break;
19089 save_reg_mask |= 1 << reg;
19090 }
19091 }
19092
19093 return save_reg_mask;
19094 }
19095
19096 /* Return true if r3 is live at the start of the function. */
19097
19098 static bool
19099 arm_r3_live_at_start_p (void)
19100 {
19101 /* Just look at cfg info, which is still close enough to correct at this
19102 point. This gives false positives for broken functions that might use
19103 uninitialized data that happens to be allocated in r3, but who cares? */
19104 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19105 }
19106
19107 /* Compute the number of bytes used to store the static chain register on the
19108 stack, above the stack frame. We need to know this accurately to get the
19109 alignment of the rest of the stack frame correct. */
19110
19111 static int
19112 arm_compute_static_chain_stack_bytes (void)
19113 {
19114 /* See the defining assertion in arm_expand_prologue. */
19115 if (IS_NESTED (arm_current_func_type ())
19116 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19117 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19118 && !df_regs_ever_live_p (LR_REGNUM)))
19119 && arm_r3_live_at_start_p ()
19120 && crtl->args.pretend_args_size == 0)
19121 return 4;
19122
19123 return 0;
19124 }
19125
19126 /* Compute a bit mask of which core registers need to be
19127 saved on the stack for the current function.
19128 This is used by arm_compute_frame_layout, which may add extra registers. */
19129
19130 static unsigned long
19131 arm_compute_save_core_reg_mask (void)
19132 {
19133 unsigned int save_reg_mask = 0;
19134 unsigned long func_type = arm_current_func_type ();
19135 unsigned int reg;
19136
19137 if (IS_NAKED (func_type))
19138 /* This should never really happen. */
19139 return 0;
19140
19141 /* If we are creating a stack frame, then we must save the frame pointer,
19142 IP (which will hold the old stack pointer), LR and the PC. */
19143 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19144 save_reg_mask |=
19145 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19146 | (1 << IP_REGNUM)
19147 | (1 << LR_REGNUM)
19148 | (1 << PC_REGNUM);
19149
19150 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19151
19152 /* Decide if we need to save the link register.
19153 Interrupt routines have their own banked link register,
19154 so they never need to save it.
19155 Otherwise if we do not use the link register we do not need to save
19156 it. If we are pushing other registers onto the stack however, we
19157 can save an instruction in the epilogue by pushing the link register
19158 now and then popping it back into the PC. This incurs extra memory
19159 accesses though, so we only do it when optimizing for size, and only
19160 if we know that we will not need a fancy return sequence. */
19161 if (df_regs_ever_live_p (LR_REGNUM)
19162 || (save_reg_mask
19163 && optimize_size
19164 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19165 && !crtl->tail_call_emit
19166 && !crtl->calls_eh_return))
19167 save_reg_mask |= 1 << LR_REGNUM;
19168
19169 if (cfun->machine->lr_save_eliminated)
19170 save_reg_mask &= ~ (1 << LR_REGNUM);
19171
19172 if (TARGET_REALLY_IWMMXT
19173 && ((bit_count (save_reg_mask)
19174 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19175 arm_compute_static_chain_stack_bytes())
19176 ) % 2) != 0)
19177 {
19178 /* The total number of registers that are going to be pushed
19179 onto the stack is odd. We need to ensure that the stack
19180 is 64-bit aligned before we start to save iWMMXt registers,
19181 and also before we start to create locals. (A local variable
19182 might be a double or long long which we will load/store using
19183 an iWMMXt instruction). Therefore we need to push another
19184 ARM register, so that the stack will be 64-bit aligned. We
19185 try to avoid using the arg registers (r0 - r3) as they might be
19186 used to pass values in a tail call. */
19187 for (reg = 4; reg <= 12; reg++)
19188 if ((save_reg_mask & (1 << reg)) == 0)
19189 break;
19190
19191 if (reg <= 12)
19192 save_reg_mask |= (1 << reg);
19193 else
19194 {
19195 cfun->machine->sibcall_blocked = 1;
19196 save_reg_mask |= (1 << 3);
19197 }
19198 }
19199
19200 /* We may need to push an additional register for use initializing the
19201 PIC base register. */
19202 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19203 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19204 {
19205 reg = thumb_find_work_register (1 << 4);
19206 if (!call_used_regs[reg])
19207 save_reg_mask |= (1 << reg);
19208 }
19209
19210 return save_reg_mask;
19211 }
19212
19213 /* Compute a bit mask of which core registers need to be
19214 saved on the stack for the current function. */
19215 static unsigned long
19216 thumb1_compute_save_core_reg_mask (void)
19217 {
19218 unsigned long mask;
19219 unsigned reg;
19220
19221 mask = 0;
19222 for (reg = 0; reg < 12; reg ++)
19223 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19224 mask |= 1 << reg;
19225
19226 /* Handle the frame pointer as a special case. */
19227 if (frame_pointer_needed)
19228 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19229
19230 if (flag_pic
19231 && !TARGET_SINGLE_PIC_BASE
19232 && arm_pic_register != INVALID_REGNUM
19233 && crtl->uses_pic_offset_table)
19234 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19235
19236 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19237 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19238 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19239
19240 /* LR will also be pushed if any lo regs are pushed. */
19241 if (mask & 0xff || thumb_force_lr_save ())
19242 mask |= (1 << LR_REGNUM);
19243
19244 /* Make sure we have a low work register if we need one.
19245 We will need one if we are going to push a high register,
19246 but we are not currently intending to push a low register. */
19247 if ((mask & 0xff) == 0
19248 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19249 {
19250 /* Use thumb_find_work_register to choose which register
19251 we will use. If the register is live then we will
19252 have to push it. Use LAST_LO_REGNUM as our fallback
19253 choice for the register to select. */
19254 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19255 /* Make sure the register returned by thumb_find_work_register is
19256 not part of the return value. */
19257 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19258 reg = LAST_LO_REGNUM;
19259
19260 if (callee_saved_reg_p (reg))
19261 mask |= 1 << reg;
19262 }
19263
19264 /* The 504 below is 8 bytes less than 512 because there are two possible
19265 alignment words. We can't tell here if they will be present or not so we
19266 have to play it safe and assume that they are. */
19267 if ((CALLER_INTERWORKING_SLOT_SIZE +
19268 ROUND_UP_WORD (get_frame_size ()) +
19269 crtl->outgoing_args_size) >= 504)
19270 {
19271 /* This is the same as the code in thumb1_expand_prologue() which
19272 determines which register to use for stack decrement. */
19273 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19274 if (mask & (1 << reg))
19275 break;
19276
19277 if (reg > LAST_LO_REGNUM)
19278 {
19279 /* Make sure we have a register available for stack decrement. */
19280 mask |= 1 << LAST_LO_REGNUM;
19281 }
19282 }
19283
19284 return mask;
19285 }
19286
19287
19288 /* Return the number of bytes required to save VFP registers. */
19289 static int
19290 arm_get_vfp_saved_size (void)
19291 {
19292 unsigned int regno;
19293 int count;
19294 int saved;
19295
19296 saved = 0;
19297 /* Space for saved VFP registers. */
19298 if (TARGET_HARD_FLOAT)
19299 {
19300 count = 0;
19301 for (regno = FIRST_VFP_REGNUM;
19302 regno < LAST_VFP_REGNUM;
19303 regno += 2)
19304 {
19305 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19306 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19307 {
19308 if (count > 0)
19309 {
19310 /* Workaround ARM10 VFPr1 bug. */
19311 if (count == 2 && !arm_arch6)
19312 count++;
19313 saved += count * 8;
19314 }
19315 count = 0;
19316 }
19317 else
19318 count++;
19319 }
19320 if (count > 0)
19321 {
19322 if (count == 2 && !arm_arch6)
19323 count++;
19324 saved += count * 8;
19325 }
19326 }
19327 return saved;
19328 }
19329
19330
19331 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19332 everything bar the final return instruction. If simple_return is true,
19333 then do not output epilogue, because it has already been emitted in RTL. */
19334 const char *
19335 output_return_instruction (rtx operand, bool really_return, bool reverse,
19336 bool simple_return)
19337 {
19338 char conditional[10];
19339 char instr[100];
19340 unsigned reg;
19341 unsigned long live_regs_mask;
19342 unsigned long func_type;
19343 arm_stack_offsets *offsets;
19344
19345 func_type = arm_current_func_type ();
19346
19347 if (IS_NAKED (func_type))
19348 return "";
19349
19350 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19351 {
19352 /* If this function was declared non-returning, and we have
19353 found a tail call, then we have to trust that the called
19354 function won't return. */
19355 if (really_return)
19356 {
19357 rtx ops[2];
19358
19359 /* Otherwise, trap an attempted return by aborting. */
19360 ops[0] = operand;
19361 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19362 : "abort");
19363 assemble_external_libcall (ops[1]);
19364 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19365 }
19366
19367 return "";
19368 }
19369
19370 gcc_assert (!cfun->calls_alloca || really_return);
19371
19372 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19373
19374 cfun->machine->return_used_this_function = 1;
19375
19376 offsets = arm_get_frame_offsets ();
19377 live_regs_mask = offsets->saved_regs_mask;
19378
19379 if (!simple_return && live_regs_mask)
19380 {
19381 const char * return_reg;
19382
19383 /* If we do not have any special requirements for function exit
19384 (e.g. interworking) then we can load the return address
19385 directly into the PC. Otherwise we must load it into LR. */
19386 if (really_return
19387 && !IS_CMSE_ENTRY (func_type)
19388 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19389 return_reg = reg_names[PC_REGNUM];
19390 else
19391 return_reg = reg_names[LR_REGNUM];
19392
19393 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19394 {
19395 /* There are three possible reasons for the IP register
19396 being saved: 1) a stack frame was created, in which case
19397 IP contains the old stack pointer, or 2) an ISR routine
19398 corrupted it, or 3) it was saved to align the stack on
19399 iWMMXt. In case 1, restore IP into SP, otherwise just
19400 restore IP. */
19401 if (frame_pointer_needed)
19402 {
19403 live_regs_mask &= ~ (1 << IP_REGNUM);
19404 live_regs_mask |= (1 << SP_REGNUM);
19405 }
19406 else
19407 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19408 }
19409
19410 /* On some ARM architectures it is faster to use LDR rather than
19411 LDM to load a single register. On other architectures, the
19412 cost is the same. In 26 bit mode, or for exception handlers,
19413 we have to use LDM to load the PC so that the CPSR is also
19414 restored. */
19415 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19416 if (live_regs_mask == (1U << reg))
19417 break;
19418
19419 if (reg <= LAST_ARM_REGNUM
19420 && (reg != LR_REGNUM
19421 || ! really_return
19422 || ! IS_INTERRUPT (func_type)))
19423 {
19424 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19425 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19426 }
19427 else
19428 {
19429 char *p;
19430 int first = 1;
19431
19432 /* Generate the load multiple instruction to restore the
19433 registers. Note we can get here, even if
19434 frame_pointer_needed is true, but only if sp already
19435 points to the base of the saved core registers. */
19436 if (live_regs_mask & (1 << SP_REGNUM))
19437 {
19438 unsigned HOST_WIDE_INT stack_adjust;
19439
19440 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19441 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19442
19443 if (stack_adjust && arm_arch5 && TARGET_ARM)
19444 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19445 else
19446 {
19447 /* If we can't use ldmib (SA110 bug),
19448 then try to pop r3 instead. */
19449 if (stack_adjust)
19450 live_regs_mask |= 1 << 3;
19451
19452 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19453 }
19454 }
19455 /* For interrupt returns we have to use an LDM rather than
19456 a POP so that we can use the exception return variant. */
19457 else if (IS_INTERRUPT (func_type))
19458 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19459 else
19460 sprintf (instr, "pop%s\t{", conditional);
19461
19462 p = instr + strlen (instr);
19463
19464 for (reg = 0; reg <= SP_REGNUM; reg++)
19465 if (live_regs_mask & (1 << reg))
19466 {
19467 int l = strlen (reg_names[reg]);
19468
19469 if (first)
19470 first = 0;
19471 else
19472 {
19473 memcpy (p, ", ", 2);
19474 p += 2;
19475 }
19476
19477 memcpy (p, "%|", 2);
19478 memcpy (p + 2, reg_names[reg], l);
19479 p += l + 2;
19480 }
19481
19482 if (live_regs_mask & (1 << LR_REGNUM))
19483 {
19484 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19485 /* If returning from an interrupt, restore the CPSR. */
19486 if (IS_INTERRUPT (func_type))
19487 strcat (p, "^");
19488 }
19489 else
19490 strcpy (p, "}");
19491 }
19492
19493 output_asm_insn (instr, & operand);
19494
19495 /* See if we need to generate an extra instruction to
19496 perform the actual function return. */
19497 if (really_return
19498 && func_type != ARM_FT_INTERWORKED
19499 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19500 {
19501 /* The return has already been handled
19502 by loading the LR into the PC. */
19503 return "";
19504 }
19505 }
19506
19507 if (really_return)
19508 {
19509 switch ((int) ARM_FUNC_TYPE (func_type))
19510 {
19511 case ARM_FT_ISR:
19512 case ARM_FT_FIQ:
19513 /* ??? This is wrong for unified assembly syntax. */
19514 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19515 break;
19516
19517 case ARM_FT_INTERWORKED:
19518 gcc_assert (arm_arch5 || arm_arch4t);
19519 sprintf (instr, "bx%s\t%%|lr", conditional);
19520 break;
19521
19522 case ARM_FT_EXCEPTION:
19523 /* ??? This is wrong for unified assembly syntax. */
19524 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19525 break;
19526
19527 default:
19528 if (IS_CMSE_ENTRY (func_type))
19529 {
19530 /* Check if we have to clear the 'GE bits' which is only used if
19531 parallel add and subtraction instructions are available. */
19532 if (TARGET_INT_SIMD)
19533 snprintf (instr, sizeof (instr),
19534 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19535 else
19536 snprintf (instr, sizeof (instr),
19537 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19538
19539 output_asm_insn (instr, & operand);
19540 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19541 {
19542 /* Clear the cumulative exception-status bits (0-4,7) and the
19543 condition code bits (28-31) of the FPSCR. We need to
19544 remember to clear the first scratch register used (IP) and
19545 save and restore the second (r4). */
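		  /* An aside on the constants used below: the movw/movt pair
		     builds the mask 0x0FFFFF60 in r4 (65376 | (4095 << 16)),
		     which preserves everything except bits 0-4, 7 and 28-31.  */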
19546 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19547 output_asm_insn (instr, & operand);
19548 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19549 output_asm_insn (instr, & operand);
19550 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19551 output_asm_insn (instr, & operand);
19552 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19553 output_asm_insn (instr, & operand);
19554 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19555 output_asm_insn (instr, & operand);
19556 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19557 output_asm_insn (instr, & operand);
19558 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19559 output_asm_insn (instr, & operand);
19560 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19561 output_asm_insn (instr, & operand);
19562 }
19563 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19564 }
19565 /* Use bx if it's available. */
19566 else if (arm_arch5 || arm_arch4t)
19567 sprintf (instr, "bx%s\t%%|lr", conditional);
19568 else
19569 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19570 break;
19571 }
19572
19573 output_asm_insn (instr, & operand);
19574 }
19575
19576 return "";
19577 }
19578
19579 /* Output in FILE asm statements needed to declare the NAME of the function
19580 defined by its DECL node. */
19581
19582 void
19583 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19584 {
19585 size_t cmse_name_len;
19586 char *cmse_name = 0;
19587 char cmse_prefix[] = "__acle_se_";
19588
19589 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19590 extra function label for each function with the 'cmse_nonsecure_entry'
19591 attribute. This extra function label should be prepended with
19592 '__acle_se_', telling the linker that it needs to create secure gateway
19593 veneers for this function. */
19594 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19595 DECL_ATTRIBUTES (decl)))
19596 {
19597 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19598 cmse_name = XALLOCAVEC (char, cmse_name_len);
19599 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19600 targetm.asm_out.globalize_label (file, cmse_name);
19601
19602 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19603 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19604 }
19605
19606 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19607 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19608 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19609 ASM_OUTPUT_LABEL (file, name);
19610
19611 if (cmse_name)
19612 ASM_OUTPUT_LABEL (file, cmse_name);
19613
19614 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19615 }
19616
19617 /* Write the function name into the code section, directly preceding
19618 the function prologue.
19619
19620 Code will be output similar to this:
19621 t0
19622 .ascii "arm_poke_function_name", 0
19623 .align
19624 t1
19625 .word 0xff000000 + (t1 - t0)
19626 arm_poke_function_name
19627 mov ip, sp
19628 stmfd sp!, {fp, ip, lr, pc}
19629 sub fp, ip, #4
19630
19631 When performing a stack backtrace, code can inspect the value
19632 of 'pc' stored at 'fp' + 0. If the trace function then looks
19633 at location pc - 12 and the top 8 bits are set, then we know
19634 that there is a function name embedded immediately preceding this
19635 location and has length ((pc[-3]) & 0xff000000).
19636
19637 We assume that pc is declared as a pointer to an unsigned long.
19638
19639 It is of no benefit to output the function name if we are assembling
19640 a leaf function. These function types will not contain a stack
19641 backtrace structure, therefore it is not possible to determine the
19642 function name. */
19643 void
19644 arm_poke_function_name (FILE *stream, const char *name)
19645 {
19646 unsigned long alignlength;
19647 unsigned long length;
19648 rtx x;
19649
19650 length = strlen (name) + 1;
19651 alignlength = ROUND_UP_WORD (length);
19652
19653 ASM_OUTPUT_ASCII (stream, name, length);
19654 ASM_OUTPUT_ALIGN (stream, 2);
19655 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19656 assemble_aligned_integer (UNITS_PER_WORD, x);
19657 }
19658
19659 /* Place some comments into the assembler stream
19660 describing the current function. */
19661 static void
19662 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19663 {
19664 unsigned long func_type;
19665
19666 /* Sanity check. */
19667 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19668
19669 func_type = arm_current_func_type ();
19670
19671 switch ((int) ARM_FUNC_TYPE (func_type))
19672 {
19673 default:
19674 case ARM_FT_NORMAL:
19675 break;
19676 case ARM_FT_INTERWORKED:
19677 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19678 break;
19679 case ARM_FT_ISR:
19680 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19681 break;
19682 case ARM_FT_FIQ:
19683 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19684 break;
19685 case ARM_FT_EXCEPTION:
19686 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19687 break;
19688 }
19689
19690 if (IS_NAKED (func_type))
19691 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19692
19693 if (IS_VOLATILE (func_type))
19694 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19695
19696 if (IS_NESTED (func_type))
19697 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19698 if (IS_STACKALIGN (func_type))
19699 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19700 if (IS_CMSE_ENTRY (func_type))
19701 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19702
19703 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19704 crtl->args.size,
19705 crtl->args.pretend_args_size, frame_size);
19706
19707 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19708 frame_pointer_needed,
19709 cfun->machine->uses_anonymous_args);
19710
19711 if (cfun->machine->lr_save_eliminated)
19712 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19713
19714 if (crtl->calls_eh_return)
19715 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19716
19717 }
19718
19719 static void
19720 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19721 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19722 {
19723 arm_stack_offsets *offsets;
19724
19725 if (TARGET_THUMB1)
19726 {
19727 int regno;
19728
19729 /* Emit any call-via-reg trampolines that are needed for v4t support
19730 of call_reg and call_value_reg type insns. */
19731 for (regno = 0; regno < LR_REGNUM; regno++)
19732 {
19733 rtx label = cfun->machine->call_via[regno];
19734
19735 if (label != NULL)
19736 {
19737 switch_to_section (function_section (current_function_decl));
19738 targetm.asm_out.internal_label (asm_out_file, "L",
19739 CODE_LABEL_NUMBER (label));
19740 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19741 }
19742 }
19743
19744 /* ??? Probably not safe to set this here, since it assumes that a
19745 function will be emitted as assembly immediately after we generate
19746 RTL for it. This does not happen for inline functions. */
19747 cfun->machine->return_used_this_function = 0;
19748 }
19749 else /* TARGET_32BIT */
19750 {
19751 /* We need to take into account any stack-frame rounding. */
19752 offsets = arm_get_frame_offsets ();
19753
19754 gcc_assert (!use_return_insn (FALSE, NULL)
19755 || (cfun->machine->return_used_this_function != 0)
19756 || offsets->saved_regs == offsets->outgoing_args
19757 || frame_pointer_needed);
19758 }
19759 }
19760
19761 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19762 STR and STRD. If an even number of registers are being pushed, one
19763 or more STRD patterns are created for each register pair. If an
19764 odd number of registers are pushed, emit an initial STR followed by
19765 as many STRD instructions as are needed. This works best when the
19766 stack is initially 64-bit aligned (the normal case), since it
19767 ensures that each STRD is also 64-bit aligned. */
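/* As an illustrative sketch: pushing {r4, r5, r6} would typically emit
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   so the strd address stays doubleword aligned when the incoming SP is
   64-bit aligned.  */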
19768 static void
19769 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19770 {
19771 int num_regs = 0;
19772 int i;
19773 int regno;
19774 rtx par = NULL_RTX;
19775 rtx dwarf = NULL_RTX;
19776 rtx tmp;
19777 bool first = true;
19778
19779 num_regs = bit_count (saved_regs_mask);
19780
19781 /* Must be at least one register to save, and can't save SP or PC. */
19782 gcc_assert (num_regs > 0 && num_regs <= 14);
19783 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19784 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19785
19786 /* Create sequence for DWARF info. All the frame-related data for
19787 debugging is held in this wrapper. */
19788 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19789
19790 /* Describe the stack adjustment. */
19791 tmp = gen_rtx_SET (stack_pointer_rtx,
19792 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19793 RTX_FRAME_RELATED_P (tmp) = 1;
19794 XVECEXP (dwarf, 0, 0) = tmp;
19795
19796 /* Find the first register. */
19797 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19798 ;
19799
19800 i = 0;
19801
19802 /* If there's an odd number of registers to push, start off by
19803 pushing a single register. This ensures that subsequent strd
19804 operations are dword aligned (assuming that SP was originally
19805 64-bit aligned). */
19806 if ((num_regs & 1) != 0)
19807 {
19808 rtx reg, mem, insn;
19809
19810 reg = gen_rtx_REG (SImode, regno);
19811 if (num_regs == 1)
19812 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19813 stack_pointer_rtx));
19814 else
19815 mem = gen_frame_mem (Pmode,
19816 gen_rtx_PRE_MODIFY
19817 (Pmode, stack_pointer_rtx,
19818 plus_constant (Pmode, stack_pointer_rtx,
19819 -4 * num_regs)));
19820
19821 tmp = gen_rtx_SET (mem, reg);
19822 RTX_FRAME_RELATED_P (tmp) = 1;
19823 insn = emit_insn (tmp);
19824 RTX_FRAME_RELATED_P (insn) = 1;
19825 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19826 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19827 RTX_FRAME_RELATED_P (tmp) = 1;
19828 i++;
19829 regno++;
19830 XVECEXP (dwarf, 0, i) = tmp;
19831 first = false;
19832 }
19833
19834 while (i < num_regs)
19835 if (saved_regs_mask & (1 << regno))
19836 {
19837 rtx reg1, reg2, mem1, mem2;
19838 rtx tmp0, tmp1, tmp2;
19839 int regno2;
19840
19841 /* Find the register to pair with this one. */
19842 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19843 regno2++)
19844 ;
19845
19846 reg1 = gen_rtx_REG (SImode, regno);
19847 reg2 = gen_rtx_REG (SImode, regno2);
19848
19849 if (first)
19850 {
19851 rtx insn;
19852
19853 first = false;
19854 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19855 stack_pointer_rtx,
19856 -4 * num_regs));
19857 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19858 stack_pointer_rtx,
19859 -4 * (num_regs - 1)));
19860 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19861 plus_constant (Pmode, stack_pointer_rtx,
19862 -4 * (num_regs)));
19863 tmp1 = gen_rtx_SET (mem1, reg1);
19864 tmp2 = gen_rtx_SET (mem2, reg2);
19865 RTX_FRAME_RELATED_P (tmp0) = 1;
19866 RTX_FRAME_RELATED_P (tmp1) = 1;
19867 RTX_FRAME_RELATED_P (tmp2) = 1;
19868 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19869 XVECEXP (par, 0, 0) = tmp0;
19870 XVECEXP (par, 0, 1) = tmp1;
19871 XVECEXP (par, 0, 2) = tmp2;
19872 insn = emit_insn (par);
19873 RTX_FRAME_RELATED_P (insn) = 1;
19874 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19875 }
19876 else
19877 {
19878 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19879 stack_pointer_rtx,
19880 4 * i));
19881 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19882 stack_pointer_rtx,
19883 4 * (i + 1)));
19884 tmp1 = gen_rtx_SET (mem1, reg1);
19885 tmp2 = gen_rtx_SET (mem2, reg2);
19886 RTX_FRAME_RELATED_P (tmp1) = 1;
19887 RTX_FRAME_RELATED_P (tmp2) = 1;
19888 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19889 XVECEXP (par, 0, 0) = tmp1;
19890 XVECEXP (par, 0, 1) = tmp2;
19891 emit_insn (par);
19892 }
19893
19894 /* Create unwind information. This is an approximation. */
19895 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19896 plus_constant (Pmode,
19897 stack_pointer_rtx,
19898 4 * i)),
19899 reg1);
19900 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19901 plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 4 * (i + 1))),
19904 reg2);
19905
19906 RTX_FRAME_RELATED_P (tmp1) = 1;
19907 RTX_FRAME_RELATED_P (tmp2) = 1;
19908 XVECEXP (dwarf, 0, i + 1) = tmp1;
19909 XVECEXP (dwarf, 0, i + 2) = tmp2;
19910 i += 2;
19911 regno = regno2 + 1;
19912 }
19913 else
19914 regno++;
19915
19916 return;
19917 }
19918
19919 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19920 whenever possible, otherwise it emits single-word stores. The first store
19921 also allocates stack space for all saved registers, using pre-indexed
19922 addressing with writeback. All other stores use offset addressing. If no
19923 STRD can be emitted, this function emits a sequence of single-word stores
19924 rather than an STM, because single-word stores give the scheduler more
19925 freedom and can be merged into an STM later by peephole optimizations. */
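/* For illustration only: with SAVED_REGS_MASK covering {r4, r5, r7} the
   stores emitted below correspond to

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   r7 is stored with a plain STR because ARM-mode STRD needs an even/odd
   consecutive register pair and r6 is not being saved here.  */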
19926 static void
19927 arm_emit_strd_push (unsigned long saved_regs_mask)
19928 {
19929 int num_regs = 0;
19930 int i, j, dwarf_index = 0;
19931 int offset = 0;
19932 rtx dwarf = NULL_RTX;
19933 rtx insn = NULL_RTX;
19934 rtx tmp, mem;
19935
19936 /* TODO: More efficient code can be emitted by changing the
19937 layout, e.g., first push all pairs that can use STRD to keep the
19938 stack aligned, and then push all other registers. */
19939 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19940 if (saved_regs_mask & (1 << i))
19941 num_regs++;
19942
19943 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19944 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19945 gcc_assert (num_regs > 0);
19946
19947 /* Create sequence for DWARF info. */
19948 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19949
19950 /* For dwarf info, we generate an explicit stack update. */
19951 tmp = gen_rtx_SET (stack_pointer_rtx,
19952 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19953 RTX_FRAME_RELATED_P (tmp) = 1;
19954 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19955
19956 /* Save registers. */
19957 offset = - 4 * num_regs;
19958 j = 0;
19959 while (j <= LAST_ARM_REGNUM)
19960 if (saved_regs_mask & (1 << j))
19961 {
19962 if ((j % 2 == 0)
19963 && (saved_regs_mask & (1 << (j + 1))))
19964 {
19965 /* The current register and the next register form a register pair
19966 for which STRD can be generated. */
19967 if (offset < 0)
19968 {
19969 /* Allocate stack space for all saved registers. */
19970 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19971 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19972 mem = gen_frame_mem (DImode, tmp);
19973 offset = 0;
19974 }
19975 else if (offset > 0)
19976 mem = gen_frame_mem (DImode,
19977 plus_constant (Pmode,
19978 stack_pointer_rtx,
19979 offset));
19980 else
19981 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19982
19983 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19984 RTX_FRAME_RELATED_P (tmp) = 1;
19985 tmp = emit_insn (tmp);
19986
19987 /* Record the first store insn. */
19988 if (dwarf_index == 1)
19989 insn = tmp;
19990
19991 /* Generate dwarf info. */
19992 mem = gen_frame_mem (SImode,
19993 plus_constant (Pmode,
19994 stack_pointer_rtx,
19995 offset));
19996 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19997 RTX_FRAME_RELATED_P (tmp) = 1;
19998 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19999
20000 mem = gen_frame_mem (SImode,
20001 plus_constant (Pmode,
20002 stack_pointer_rtx,
20003 offset + 4));
20004 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20005 RTX_FRAME_RELATED_P (tmp) = 1;
20006 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20007
20008 offset += 8;
20009 j += 2;
20010 }
20011 else
20012 {
20013 /* Emit a single word store. */
20014 if (offset < 0)
20015 {
20016 /* Allocate stack space for all saved registers. */
20017 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20018 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20019 mem = gen_frame_mem (SImode, tmp);
20020 offset = 0;
20021 }
20022 else if (offset > 0)
20023 mem = gen_frame_mem (SImode,
20024 plus_constant (Pmode,
20025 stack_pointer_rtx,
20026 offset));
20027 else
20028 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20029
20030 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20031 RTX_FRAME_RELATED_P (tmp) = 1;
20032 tmp = emit_insn (tmp);
20033
20034 /* Record the first store insn. */
20035 if (dwarf_index == 1)
20036 insn = tmp;
20037
20038 /* Generate dwarf info. */
20039 mem = gen_frame_mem (SImode,
20040 plus_constant (Pmode,
20041 stack_pointer_rtx,
20042 offset));
20043 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20044 RTX_FRAME_RELATED_P (tmp) = 1;
20045 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20046
20047 offset += 4;
20048 j += 1;
20049 }
20050 }
20051 else
20052 j++;
20053
20054 /* Attach dwarf info to the first insn we generate. */
20055 gcc_assert (insn != NULL_RTX);
20056 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20057 RTX_FRAME_RELATED_P (insn) = 1;
20058 }
20059
20060 /* Generate and emit an insn that we will recognize as a push_multi.
20061 Unfortunately, since this insn does not reflect very well the actual
20062 semantics of the operation, we need to annotate the insn for the benefit
20063 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20064 MASK for registers that should be annotated for DWARF2 frame unwind
20065 information. */
20066 static rtx
20067 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20068 {
20069 int num_regs = 0;
20070 int num_dwarf_regs = 0;
20071 int i, j;
20072 rtx par;
20073 rtx dwarf;
20074 int dwarf_par_index;
20075 rtx tmp, reg;
20076
20077 /* We don't record the PC in the dwarf frame information. */
20078 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20079
20080 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20081 {
20082 if (mask & (1 << i))
20083 num_regs++;
20084 if (dwarf_regs_mask & (1 << i))
20085 num_dwarf_regs++;
20086 }
20087
20088 gcc_assert (num_regs && num_regs <= 16);
20089 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20090
20091 /* For the body of the insn we are going to generate an UNSPEC in
20092 parallel with several USEs. This allows the insn to be recognized
20093 by the push_multi pattern in the arm.md file.
20094
20095 The body of the insn looks something like this:
20096
20097 (parallel [
20098 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20099 (const_int:SI <num>)))
20100 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20101 (use (reg:SI XX))
20102 (use (reg:SI YY))
20103 ...
20104 ])
20105
20106 For the frame note however, we try to be more explicit and actually
20107 show each register being stored into the stack frame, plus a (single)
20108 decrement of the stack pointer. We do it this way in order to be
20109 friendly to the stack unwinding code, which only wants to see a single
20110 stack decrement per instruction. The RTL we generate for the note looks
20111 something like this:
20112
20113 (sequence [
20114 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20115 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20116 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20117 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20118 ...
20119 ])
20120
20121 FIXME: In an ideal world the PRE_MODIFY would not exist and
20122 instead we'd have a parallel expression detailing all
20123 the stores to the various memory addresses so that debug
20124 information is more up-to-date. Remember however while writing
20125 this to take care of the constraints with the push instruction.
20126
20127 Note also that this has to be taken care of for the VFP registers.
20128
20129 For more see PR43399. */
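/* As a concrete (purely illustrative) instance: for MASK = {r4, r5, lr}
   the body built below is recognized as a push_multi storing r4, r5 and
   lr, while the attached REG_FRAME_RELATED_EXPR note describes
   sp = sp - 12 followed by stores of r4, r5 and lr at sp, sp + 4 and
   sp + 8 respectively.  */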
20130
20131 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20132 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20133 dwarf_par_index = 1;
20134
20135 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20136 {
20137 if (mask & (1 << i))
20138 {
20139 reg = gen_rtx_REG (SImode, i);
20140
20141 XVECEXP (par, 0, 0)
20142 = gen_rtx_SET (gen_frame_mem
20143 (BLKmode,
20144 gen_rtx_PRE_MODIFY (Pmode,
20145 stack_pointer_rtx,
20146 plus_constant
20147 (Pmode, stack_pointer_rtx,
20148 -4 * num_regs))
20149 ),
20150 gen_rtx_UNSPEC (BLKmode,
20151 gen_rtvec (1, reg),
20152 UNSPEC_PUSH_MULT));
20153
20154 if (dwarf_regs_mask & (1 << i))
20155 {
20156 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20157 reg);
20158 RTX_FRAME_RELATED_P (tmp) = 1;
20159 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20160 }
20161
20162 break;
20163 }
20164 }
20165
20166 for (j = 1, i++; j < num_regs; i++)
20167 {
20168 if (mask & (1 << i))
20169 {
20170 reg = gen_rtx_REG (SImode, i);
20171
20172 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20173
20174 if (dwarf_regs_mask & (1 << i))
20175 {
20176 tmp
20177 = gen_rtx_SET (gen_frame_mem
20178 (SImode,
20179 plus_constant (Pmode, stack_pointer_rtx,
20180 4 * j)),
20181 reg);
20182 RTX_FRAME_RELATED_P (tmp) = 1;
20183 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20184 }
20185
20186 j++;
20187 }
20188 }
20189
20190 par = emit_insn (par);
20191
20192 tmp = gen_rtx_SET (stack_pointer_rtx,
20193 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20194 RTX_FRAME_RELATED_P (tmp) = 1;
20195 XVECEXP (dwarf, 0, 0) = tmp;
20196
20197 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20198
20199 return par;
20200 }
20201
20202 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20203 SIZE is the amount added to SRC for the CFA adjustment.
20204 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20205 static void
20206 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20207 {
20208 rtx dwarf;
20209
20210 RTX_FRAME_RELATED_P (insn) = 1;
20211 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20212 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20213 }
20214
20215 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20216 SAVED_REGS_MASK shows which registers need to be restored.
20217
20218 Unfortunately, since this insn does not reflect very well the actual
20219 semantics of the operation, we need to annotate the insn for the benefit
20220 of DWARF2 frame unwind information. */
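/* For illustration: with SAVED_REGS_MASK = {r4, r5, pc} and SP not in the
   mask, the PARALLEL built below holds a return, a set of sp = sp + 12 and
   one load per register, while REG_CFA_RESTORE notes are attached for r4
   and r5 only, since PC is not described in the unwind info.  */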
20221 static void
20222 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20223 {
20224 int num_regs = 0;
20225 int i, j;
20226 rtx par;
20227 rtx dwarf = NULL_RTX;
20228 rtx tmp, reg;
20229 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20230 int offset_adj;
20231 int emit_update;
20232
20233 offset_adj = return_in_pc ? 1 : 0;
20234 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20235 if (saved_regs_mask & (1 << i))
20236 num_regs++;
20237
20238 gcc_assert (num_regs && num_regs <= 16);
20239
20240 /* If SP is in the reglist, then we don't emit the SP update insn. */
20241 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20242
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20246
20247 if (return_in_pc)
20248 XVECEXP (par, 0, 0) = ret_rtx;
20249
20250 if (emit_update)
20251 {
20252 /* Increment the stack pointer, based on there being
20253 num_regs 4-byte registers to restore. */
20254 tmp = gen_rtx_SET (stack_pointer_rtx,
20255 plus_constant (Pmode,
20256 stack_pointer_rtx,
20257 4 * num_regs));
20258 RTX_FRAME_RELATED_P (tmp) = 1;
20259 XVECEXP (par, 0, offset_adj) = tmp;
20260 }
20261
20262 /* Now restore every reg, which may include PC. */
20263 for (j = 0, i = 0; j < num_regs; i++)
20264 if (saved_regs_mask & (1 << i))
20265 {
20266 reg = gen_rtx_REG (SImode, i);
20267 if ((num_regs == 1) && emit_update && !return_in_pc)
20268 {
20269 /* Emit single load with writeback. */
20270 tmp = gen_frame_mem (SImode,
20271 gen_rtx_POST_INC (Pmode,
20272 stack_pointer_rtx));
20273 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20274 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20275 return;
20276 }
20277
20278 tmp = gen_rtx_SET (reg,
20279 gen_frame_mem
20280 (SImode,
20281 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20284
20285 /* We need to maintain a sequence for DWARF info too. As dwarf info
20286 should not have PC, skip PC. */
20287 if (i != PC_REGNUM)
20288 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20289
20290 j++;
20291 }
20292
20293 if (return_in_pc)
20294 par = emit_jump_insn (par);
20295 else
20296 par = emit_insn (par);
20297
20298 REG_NOTES (par) = dwarf;
20299 if (!return_in_pc)
20300 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20301 stack_pointer_rtx, stack_pointer_rtx);
20302 }
20303
20304 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20305 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20306
20307 Unfortunately, since this insn does not reflect very well the actual
20308 semantics of the operation, we need to annotate the insn for the benefit
20309 of DWARF2 frame unwind information. */
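/* For illustration: popping four D-registers (say d8-d11) with BASE_REG
   equal to SP builds a PARALLEL containing sp = sp + 32 and four DFmode
   loads at offsets 0, 8, 16 and 24, plus REG_CFA_RESTORE notes for the
   four registers; the pattern is ultimately emitted as a single VLDM-style
   load-multiple.  */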
20310 static void
20311 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20312 {
20313 int i, j;
20314 rtx par;
20315 rtx dwarf = NULL_RTX;
20316 rtx tmp, reg;
20317
20318 gcc_assert (num_regs && num_regs <= 32);
20319
20320 /* Workaround ARM10 VFPr1 bug. */
20321 if (num_regs == 2 && !arm_arch6)
20322 {
20323 if (first_reg == 15)
20324 first_reg--;
20325
20326 num_regs++;
20327 }
20328
20329 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20330 there could be up to 32 D-registers to restore.
20331 If there are more than 16 D-registers, make two recursive calls,
20332 each of which emits one pop_multi instruction. */
20333 if (num_regs > 16)
20334 {
20335 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20336 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20337 return;
20338 }
20339
20340 /* The parallel needs to hold num_regs SETs
20341 and one SET for the stack update. */
20342 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20343
20344 /* Increment the stack pointer, based on there being
20345 num_regs 8-byte registers to restore. */
20346 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20347 RTX_FRAME_RELATED_P (tmp) = 1;
20348 XVECEXP (par, 0, 0) = tmp;
20349
20350 /* Now show every reg that will be restored, using a SET for each. */
20351 for (j = 0, i=first_reg; j < num_regs; i += 2)
20352 {
20353 reg = gen_rtx_REG (DFmode, i);
20354
20355 tmp = gen_rtx_SET (reg,
20356 gen_frame_mem
20357 (DFmode,
20358 plus_constant (Pmode, base_reg, 8 * j)));
20359 RTX_FRAME_RELATED_P (tmp) = 1;
20360 XVECEXP (par, 0, j + 1) = tmp;
20361
20362 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20363
20364 j++;
20365 }
20366
20367 par = emit_insn (par);
20368 REG_NOTES (par) = dwarf;
20369
20370 /* Make sure the CFA doesn't stay in IP_REGNUM, to allow unwinding from FP. */
20371 if (REGNO (base_reg) == IP_REGNUM)
20372 {
20373 RTX_FRAME_RELATED_P (par) = 1;
20374 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20375 }
20376 else
20377 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20378 base_reg, base_reg);
20379 }
20380
20381 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20382 an even number of registers is being popped, an LDRD pattern is created for
20383 each register pair. If an odd number of registers is popped, the last
20384 register is loaded using an LDR pattern. */
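/* For illustration: popping {r4, r5, r6} without PC is described by RTL
   corresponding to

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   Unlike ARM-mode LDRD, the Thumb-2 form does not require the two
   destination registers to be consecutive, so any pair of registers from
   the mask can share an LDRD.  */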
20385 static void
20386 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20387 {
20388 int num_regs = 0;
20389 int i, j;
20390 rtx par = NULL_RTX;
20391 rtx dwarf = NULL_RTX;
20392 rtx tmp, reg, tmp1;
20393 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20394
20395 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20396 if (saved_regs_mask & (1 << i))
20397 num_regs++;
20398
20399 gcc_assert (num_regs && num_regs <= 16);
20400
20401 /* We cannot generate an ldrd for PC, so reduce the count if PC is
20402 to be popped. If num_regs was even it now becomes odd, and a
20403 pop with PC can be generated; if num_regs was odd it now becomes
20404 even, and an ldr with return can be generated for PC. */
20405 if (return_in_pc)
20406 num_regs--;
20407
20408 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20409
20410 /* Var j iterates over all the registers in saved_regs_mask, while
20411 var i gives the index of each saved register in the stack frame.
20412 A PARALLEL RTX for a register pair is created here, so that the
20413 LDRD pattern can be matched. As PC is always the last register to
20414 be popped, and we have already decremented num_regs if PC is set,
20415 we don't have to worry about PC in this loop. */
20416 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20417 if (saved_regs_mask & (1 << j))
20418 {
20419 /* Create RTX for memory load. */
20420 reg = gen_rtx_REG (SImode, j);
20421 tmp = gen_rtx_SET (reg,
20422 gen_frame_mem (SImode,
20423 plus_constant (Pmode,
20424 stack_pointer_rtx, 4 * i)));
20425 RTX_FRAME_RELATED_P (tmp) = 1;
20426
20427 if (i % 2 == 0)
20428 {
20429 /* When saved-register index (i) is even, the RTX to be emitted is
20430 yet to be created. Hence create it first. The LDRD pattern we
20431 are generating is:
20432 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20433 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20434 where target registers need not be consecutive. */
20435 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20436 dwarf = NULL_RTX;
20437 }
20438
20439 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20440 added as the 0th element; if i is odd, reg_i is added as the 1st element
20441 of the LDRD pattern shown above. */
20442 XVECEXP (par, 0, (i % 2)) = tmp;
20443 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20444
20445 if ((i % 2) == 1)
20446 {
20447 /* When saved-register index (i) is odd, RTXs for both the registers
20448 to be loaded have been generated in the LDRD pattern above, and the
20449 pattern can be emitted now. */
20450 par = emit_insn (par);
20451 REG_NOTES (par) = dwarf;
20452 RTX_FRAME_RELATED_P (par) = 1;
20453 }
20454
20455 i++;
20456 }
20457
20458 /* If the number of registers popped is odd and return_in_pc is false, or
20459 the number of registers is even and return_in_pc is true, the last
20460 register (which may be PC) is popped using LDR. Hence, adjust the stack
20461 first and then use LDR with post-increment. */
20462
20463 /* Increment the stack pointer, based on there being
20464 num_regs 4-byte registers to restore. */
20465 tmp = gen_rtx_SET (stack_pointer_rtx,
20466 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20467 RTX_FRAME_RELATED_P (tmp) = 1;
20468 tmp = emit_insn (tmp);
20469 if (!return_in_pc)
20470 {
20471 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20472 stack_pointer_rtx, stack_pointer_rtx);
20473 }
20474
20475 dwarf = NULL_RTX;
20476
20477 if (((num_regs % 2) == 1 && !return_in_pc)
20478 || ((num_regs % 2) == 0 && return_in_pc))
20479 {
20480 /* Scan for the single register to be popped. Skip until the saved
20481 register is found. */
20482 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20483
20484 /* Gen LDR with post increment here. */
20485 tmp1 = gen_rtx_MEM (SImode,
20486 gen_rtx_POST_INC (SImode,
20487 stack_pointer_rtx));
20488 set_mem_alias_set (tmp1, get_frame_alias_set ());
20489
20490 reg = gen_rtx_REG (SImode, j);
20491 tmp = gen_rtx_SET (reg, tmp1);
20492 RTX_FRAME_RELATED_P (tmp) = 1;
20493 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20494
20495 if (return_in_pc)
20496 {
20497 /* If return_in_pc, j must be PC_REGNUM. */
20498 gcc_assert (j == PC_REGNUM);
20499 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20500 XVECEXP (par, 0, 0) = ret_rtx;
20501 XVECEXP (par, 0, 1) = tmp;
20502 par = emit_jump_insn (par);
20503 }
20504 else
20505 {
20506 par = emit_insn (tmp);
20507 REG_NOTES (par) = dwarf;
20508 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20509 stack_pointer_rtx, stack_pointer_rtx);
20510 }
20511
20512 }
20513 else if ((num_regs % 2) == 1 && return_in_pc)
20514 {
20515 /* There are 2 registers left to be popped. So, generate the pattern
20516 pop_multiple_with_stack_update_and_return to pop into PC as well. */
20517 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20518 }
20519
20520 return;
20521 }
20522
20523 /* LDRD in ARM mode needs consecutive registers as operands. This function
20524 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20525 offset addressing and then generates one separate stack update. This provides
20526 more scheduling freedom, compared to writeback on every load. However,
20527 if the function returns using load into PC directly
20528 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20529 before the last load. TODO: Add a peephole optimization to recognize
20530 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20531 peephole optimization to merge the load at stack-offset zero
20532 with the stack update instruction using load with writeback
20533 in post-index addressing mode. */
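/* For illustration: with SAVED_REGS_MASK = {r4, r5, r6, pc} the sequence
   built below corresponds to

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   r6 cannot be paired with PC, and PC is always popped last by the return
   pattern using a post-increment load.  */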
20534 static void
20535 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20536 {
20537 int j = 0;
20538 int offset = 0;
20539 rtx par = NULL_RTX;
20540 rtx dwarf = NULL_RTX;
20541 rtx tmp, mem;
20542
20543 /* Restore saved registers. */
20544 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20545 j = 0;
20546 while (j <= LAST_ARM_REGNUM)
20547 if (saved_regs_mask & (1 << j))
20548 {
20549 if ((j % 2) == 0
20550 && (saved_regs_mask & (1 << (j + 1)))
20551 && (j + 1) != PC_REGNUM)
20552 {
20553 /* Current register and next register form register pair for which
20554 LDRD can be generated. PC is always the last register popped, and
20555 we handle it separately. */
20556 if (offset > 0)
20557 mem = gen_frame_mem (DImode,
20558 plus_constant (Pmode,
20559 stack_pointer_rtx,
20560 offset));
20561 else
20562 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20563
20564 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20565 tmp = emit_insn (tmp);
20566 RTX_FRAME_RELATED_P (tmp) = 1;
20567
20568 /* Generate dwarf info. */
20569
20570 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20571 gen_rtx_REG (SImode, j),
20572 NULL_RTX);
20573 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20574 gen_rtx_REG (SImode, j + 1),
20575 dwarf);
20576
20577 REG_NOTES (tmp) = dwarf;
20578
20579 offset += 8;
20580 j += 2;
20581 }
20582 else if (j != PC_REGNUM)
20583 {
20584 /* Emit a single word load. */
20585 if (offset > 0)
20586 mem = gen_frame_mem (SImode,
20587 plus_constant (Pmode,
20588 stack_pointer_rtx,
20589 offset));
20590 else
20591 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20592
20593 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20594 tmp = emit_insn (tmp);
20595 RTX_FRAME_RELATED_P (tmp) = 1;
20596
20597 /* Generate dwarf info. */
20598 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20599 gen_rtx_REG (SImode, j),
20600 NULL_RTX);
20601
20602 offset += 4;
20603 j += 1;
20604 }
20605 else /* j == PC_REGNUM */
20606 j++;
20607 }
20608 else
20609 j++;
20610
20611 /* Update the stack. */
20612 if (offset > 0)
20613 {
20614 tmp = gen_rtx_SET (stack_pointer_rtx,
20615 plus_constant (Pmode,
20616 stack_pointer_rtx,
20617 offset));
20618 tmp = emit_insn (tmp);
20619 arm_add_cfa_adjust_cfa_note (tmp, offset,
20620 stack_pointer_rtx, stack_pointer_rtx);
20621 offset = 0;
20622 }
20623
20624 if (saved_regs_mask & (1 << PC_REGNUM))
20625 {
20626 /* Only PC is to be popped. */
20627 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20628 XVECEXP (par, 0, 0) = ret_rtx;
20629 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20630 gen_frame_mem (SImode,
20631 gen_rtx_POST_INC (SImode,
20632 stack_pointer_rtx)));
20633 RTX_FRAME_RELATED_P (tmp) = 1;
20634 XVECEXP (par, 0, 1) = tmp;
20635 par = emit_jump_insn (par);
20636
20637 /* Generate dwarf info. */
20638 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20639 gen_rtx_REG (SImode, PC_REGNUM),
20640 NULL_RTX);
20641 REG_NOTES (par) = dwarf;
20642 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20643 stack_pointer_rtx, stack_pointer_rtx);
20644 }
20645 }
20646
20647 /* Calculate the size of the return value that is passed in registers. */
20648 static unsigned
20649 arm_size_return_regs (void)
20650 {
20651 machine_mode mode;
20652
20653 if (crtl->return_rtx != 0)
20654 mode = GET_MODE (crtl->return_rtx);
20655 else
20656 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20657
20658 return GET_MODE_SIZE (mode);
20659 }
20660
20661 /* Return true if the current function needs to save/restore LR. */
20662 static bool
20663 thumb_force_lr_save (void)
20664 {
20665 return !cfun->machine->lr_save_eliminated
20666 && (!crtl->is_leaf
20667 || thumb_far_jump_used_p ()
20668 || df_regs_ever_live_p (LR_REGNUM));
20669 }
20670
20671 /* We do not know whether r3 will be available, because
20672 an indirect tail call is happening in this
20673 particular case. */
20674 static bool
20675 is_indirect_tailcall_p (rtx call)
20676 {
20677 rtx pat = PATTERN (call);
20678
20679 /* Indirect tail call. */
20680 pat = XVECEXP (pat, 0, 0);
20681 if (GET_CODE (pat) == SET)
20682 pat = SET_SRC (pat);
20683
20684 pat = XEXP (XEXP (pat, 0), 0);
20685 return REG_P (pat);
20686 }
20687
20688 /* Return true if r3 could be used by any of the tail call insns in the
20689 current function. */
20690 static bool
20691 any_sibcall_could_use_r3 (void)
20692 {
20693 edge_iterator ei;
20694 edge e;
20695
20696 if (!crtl->tail_call_emit)
20697 return false;
20698 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20699 if (e->flags & EDGE_SIBCALL)
20700 {
20701 rtx_insn *call = BB_END (e->src);
20702 if (!CALL_P (call))
20703 call = prev_nonnote_nondebug_insn (call);
20704 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20705 if (find_regno_fusage (call, USE, 3)
20706 || is_indirect_tailcall_p (call))
20707 return true;
20708 }
20709 return false;
20710 }
20711
20712
20713 /* Compute the distance from register FROM to register TO.
20714 These can be the arg pointer (26), the soft frame pointer (25),
20715 the stack pointer (13) or the hard frame pointer (11).
20716 In thumb mode r7 is used as the soft frame pointer, if needed.
20717 Typical stack layout looks like this:
20718
20719 old stack pointer -> | |
20720 ----
20721 | | \
20722 | | saved arguments for
20723 | | vararg functions
20724 | | /
20725 --
20726 hard FP & arg pointer -> | | \
20727 | | stack
20728 | | frame
20729 | | /
20730 --
20731 | | \
20732 | | call saved
20733 | | registers
20734 soft frame pointer -> | | /
20735 --
20736 | | \
20737 | | local
20738 | | variables
20739 locals base pointer -> | | /
20740 --
20741 | | \
20742 | | outgoing
20743 | | arguments
20744 current stack pointer -> | | /
20745 --
20746
20747 For a given function some or all of these stack components
20748 may not be needed, giving rise to the possibility of
20749 eliminating some of the registers.
20750
20751 The values returned by this function must reflect the behavior
20752 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20753
20754 The sign of the number returned reflects the direction of stack
20755 growth, so the values are positive for all eliminations except
20756 from the soft frame pointer to the hard frame pointer.
20757
20758 SFP may point just inside the local variables block to ensure correct
20759 alignment. */
20760
20761
20762 /* Return cached stack offsets. */
20763
20764 static arm_stack_offsets *
20765 arm_get_frame_offsets (void)
20766 {
20767 struct arm_stack_offsets *offsets;
20768
20769 offsets = &cfun->machine->stack_offsets;
20770
20771 return offsets;
20772 }
20773
20774
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
20778
20779 static void
20780 arm_compute_frame_layout (void)
20781 {
20782 struct arm_stack_offsets *offsets;
20783 unsigned long func_type;
20784 int saved;
20785 int core_saved;
20786 HOST_WIDE_INT frame_size;
20787 int i;
20788
20789 offsets = &cfun->machine->stack_offsets;
20790
20791 /* Initially this is the size of the local variables. It will be translated
20792 into an offset once we have determined the size of preceding data. */
20793 frame_size = ROUND_UP_WORD (get_frame_size ());
20794
20795 /* Space for variadic functions. */
20796 offsets->saved_args = crtl->args.pretend_args_size;
20797
20798 /* In Thumb mode this is incorrect, but never used. */
20799 offsets->frame
20800 = (offsets->saved_args
20801 + arm_compute_static_chain_stack_bytes ()
20802 + (frame_pointer_needed ? 4 : 0));
20803
20804 if (TARGET_32BIT)
20805 {
20806 unsigned int regno;
20807
20808 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20809 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20810 saved = core_saved;
20811
20812 /* We know that SP will be doubleword aligned on entry, and we must
20813 preserve that condition at any subroutine call. We also require the
20814 soft frame pointer to be doubleword aligned. */
20815
20816 if (TARGET_REALLY_IWMMXT)
20817 {
20818 /* Check for the call-saved iWMMXt registers. */
20819 for (regno = FIRST_IWMMXT_REGNUM;
20820 regno <= LAST_IWMMXT_REGNUM;
20821 regno++)
20822 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20823 saved += 8;
20824 }
20825
20826 func_type = arm_current_func_type ();
20827 /* Space for saved VFP registers. */
20828 if (! IS_VOLATILE (func_type)
20829 && TARGET_HARD_FLOAT)
20830 saved += arm_get_vfp_saved_size ();
20831 }
20832 else /* TARGET_THUMB1 */
20833 {
20834 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20835 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20836 saved = core_saved;
20837 if (TARGET_BACKTRACE)
20838 saved += 16;
20839 }
20840
20841 /* Saved registers include the stack frame. */
20842 offsets->saved_regs
20843 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20844 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20845
20846 /* A leaf function does not need any stack alignment if it has nothing
20847 on the stack. */
20848 if (crtl->is_leaf && frame_size == 0
20849 /* However, if it calls alloca(), we have a dynamically allocated
20850 block of BIGGEST_ALIGNMENT on the stack, so still do stack alignment. */
20851 && ! cfun->calls_alloca)
20852 {
20853 offsets->outgoing_args = offsets->soft_frame;
20854 offsets->locals_base = offsets->soft_frame;
20855 return;
20856 }
20857
20858 /* Ensure SFP has the correct alignment. */
20859 if (ARM_DOUBLEWORD_ALIGN
20860 && (offsets->soft_frame & 7))
20861 {
20862 offsets->soft_frame += 4;
20863 /* Try to align stack by pushing an extra reg. Don't bother doing this
20864 when there is a stack frame as the alignment will be rolled into
20865 the normal stack adjustment. */
20866 if (frame_size + crtl->outgoing_args_size == 0)
20867 {
20868 int reg = -1;
20869
20870 /* Register r3 is caller-saved. Normally it does not need to be
20871 saved on entry by the prologue. However if we choose to save
20872 it for padding then we may confuse the compiler into thinking
20873 a prologue sequence is required when in fact it is not. This
20874 will occur when shrink-wrapping if r3 is used as a scratch
20875 register and there are no other callee-saved writes.
20876
20877 This situation can be avoided when other callee-saved registers
20878 are available: r3 is not mandatory, so prefer a callee-saved
20879 register for the padding. */
20880 bool prefer_callee_reg_p = false;
20881
20882 /* If it is safe to use r3, then do so. This sometimes
20883 generates better code on Thumb-2 by avoiding the need to
20884 use 32-bit push/pop instructions. */
20885 if (! any_sibcall_could_use_r3 ()
20886 && arm_size_return_regs () <= 12
20887 && (offsets->saved_regs_mask & (1 << 3)) == 0
20888 && (TARGET_THUMB2
20889 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20890 {
20891 reg = 3;
20892 if (!TARGET_THUMB2)
20893 prefer_callee_reg_p = true;
20894 }
20895 if (reg == -1
20896 || prefer_callee_reg_p)
20897 {
20898 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20899 {
20900 /* Avoid fixed registers; they may be changed at
20901 arbitrary times so it's unsafe to restore them
20902 during the epilogue. */
20903 if (!fixed_regs[i]
20904 && (offsets->saved_regs_mask & (1 << i)) == 0)
20905 {
20906 reg = i;
20907 break;
20908 }
20909 }
20910 }
20911
20912 if (reg != -1)
20913 {
20914 offsets->saved_regs += 4;
20915 offsets->saved_regs_mask |= (1 << reg);
20916 }
20917 }
20918 }
20919
20920 offsets->locals_base = offsets->soft_frame + frame_size;
20921 offsets->outgoing_args = (offsets->locals_base
20922 + crtl->outgoing_args_size);
20923
20924 if (ARM_DOUBLEWORD_ALIGN)
20925 {
20926 /* Ensure SP remains doubleword aligned. */
20927 if (offsets->outgoing_args & 7)
20928 offsets->outgoing_args += 4;
20929 gcc_assert (!(offsets->outgoing_args & 7));
20930 }
20931 }
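/* A worked example (purely illustrative; it assumes ARM state, no frame
   pointer, no static chain, no interworking slot and no outgoing
   arguments): a function that saves {r4, r5, lr} and has 16 bytes of
   locals ends up with saved_args = 0, saved_regs = 12, soft_frame = 16
   (padded up from 12 for doubleword alignment), locals_base = 32 and
   outgoing_args = 32. The 4 bytes of padding are folded into the normal
   stack adjustment because a local frame exists; with no locals or
   outgoing arguments at all, an extra register (r3 or a callee-saved one)
   would be pushed instead.  */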
20932
20933
20934 /* Calculate the relative offsets for the different stack pointers. Positive
20935 offsets are in the direction of stack growth. */
20936
20937 HOST_WIDE_INT
20938 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20939 {
20940 arm_stack_offsets *offsets;
20941
20942 offsets = arm_get_frame_offsets ();
20943
20944 /* OK, now we have enough information to compute the distances.
20945 There must be an entry in these switch tables for each pair
20946 of registers in ELIMINABLE_REGS, even if some of the entries
20947 seem to be redundant or useless. */
20948 switch (from)
20949 {
20950 case ARG_POINTER_REGNUM:
20951 switch (to)
20952 {
20953 case THUMB_HARD_FRAME_POINTER_REGNUM:
20954 return 0;
20955
20956 case FRAME_POINTER_REGNUM:
20957 /* This is the reverse of the soft frame pointer
20958 to hard frame pointer elimination below. */
20959 return offsets->soft_frame - offsets->saved_args;
20960
20961 case ARM_HARD_FRAME_POINTER_REGNUM:
20962 /* This is only non-zero in the case where the static chain register
20963 is stored above the frame. */
20964 return offsets->frame - offsets->saved_args - 4;
20965
20966 case STACK_POINTER_REGNUM:
20967 /* If nothing has been pushed on the stack at all
20968 then this will return -4. This *is* correct! */
20969 return offsets->outgoing_args - (offsets->saved_args + 4);
20970
20971 default:
20972 gcc_unreachable ();
20973 }
20974 gcc_unreachable ();
20975
20976 case FRAME_POINTER_REGNUM:
20977 switch (to)
20978 {
20979 case THUMB_HARD_FRAME_POINTER_REGNUM:
20980 return 0;
20981
20982 case ARM_HARD_FRAME_POINTER_REGNUM:
20983 /* The hard frame pointer points to the top entry in the
20984 stack frame. The soft frame pointer points to the bottom entry
20985 in the stack frame. If there is no stack frame at all,
20986 then they are identical. */
20987
20988 return offsets->frame - offsets->soft_frame;
20989
20990 case STACK_POINTER_REGNUM:
20991 return offsets->outgoing_args - offsets->soft_frame;
20992
20993 default:
20994 gcc_unreachable ();
20995 }
20996 gcc_unreachable ();
20997
20998 default:
20999 /* You cannot eliminate from the stack pointer.
21000 In theory you could eliminate from the hard frame
21001 pointer to the stack pointer, but this will never
21002 happen, since if a stack frame is not needed the
21003 hard frame pointer will never be used. */
21004 gcc_unreachable ();
21005 }
21006 }
21007
21008 /* Given FROM and TO register numbers, say whether this elimination is
21009 allowed. Frame pointer elimination is automatically handled.
21010
21011 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21012 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21013 pointer, we must eliminate FRAME_POINTER_REGNUM into
21014 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21015 ARG_POINTER_REGNUM. */
21016
21017 bool
21018 arm_can_eliminate (const int from, const int to)
21019 {
21020 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21021 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21022 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21023 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21024 true);
21025 }
21026
21027 /* Emit RTL to save coprocessor registers on function entry. Returns the
21028 number of bytes pushed. */
21029
21030 static int
21031 arm_save_coproc_regs(void)
21032 {
21033 int saved_size = 0;
21034 unsigned reg;
21035 unsigned start_reg;
21036 rtx insn;
21037
21038 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21039 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21040 {
21041 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21042 insn = gen_rtx_MEM (V2SImode, insn);
21043 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21044 RTX_FRAME_RELATED_P (insn) = 1;
21045 saved_size += 8;
21046 }
21047
21048 if (TARGET_HARD_FLOAT)
21049 {
21050 start_reg = FIRST_VFP_REGNUM;
21051
21052 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21053 {
21054 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21055 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21056 {
21057 if (start_reg != reg)
21058 saved_size += vfp_emit_fstmd (start_reg,
21059 (reg - start_reg) / 2);
21060 start_reg = reg + 2;
21061 }
21062 }
21063 if (start_reg != reg)
21064 saved_size += vfp_emit_fstmd (start_reg,
21065 (reg - start_reg) / 2);
21066 }
21067 return saved_size;
21068 }
21069
21070
21071 /* Set the Thumb frame pointer from the stack pointer. */
21072
21073 static void
21074 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21075 {
21076 HOST_WIDE_INT amount;
21077 rtx insn, dwarf;
21078
21079 amount = offsets->outgoing_args - offsets->locals_base;
21080 if (amount < 1024)
21081 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21082 stack_pointer_rtx, GEN_INT (amount)));
21083 else
21084 {
21085 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21086 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21087 expects the first two operands to be the same. */
21088 if (TARGET_THUMB2)
21089 {
21090 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21091 stack_pointer_rtx,
21092 hard_frame_pointer_rtx));
21093 }
21094 else
21095 {
21096 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21097 hard_frame_pointer_rtx,
21098 stack_pointer_rtx));
21099 }
21100 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21101 plus_constant (Pmode, stack_pointer_rtx, amount));
21102 RTX_FRAME_RELATED_P (dwarf) = 1;
21103 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21104 }
21105
21106 RTX_FRAME_RELATED_P (insn) = 1;
21107 }
21108
21109 struct scratch_reg {
21110 rtx reg;
21111 bool saved;
21112 };
21113
21114 /* Return a short-lived scratch register for use as a 2nd scratch register on
21115 function entry after the registers are saved in the prologue. This register
21116 must be released by means of release_scratch_register_on_entry. IP is not
21117 considered since it is always used as the 1st scratch register if available.
21118
21119 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21120 mask of live registers. */
21121
21122 static void
21123 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21124 unsigned long live_regs)
21125 {
21126 int regno = -1;
21127
21128 sr->saved = false;
21129
21130 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21131 regno = LR_REGNUM;
21132 else
21133 {
21134 unsigned int i;
21135
21136 for (i = 4; i < 11; i++)
21137 if (regno1 != i && (live_regs & (1 << i)) != 0)
21138 {
21139 regno = i;
21140 break;
21141 }
21142
21143 if (regno < 0)
21144 {
21145 /* If IP is used as the 1st scratch register for a nested function,
21146 then either r3 wasn't available or it is used to preserve IP. */
21147 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21148 regno1 = 3;
21149 regno = (regno1 == 3 ? 2 : 3);
21150 sr->saved
21151 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21152 regno);
21153 }
21154 }
21155
21156 sr->reg = gen_rtx_REG (SImode, regno);
21157 if (sr->saved)
21158 {
21159 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21160 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21161 rtx x = gen_rtx_SET (stack_pointer_rtx,
21162 plus_constant (Pmode, stack_pointer_rtx, -4));
21163 RTX_FRAME_RELATED_P (insn) = 1;
21164 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21165 }
21166 }
21167
21168 /* Release a scratch register obtained from the preceding function. */
21169
21170 static void
21171 release_scratch_register_on_entry (struct scratch_reg *sr)
21172 {
21173 if (sr->saved)
21174 {
21175 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21176 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21177 rtx x = gen_rtx_SET (stack_pointer_rtx,
21178 plus_constant (Pmode, stack_pointer_rtx, 4));
21179 RTX_FRAME_RELATED_P (insn) = 1;
21180 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21181 }
21182 }
21183
21184 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21185
21186 #if PROBE_INTERVAL > 4096
21187 #error Cannot use indexed addressing mode for stack probing
21188 #endif
21189
21190 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21191 inclusive. These are offsets from the current stack pointer. REGNO1
21192 is the index number of the 1st scratch register and LIVE_REGS is the
21193 mask of live registers. */
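/* A worked example, assuming PROBE_INTERVAL is 4096: for FIRST = 4096 and
   SIZE = 12388 the middle branch below probes at SP - 8192, SP - 12288,
   SP - 16384 and finally SP - 16484 (that is, FIRST + SIZE below SP), so
   consecutive probes are never more than PROBE_INTERVAL apart.  */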
21194
21195 static void
21196 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21197 unsigned int regno1, unsigned long live_regs)
21198 {
21199 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21200
21201 /* See if we have a constant small number of probes to generate. If so,
21202 that's the easy case. */
21203 if (size <= PROBE_INTERVAL)
21204 {
21205 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21206 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21207 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21208 }
21209
21210 /* The run-time loop is made up of 10 insns in the generic case while the
21211 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21212 else if (size <= 5 * PROBE_INTERVAL)
21213 {
21214 HOST_WIDE_INT i, rem;
21215
21216 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21217 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21218 emit_stack_probe (reg1);
21219
21220 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21221 it exceeds SIZE. If only two probes are needed, this will not
21222 generate any code. Then probe at FIRST + SIZE. */
21223 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21224 {
21225 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21226 emit_stack_probe (reg1);
21227 }
21228
21229 rem = size - (i - PROBE_INTERVAL);
21230 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21231 {
21232 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21233 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21234 }
21235 else
21236 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21237 }
21238
21239 /* Otherwise, do the same as above, but in a loop. Note that we must be
21240 extra careful with variables wrapping around because we might be at
21241 the very top (or the very bottom) of the address space and we have
21242 to be able to handle this case properly; in particular, we use an
21243 equality test for the loop condition. */
21244 else
21245 {
21246 HOST_WIDE_INT rounded_size;
21247 struct scratch_reg sr;
21248
21249 get_scratch_register_on_entry (&sr, regno1, live_regs);
21250
21251 emit_move_insn (reg1, GEN_INT (first));
21252
21253
21254 /* Step 1: round SIZE to the previous multiple of the interval. */
21255
21256 rounded_size = size & -PROBE_INTERVAL;
21257 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21258
21259
21260 /* Step 2: compute initial and final value of the loop counter. */
21261
21262 /* TEST_ADDR = SP + FIRST. */
21263 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21264
21265 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21266 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21267
21268
21269 /* Step 3: the loop
21270
21271 do
21272 {
21273 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21274 probe at TEST_ADDR
21275 }
21276 while (TEST_ADDR != LAST_ADDR)
21277
21278 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21279 until it is equal to ROUNDED_SIZE. */
21280
21281 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21282
21283
21284 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21285 that SIZE is equal to ROUNDED_SIZE. */
21286
21287 if (size != rounded_size)
21288 {
21289 HOST_WIDE_INT rem = size - rounded_size;
21290
21291 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21292 {
21293 emit_set_insn (sr.reg,
21294 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21295 emit_stack_probe (plus_constant (Pmode, sr.reg,
21296 PROBE_INTERVAL - rem));
21297 }
21298 else
21299 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21300 }
21301
21302 release_scratch_register_on_entry (&sr);
21303 }
21304
21305 /* Make sure nothing is scheduled before we are done. */
21306 emit_insn (gen_blockage ());
21307 }
21308
21309 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21310 absolute addresses. */
21311
21312 const char *
21313 output_probe_stack_range (rtx reg1, rtx reg2)
21314 {
21315 static int labelno = 0;
21316 char loop_lab[32];
21317 rtx xops[2];
21318
21319 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21320
21321 /* Loop. */
21322 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21323
21324 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21325 xops[0] = reg1;
21326 xops[1] = GEN_INT (PROBE_INTERVAL);
21327 output_asm_insn ("sub\t%0, %0, %1", xops);
21328
21329 /* Probe at TEST_ADDR. */
21330 output_asm_insn ("str\tr0, [%0, #0]", xops);
21331
21332 /* Test if TEST_ADDR == LAST_ADDR. */
21333 xops[1] = reg2;
21334 output_asm_insn ("cmp\t%0, %1", xops);
21335
21336 /* Branch. */
21337 fputs ("\tbne\t", asm_out_file);
21338 assemble_name_raw (asm_out_file, loop_lab);
21339 fputc ('\n', asm_out_file);
21340
21341 return "";
21342 }
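/* For illustration: with REG1 = r4, REG2 = r5 and a PROBE_INTERVAL of 4096
   the routine above emits a loop of the form

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0

   where the exact label spelling comes from ASM_GENERATE_INTERNAL_LABEL.  */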
21343
21344 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21345 function. */
21346 void
21347 arm_expand_prologue (void)
21348 {
21349 rtx amount;
21350 rtx insn;
21351 rtx ip_rtx;
21352 unsigned long live_regs_mask;
21353 unsigned long func_type;
21354 int fp_offset = 0;
21355 int saved_pretend_args = 0;
21356 int saved_regs = 0;
21357 unsigned HOST_WIDE_INT args_to_push;
21358 HOST_WIDE_INT size;
21359 arm_stack_offsets *offsets;
21360 bool clobber_ip;
21361
21362 func_type = arm_current_func_type ();
21363
21364 /* Naked functions don't have prologues. */
21365 if (IS_NAKED (func_type))
21366 {
21367 if (flag_stack_usage_info)
21368 current_function_static_stack_size = 0;
21369 return;
21370 }
21371
21372 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21373 args_to_push = crtl->args.pretend_args_size;
21374
21375 /* Compute which registers we will have to save onto the stack. */
21376 offsets = arm_get_frame_offsets ();
21377 live_regs_mask = offsets->saved_regs_mask;
21378
21379 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21380
21381 if (IS_STACKALIGN (func_type))
21382 {
21383 rtx r0, r1;
21384
21385 /* Handle a word-aligned stack pointer. We generate the following:
21386
21387 mov r0, sp
21388 bic r1, r0, #7
21389 mov sp, r1
21390 <save and restore r0 in normal prologue/epilogue>
21391 mov sp, r0
21392 bx lr
21393
21394 The unwinder doesn't need to know about the stack realignment.
21395 Just tell it we saved SP in r0. */
21396 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21397
21398 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21399 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21400
21401 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21402 RTX_FRAME_RELATED_P (insn) = 1;
21403 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21404
21405 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21406
21407 /* ??? The CFA changes here, which may cause GDB to conclude that it
21408 has entered a different function. That said, the unwind info is
21409 correct, individually, before and after this instruction because
21410 we've described the save of SP, which will override the default
21411 handling of SP as restoring from the CFA. */
21412 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21413 }
21414
21415 /* The static chain register is the same as the IP register. If it is
21416 clobbered when creating the frame, we need to save and restore it. */
21417 clobber_ip = IS_NESTED (func_type)
21418 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21419 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21420 && !df_regs_ever_live_p (LR_REGNUM)
21421 && arm_r3_live_at_start_p ()));
21422
21423 /* Find somewhere to store IP whilst the frame is being created.
21424 We try the following places in order:
21425
21426 1. The last argument register r3 if it is available.
21427 2. A slot on the stack above the frame if there are no
21428 arguments to push onto the stack.
21429 3. Register r3 again, after pushing the argument registers
21430 onto the stack, if this is a varargs function.
21431 4. The last slot on the stack created for the arguments to
21432 push, if this isn't a varargs function.
21433
21434 Note - we only need to tell the dwarf2 backend about the SP
21435 adjustment in the second variant; the static chain register
21436 doesn't need to be unwound, as it doesn't contain a value
21437 inherited from the caller. */
21438 if (clobber_ip)
21439 {
21440 if (!arm_r3_live_at_start_p ())
21441 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21442 else if (args_to_push == 0)
21443 {
21444 rtx addr, dwarf;
21445
21446 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21447 saved_regs += 4;
21448
21449 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21450 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21451 fp_offset = 4;
21452
21453 /* Just tell the dwarf backend that we adjusted SP. */
21454 dwarf = gen_rtx_SET (stack_pointer_rtx,
21455 plus_constant (Pmode, stack_pointer_rtx,
21456 -fp_offset));
21457 RTX_FRAME_RELATED_P (insn) = 1;
21458 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21459 }
21460 else
21461 {
21462 /* Store the args on the stack. */
21463 if (cfun->machine->uses_anonymous_args)
21464 {
21465 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21466 (0xf0 >> (args_to_push / 4)) & 0xf);
21467 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21468 saved_pretend_args = 1;
21469 }
21470 else
21471 {
21472 rtx addr, dwarf;
21473
21474 if (args_to_push == 4)
21475 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21476 else
21477 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21478 plus_constant (Pmode,
21479 stack_pointer_rtx,
21480 -args_to_push));
21481
21482 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21483
21484 /* Just tell the dwarf backend that we adjusted SP. */
21485 dwarf = gen_rtx_SET (stack_pointer_rtx,
21486 plus_constant (Pmode, stack_pointer_rtx,
21487 -args_to_push));
21488 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21489 }
21490
21491 RTX_FRAME_RELATED_P (insn) = 1;
21492 fp_offset = args_to_push;
21493 args_to_push = 0;
21494 }
21495 }
21496
21497 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21498 {
21499 if (IS_INTERRUPT (func_type))
21500 {
21501 /* Interrupt functions must not corrupt any registers.
21502 Creating a frame pointer, however, corrupts the IP
21503 register, so we must push it first. */
21504 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21505
21506 /* Do not set RTX_FRAME_RELATED_P on this insn.
21507 The dwarf stack unwinding code only wants to see one
21508 stack decrement per function, and this is not it. If
21509 this instruction is labeled as being part of the frame
21510 creation sequence then dwarf2out_frame_debug_expr will
21511 die when it encounters the assignment of IP to FP
21512 later on, since the use of SP here establishes SP as
21513 the CFA register and not IP.
21514
21515 Anyway this instruction is not really part of the stack
21516 frame creation although it is part of the prologue. */
21517 }
21518
21519 insn = emit_set_insn (ip_rtx,
21520 plus_constant (Pmode, stack_pointer_rtx,
21521 fp_offset));
21522 RTX_FRAME_RELATED_P (insn) = 1;
21523 }
21524
21525 if (args_to_push)
21526 {
21527 /* Push the argument registers, or reserve space for them. */
21528 if (cfun->machine->uses_anonymous_args)
21529 insn = emit_multi_reg_push
21530 ((0xf0 >> (args_to_push / 4)) & 0xf,
21531 (0xf0 >> (args_to_push / 4)) & 0xf);
21532 else
21533 insn = emit_insn
21534 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21535 GEN_INT (- args_to_push)));
21536 RTX_FRAME_RELATED_P (insn) = 1;
21537 }
21538
21539 /* If this is an interrupt service routine, and the link register
21540 is going to be pushed, and we're not generating the extra
21541 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21542 then subtracting four from LR now will mean that the function return
21543 can be done with a single instruction. */
21544 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21545 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21546 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21547 && TARGET_ARM)
21548 {
21549 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21550
21551 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21552 }
21553
21554 if (live_regs_mask)
21555 {
21556 unsigned long dwarf_regs_mask = live_regs_mask;
21557
21558 saved_regs += bit_count (live_regs_mask) * 4;
21559 if (optimize_size && !frame_pointer_needed
21560 && saved_regs == offsets->saved_regs - offsets->saved_args)
21561 {
21562 /* If no coprocessor registers are being pushed and we don't have
21563 to worry about a frame pointer then push extra registers to
21564 create the stack frame. This is done in a way that does not
21565 alter the frame layout, so is independent of the epilogue. */
21566 int n;
21567 int frame;
21568 n = 0;
21569 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21570 n++;
21571 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21572 if (frame && n * 4 >= frame)
21573 {
21574 n = frame / 4;
21575 live_regs_mask |= (1 << n) - 1;
21576 saved_regs += frame;
21577 }
21578 }
21579
21580 if (TARGET_LDRD
21581 && current_tune->prefer_ldrd_strd
21582 && !optimize_function_for_size_p (cfun))
21583 {
21584 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21585 if (TARGET_THUMB2)
21586 thumb2_emit_strd_push (live_regs_mask);
21587 else if (TARGET_ARM
21588 && !TARGET_APCS_FRAME
21589 && !IS_INTERRUPT (func_type))
21590 arm_emit_strd_push (live_regs_mask);
21591 else
21592 {
21593 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21594 RTX_FRAME_RELATED_P (insn) = 1;
21595 }
21596 }
21597 else
21598 {
21599 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21600 RTX_FRAME_RELATED_P (insn) = 1;
21601 }
21602 }
21603
21604 if (! IS_VOLATILE (func_type))
21605 saved_regs += arm_save_coproc_regs ();
21606
21607 if (frame_pointer_needed && TARGET_ARM)
21608 {
21609 /* Create the new frame pointer. */
21610 if (TARGET_APCS_FRAME)
21611 {
21612 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21613 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21614 RTX_FRAME_RELATED_P (insn) = 1;
21615 }
21616 else
21617 {
21618 insn = GEN_INT (saved_regs - (4 + fp_offset));
21619 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21620 stack_pointer_rtx, insn));
21621 RTX_FRAME_RELATED_P (insn) = 1;
21622 }
21623 }
21624
21625 size = offsets->outgoing_args - offsets->saved_args;
21626 if (flag_stack_usage_info)
21627 current_function_static_stack_size = size;
21628
21629 /* If this isn't an interrupt service routine and we have a frame, then do
21630 stack checking. We use IP as the first scratch register, except for
21631 non-APCS nested functions where LR or r3 is available (see clobber_ip). */
21632 if (!IS_INTERRUPT (func_type)
21633 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21634 {
21635 unsigned int regno;
21636
21637 if (!IS_NESTED (func_type) || clobber_ip)
21638 regno = IP_REGNUM;
21639 else if (df_regs_ever_live_p (LR_REGNUM))
21640 regno = LR_REGNUM;
21641 else
21642 regno = 3;
21643
21644 if (crtl->is_leaf && !cfun->calls_alloca)
21645 {
21646 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21647 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21648 size - STACK_CHECK_PROTECT,
21649 regno, live_regs_mask);
21650 }
21651 else if (size > 0)
21652 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21653 regno, live_regs_mask);
21654 }
21655
21656 /* Recover the static chain register. */
21657 if (clobber_ip)
21658 {
21659 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21660 insn = gen_rtx_REG (SImode, 3);
21661 else
21662 {
21663 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21664 insn = gen_frame_mem (SImode, insn);
21665 }
21666 emit_set_insn (ip_rtx, insn);
21667 emit_insn (gen_force_register_use (ip_rtx));
21668 }
21669
21670 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21671 {
21672 /* This add can produce multiple insns for a large constant, so we
21673 need to get tricky. */
21674 rtx_insn *last = get_last_insn ();
21675
21676 amount = GEN_INT (offsets->saved_args + saved_regs
21677 - offsets->outgoing_args);
21678
21679 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21680 amount));
21681 do
21682 {
21683 last = last ? NEXT_INSN (last) : get_insns ();
21684 RTX_FRAME_RELATED_P (last) = 1;
21685 }
21686 while (last != insn);
21687
21688 /* If the frame pointer is needed, emit a special barrier that
21689 will prevent the scheduler from moving stores to the frame
21690 before the stack adjustment. */
21691 if (frame_pointer_needed)
21692 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21693 hard_frame_pointer_rtx));
21694 }
21695
21696
21697 if (frame_pointer_needed && TARGET_THUMB2)
21698 thumb_set_frame_pointer (offsets);
21699
21700 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21701 {
21702 unsigned long mask;
21703
21704 mask = live_regs_mask;
21705 mask &= THUMB2_WORK_REGS;
21706 if (!IS_NESTED (func_type))
21707 mask |= (1 << IP_REGNUM);
21708 arm_load_pic_register (mask);
21709 }
21710
21711 /* If we are profiling, make sure no instructions are scheduled before
21712 the call to mcount. Similarly if the user has requested no
21713 scheduling in the prolog. Similarly if we want non-call exceptions
21714 using the EABI unwinder, to prevent faulting instructions from being
21715 swapped with a stack adjustment. */
21716 if (crtl->profile || !TARGET_SCHED_PROLOG
21717 || (arm_except_unwind_info (&global_options) == UI_TARGET
21718 && cfun->can_throw_non_call_exceptions))
21719 emit_insn (gen_blockage ());
21720
21721 /* If the link register is being kept alive, with the return address in it,
21722 then make sure that it does not get reused by the ce2 pass. */
21723 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21724 cfun->machine->lr_save_eliminated = 1;
21725 }
21726 \f
21727 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21728 static void
21729 arm_print_condition (FILE *stream)
21730 {
21731 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21732 {
21733 /* Branch conversion is not implemented for Thumb-2. */
21734 if (TARGET_THUMB)
21735 {
21736 output_operand_lossage ("predicated Thumb instruction");
21737 return;
21738 }
21739 if (current_insn_predicate != NULL)
21740 {
21741 output_operand_lossage
21742 ("predicated instruction in conditional sequence");
21743 return;
21744 }
21745
21746 fputs (arm_condition_codes[arm_current_cc], stream);
21747 }
21748 else if (current_insn_predicate)
21749 {
21750 enum arm_cond_code code;
21751
21752 if (TARGET_THUMB1)
21753 {
21754 output_operand_lossage ("predicated Thumb instruction");
21755 return;
21756 }
21757
21758 code = get_arm_condition_code (current_insn_predicate);
21759 fputs (arm_condition_codes[code], stream);
21760 }
21761 }
21762
21763
21764 /* Globally reserved letters: acln
21765 Punctuation letters currently used: @_|?().!#
21766 Lower case letters currently used: bcdefhimpqtvwxyz
21767 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21768 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21769
21770 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21771
21772 If CODE is 'd', then X is a condition operand and the instruction
21773 should only be executed if the condition is true.
21774 If CODE is 'D', then X is a condition operand and the instruction
21775 should only be executed if the condition is false: however, if the mode
21776 of the comparison is CCFPEmode, then always execute the instruction -- we
21777 do this because in these circumstances !GE does not necessarily imply LT;
21778 in these cases the instruction pattern will take care to make sure that
21779 an instruction containing %d will follow, thereby undoing the effects of
21780 doing this instruction unconditionally.
21781 If CODE is 'N' then X is a floating point operand that must be negated
21782 before output.
21783 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21784 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21785 static void
21786 arm_print_operand (FILE *stream, rtx x, int code)
21787 {
21788 switch (code)
21789 {
21790 case '@':
21791 fputs (ASM_COMMENT_START, stream);
21792 return;
21793
21794 case '_':
21795 fputs (user_label_prefix, stream);
21796 return;
21797
21798 case '|':
21799 fputs (REGISTER_PREFIX, stream);
21800 return;
21801
21802 case '?':
21803 arm_print_condition (stream);
21804 return;
21805
21806 case '.':
21807 /* The current condition code for a condition code setting instruction.
21808 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21809 fputc ('s', stream);
21810 arm_print_condition (stream);
21811 return;
21812
21813 case '!':
21814 /* If the instruction is conditionally executed then print
21815 the current condition code, otherwise print 's'. */
21816 gcc_assert (TARGET_THUMB2);
21817 if (current_insn_predicate)
21818 arm_print_condition (stream);
21819 else
21820 fputc ('s', stream);
21821 break;
21822
21823 /* %# is a "break" sequence. It doesn't output anything, but is used to
21824 separate e.g. operand numbers from following text, if that text consists
21825 of further digits which we don't want to be part of the operand
21826 number. */
21827 case '#':
21828 return;
21829
21830 case 'N':
21831 {
21832 REAL_VALUE_TYPE r;
21833 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21834 fprintf (stream, "%s", fp_const_from_val (&r));
21835 }
21836 return;
21837
21838 /* An integer or symbol address without a preceding # sign. */
21839 case 'c':
21840 switch (GET_CODE (x))
21841 {
21842 case CONST_INT:
21843 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21844 break;
21845
21846 case SYMBOL_REF:
21847 output_addr_const (stream, x);
21848 break;
21849
21850 case CONST:
21851 if (GET_CODE (XEXP (x, 0)) == PLUS
21852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21853 {
21854 output_addr_const (stream, x);
21855 break;
21856 }
21857 /* Fall through. */
21858
21859 default:
21860 output_operand_lossage ("Unsupported operand for code '%c'", code);
21861 }
21862 return;
21863
21864 /* An integer that we want to print in HEX. */
21865 case 'x':
21866 switch (GET_CODE (x))
21867 {
21868 case CONST_INT:
21869 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21870 break;
21871
21872 default:
21873 output_operand_lossage ("Unsupported operand for code '%c'", code);
21874 }
21875 return;
21876
21877 case 'B':
21878 if (CONST_INT_P (x))
21879 {
21880 HOST_WIDE_INT val;
21881 val = ARM_SIGN_EXTEND (~INTVAL (x));
21882 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21883 }
21884 else
21885 {
21886 putc ('~', stream);
21887 output_addr_const (stream, x);
21888 }
21889 return;
21890
21891 case 'b':
21892 /* Print the log2 of a CONST_INT. */
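       /* For example, a CONST_INT of 8 is printed as "#3".  */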
21893 {
21894 HOST_WIDE_INT val;
21895
21896 if (!CONST_INT_P (x)
21897 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21898 output_operand_lossage ("Unsupported operand for code '%c'", code);
21899 else
21900 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21901 }
21902 return;
21903
21904 case 'L':
21905 /* The low 16 bits of an immediate constant. */
21906 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21907 return;
21908
21909 case 'i':
21910 fprintf (stream, "%s", arithmetic_instr (x, 1));
21911 return;
21912
21913 case 'I':
21914 fprintf (stream, "%s", arithmetic_instr (x, 0));
21915 return;
21916
21917 case 'S':
21918 {
21919 HOST_WIDE_INT val;
21920 const char *shift;
21921
21922 shift = shift_op (x, &val);
21923
21924 if (shift)
21925 {
21926 fprintf (stream, ", %s ", shift);
21927 if (val == -1)
21928 arm_print_operand (stream, XEXP (x, 1), 0);
21929 else
21930 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21931 }
21932 }
21933 return;
21934
21935 /* An explanation of the 'Q', 'R' and 'H' register operands:
21936
21937 In a pair of registers containing a DI or DF value the 'Q'
21938 operand returns the register number of the register containing
21939 the least significant part of the value. The 'R' operand returns
21940 the register number of the register containing the most
21941 significant part of the value.
21942
21943 The 'H' operand returns the higher of the two register numbers.
21944 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21945 same as the 'Q' operand, since the most significant part of the
21946 value is held in the lower number register. The reverse is true
21947 on systems where WORDS_BIG_ENDIAN is false.
21948
21949 The purpose of these operands is to distinguish between cases
21950 where the endian-ness of the values is important (for example
21951 when they are added together), and cases where the endian-ness
21952 is irrelevant, but the order of register operations is important.
21953 For example when loading a value from memory into a register
21954 pair, the endian-ness does not matter. Provided that the value
21955 from the lower memory address is put into the lower numbered
21956 register, and the value from the higher address is put into the
21957 higher numbered register, the load will work regardless of whether
21958 the value being loaded is big-wordian or little-wordian. The
21959 order of the two register loads can matter however, if the address
21960 of the memory location is actually held in one of the registers
21961 being overwritten by the load.
21962
21963 The 'Q' and 'R' constraints are also available for 64-bit
21964 constants. */
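    /* For example, with WORDS_BIG_ENDIAN false, a DImode value held in
       {r0, r1} prints r0 for 'Q', r1 for 'R' and r1 for 'H'; when
       WORDS_BIG_ENDIAN is true, 'Q' and 'R' swap while 'H' is unchanged.  */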
21965 case 'Q':
21966 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21967 {
21968 rtx part = gen_lowpart (SImode, x);
21969 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21970 return;
21971 }
21972
21973 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21974 {
21975 output_operand_lossage ("invalid operand for code '%c'", code);
21976 return;
21977 }
21978
21979 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21980 return;
21981
21982 case 'R':
21983 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21984 {
21985 machine_mode mode = GET_MODE (x);
21986 rtx part;
21987
21988 if (mode == VOIDmode)
21989 mode = DImode;
21990 part = gen_highpart_mode (SImode, mode, x);
21991 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21992 return;
21993 }
21994
21995 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21996 {
21997 output_operand_lossage ("invalid operand for code '%c'", code);
21998 return;
21999 }
22000
22001 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22002 return;
22003
22004 case 'H':
22005 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22006 {
22007 output_operand_lossage ("invalid operand for code '%c'", code);
22008 return;
22009 }
22010
22011 asm_fprintf (stream, "%r", REGNO (x) + 1);
22012 return;
22013
22014 case 'J':
22015 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22016 {
22017 output_operand_lossage ("invalid operand for code '%c'", code);
22018 return;
22019 }
22020
22021 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22022 return;
22023
22024 case 'K':
22025 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22026 {
22027 output_operand_lossage ("invalid operand for code '%c'", code);
22028 return;
22029 }
22030
22031 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22032 return;
22033
22034 case 'm':
22035 asm_fprintf (stream, "%r",
22036 REG_P (XEXP (x, 0))
22037 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22038 return;
22039
22040 case 'M':
22041 asm_fprintf (stream, "{%r-%r}",
22042 REGNO (x),
22043 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22044 return;
22045
22046 /* Like 'M', but writing doubleword vector registers, for use by Neon
22047 insns. */
22048 case 'h':
22049 {
22050 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22051 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22052 if (numregs == 1)
22053 asm_fprintf (stream, "{d%d}", regno);
22054 else
22055 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22056 }
22057 return;
22058
22059 case 'd':
22060 /* CONST_TRUE_RTX means always -- that's the default. */
22061 if (x == const_true_rtx)
22062 return;
22063
22064 if (!COMPARISON_P (x))
22065 {
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22068 }
22069
22070 fputs (arm_condition_codes[get_arm_condition_code (x)],
22071 stream);
22072 return;
22073
22074 case 'D':
22075 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22076 want to do that. */
22077 if (x == const_true_rtx)
22078 {
22079 output_operand_lossage ("instruction never executed");
22080 return;
22081 }
22082 if (!COMPARISON_P (x))
22083 {
22084 output_operand_lossage ("invalid operand for code '%c'", code);
22085 return;
22086 }
22087
22088 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22089 (get_arm_condition_code (x))],
22090 stream);
22091 return;
22092
22093 case 's':
22094 case 'V':
22095 case 'W':
22096 case 'X':
22097 case 'Y':
22098 case 'Z':
22099 /* Former Maverick support, removed after GCC-4.7. */
22100 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22101 return;
22102
22103 case 'U':
22104 if (!REG_P (x)
22105 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22106 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22107 /* Bad value for wCG register number. */
22108 {
22109 output_operand_lossage ("invalid operand for code '%c'", code);
22110 return;
22111 }
22112
22113 else
22114 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22115 return;
22116
22117 /* Print an iWMMXt control register name. */
22118 case 'w':
22119 if (!CONST_INT_P (x)
22120 || INTVAL (x) < 0
22121 || INTVAL (x) >= 16)
22122 /* Bad value for wC register number. */
22123 {
22124 output_operand_lossage ("invalid operand for code '%c'", code);
22125 return;
22126 }
22127
22128 else
22129 {
22130 static const char * wc_reg_names [16] =
22131 {
22132 "wCID", "wCon", "wCSSF", "wCASF",
22133 "wC4", "wC5", "wC6", "wC7",
22134 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22135 "wC12", "wC13", "wC14", "wC15"
22136 };
22137
22138 fputs (wc_reg_names [INTVAL (x)], stream);
22139 }
22140 return;
22141
22142 /* Print the high single-precision register of a VFP double-precision
22143 register. */
22144 case 'p':
22145 {
22146 machine_mode mode = GET_MODE (x);
22147 int regno;
22148
22149 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22150 {
22151 output_operand_lossage ("invalid operand for code '%c'", code);
22152 return;
22153 }
22154
22155 regno = REGNO (x);
22156 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22157 {
22158 output_operand_lossage ("invalid operand for code '%c'", code);
22159 return;
22160 }
22161
22162 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22163 }
22164 return;
22165
22166 /* Print a VFP/Neon double precision or quad precision register name. */
22167 case 'P':
22168 case 'q':
22169 {
22170 machine_mode mode = GET_MODE (x);
22171 int is_quad = (code == 'q');
22172 int regno;
22173
22174 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22175 {
22176 output_operand_lossage ("invalid operand for code '%c'", code);
22177 return;
22178 }
22179
22180 if (!REG_P (x)
22181 || !IS_VFP_REGNUM (REGNO (x)))
22182 {
22183 output_operand_lossage ("invalid operand for code '%c'", code);
22184 return;
22185 }
22186
22187 regno = REGNO (x);
22188 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22189 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22190 {
22191 output_operand_lossage ("invalid operand for code '%c'", code);
22192 return;
22193 }
22194
22195 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22196 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22197 }
22198 return;
22199
22200 /* These two codes print the low/high doubleword register of a Neon quad
22201 register, respectively. For pair-structure types, can also print
22202 low/high quadword registers. */
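    /* For example, a 16-byte value held in q1 (that is, d2/d3) prints
       d2 for 'e' and d3 for 'f'.  */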
22203 case 'e':
22204 case 'f':
22205 {
22206 machine_mode mode = GET_MODE (x);
22207 int regno;
22208
22209 if ((GET_MODE_SIZE (mode) != 16
22210 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22211 {
22212 output_operand_lossage ("invalid operand for code '%c'", code);
22213 return;
22214 }
22215
22216 regno = REGNO (x);
22217 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22218 {
22219 output_operand_lossage ("invalid operand for code '%c'", code);
22220 return;
22221 }
22222
22223 if (GET_MODE_SIZE (mode) == 16)
22224 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22225 + (code == 'f' ? 1 : 0));
22226 else
22227 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22228 + (code == 'f' ? 1 : 0));
22229 }
22230 return;
22231
22232 /* Print a VFPv3 floating-point constant, represented as an integer
22233 index. */
22234 case 'G':
22235 {
22236 int index = vfp3_const_double_index (x);
22237 gcc_assert (index != -1);
22238 fprintf (stream, "%d", index);
22239 }
22240 return;
22241
22242 /* Print bits representing opcode features for Neon.
22243
22244 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22245 and polynomials as unsigned.
22246
22247 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22248
22249 Bit 2 is 1 for rounding functions, 0 otherwise. */
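    /* For example, a bits value of 3 (signed float) makes 'T', 'F' and 't'
       all print 'f'; a value of 2 (polynomial) prints 'p', 'p' and 'u'
       respectively.  */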
22250
22251 /* Identify the type as 's', 'u', 'p' or 'f'. */
22252 case 'T':
22253 {
22254 HOST_WIDE_INT bits = INTVAL (x);
22255 fputc ("uspf"[bits & 3], stream);
22256 }
22257 return;
22258
22259 /* Likewise, but signed and unsigned integers are both 'i'. */
22260 case 'F':
22261 {
22262 HOST_WIDE_INT bits = INTVAL (x);
22263 fputc ("iipf"[bits & 3], stream);
22264 }
22265 return;
22266
22267 /* As for 'T', but emit 'u' instead of 'p'. */
22268 case 't':
22269 {
22270 HOST_WIDE_INT bits = INTVAL (x);
22271 fputc ("usuf"[bits & 3], stream);
22272 }
22273 return;
22274
22275 /* Bit 2: rounding (vs none). */
22276 case 'O':
22277 {
22278 HOST_WIDE_INT bits = INTVAL (x);
22279 fputs ((bits & 4) != 0 ? "r" : "", stream);
22280 }
22281 return;
22282
22283 /* Memory operand for vld1/vst1 instruction. */
22284 case 'A':
22285 {
22286 rtx addr;
22287 bool postinc = false;
22288 rtx postinc_reg = NULL;
22289 unsigned align, memsize, align_bits;
22290
22291 gcc_assert (MEM_P (x));
22292 addr = XEXP (x, 0);
22293 if (GET_CODE (addr) == POST_INC)
22294 {
22295 postinc = true;
22296 addr = XEXP (addr, 0);
22297 }
22298 if (GET_CODE (addr) == POST_MODIFY)
22299 {
22300 postinc_reg = XEXP (XEXP (addr, 1), 1);
22301 addr = XEXP (addr, 0);
22302 }
22303 asm_fprintf (stream, "[%r", REGNO (addr));
22304
22305 /* We know the alignment of this access, so we can emit a hint in the
22306 instruction (for some alignments) as an aid to the memory subsystem
22307 of the target. */
22308 align = MEM_ALIGN (x) >> 3;
22309 memsize = MEM_SIZE (x);
22310
22311 /* Only certain alignment specifiers are supported by the hardware. */
22312 if (memsize == 32 && (align % 32) == 0)
22313 align_bits = 256;
22314 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22315 align_bits = 128;
22316 else if (memsize >= 8 && (align % 8) == 0)
22317 align_bits = 64;
22318 else
22319 align_bits = 0;
22320
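        /* For example, a 16-byte access through r0 that is known to be
           128-bit aligned is printed as "[r0:128]".  */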
22321 if (align_bits != 0)
22322 asm_fprintf (stream, ":%d", align_bits);
22323
22324 asm_fprintf (stream, "]");
22325
22326 if (postinc)
22327 fputs ("!", stream);
22328 if (postinc_reg)
22329 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22330 }
22331 return;
22332
22333 case 'C':
22334 {
22335 rtx addr;
22336
22337 gcc_assert (MEM_P (x));
22338 addr = XEXP (x, 0);
22339 gcc_assert (REG_P (addr));
22340 asm_fprintf (stream, "[%r]", REGNO (addr));
22341 }
22342 return;
22343
22344 /* Translate an S register number into a D register number and element index. */
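    /* For example, s1 is printed as "d0[1]"; in general sN maps to
       d(N/2)[N%2].  */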
22345 case 'y':
22346 {
22347 machine_mode mode = GET_MODE (x);
22348 int regno;
22349
22350 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22351 {
22352 output_operand_lossage ("invalid operand for code '%c'", code);
22353 return;
22354 }
22355
22356 regno = REGNO (x);
22357 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22358 {
22359 output_operand_lossage ("invalid operand for code '%c'", code);
22360 return;
22361 }
22362
22363 regno = regno - FIRST_VFP_REGNUM;
22364 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22365 }
22366 return;
22367
22368 case 'v':
22369 gcc_assert (CONST_DOUBLE_P (x));
22370 int result;
22371 result = vfp3_const_double_for_fract_bits (x);
22372 if (result == 0)
22373 result = vfp3_const_double_for_bits (x);
22374 fprintf (stream, "#%d", result);
22375 return;
22376
22377 /* Register specifier for vld1.16/vst1.16. Translate the S register
22378 number into a D register number and element index. */
22379 case 'z':
22380 {
22381 machine_mode mode = GET_MODE (x);
22382 int regno;
22383
22384 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22385 {
22386 output_operand_lossage ("invalid operand for code '%c'", code);
22387 return;
22388 }
22389
22390 regno = REGNO (x);
22391 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22392 {
22393 output_operand_lossage ("invalid operand for code '%c'", code);
22394 return;
22395 }
22396
22397 regno = regno - FIRST_VFP_REGNUM;
22398 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22399 }
22400 return;
22401
22402 default:
22403 if (x == 0)
22404 {
22405 output_operand_lossage ("missing operand");
22406 return;
22407 }
22408
22409 switch (GET_CODE (x))
22410 {
22411 case REG:
22412 asm_fprintf (stream, "%r", REGNO (x));
22413 break;
22414
22415 case MEM:
22416 output_address (GET_MODE (x), XEXP (x, 0));
22417 break;
22418
22419 case CONST_DOUBLE:
22420 {
22421 char fpstr[20];
22422 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22423 sizeof (fpstr), 0, 1);
22424 fprintf (stream, "#%s", fpstr);
22425 }
22426 break;
22427
22428 default:
22429 gcc_assert (GET_CODE (x) != NEG);
22430 fputc ('#', stream);
22431 if (GET_CODE (x) == HIGH)
22432 {
22433 fputs (":lower16:", stream);
22434 x = XEXP (x, 0);
22435 }
22436
22437 output_addr_const (stream, x);
22438 break;
22439 }
22440 }
22441 }
22442 \f
22443 /* Target hook for printing a memory address. */
22444 static void
22445 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22446 {
22447 if (TARGET_32BIT)
22448 {
22449 int is_minus = GET_CODE (x) == MINUS;
22450
22451 if (REG_P (x))
22452 asm_fprintf (stream, "[%r]", REGNO (x));
22453 else if (GET_CODE (x) == PLUS || is_minus)
22454 {
22455 rtx base = XEXP (x, 0);
22456 rtx index = XEXP (x, 1);
22457 HOST_WIDE_INT offset = 0;
22458 if (!REG_P (base)
22459 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22460 {
22461 /* Ensure that BASE is a register (one of them must be).
22462 Also ensure that SP is not used as an index
22463 register.  */
22464 std::swap (base, index);
22465 }
22466 switch (GET_CODE (index))
22467 {
22468 case CONST_INT:
22469 offset = INTVAL (index);
22470 if (is_minus)
22471 offset = -offset;
22472 asm_fprintf (stream, "[%r, #%wd]",
22473 REGNO (base), offset);
22474 break;
22475
22476 case REG:
22477 asm_fprintf (stream, "[%r, %s%r]",
22478 REGNO (base), is_minus ? "-" : "",
22479 REGNO (index));
22480 break;
22481
22482 case MULT:
22483 case ASHIFTRT:
22484 case LSHIFTRT:
22485 case ASHIFT:
22486 case ROTATERT:
22487 {
22488 asm_fprintf (stream, "[%r, %s%r",
22489 REGNO (base), is_minus ? "-" : "",
22490 REGNO (XEXP (index, 0)));
22491 arm_print_operand (stream, index, 'S');
22492 fputs ("]", stream);
22493 break;
22494 }
22495
22496 default:
22497 gcc_unreachable ();
22498 }
22499 }
22500 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22501 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22502 {
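          /* For example, a PRE_DEC of an SImode value through r3 prints
             "[r3, #-4]!", while a POST_INC prints "[r3], #4".  */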
22503 gcc_assert (REG_P (XEXP (x, 0)));
22504
22505 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22506 asm_fprintf (stream, "[%r, #%s%d]!",
22507 REGNO (XEXP (x, 0)),
22508 GET_CODE (x) == PRE_DEC ? "-" : "",
22509 GET_MODE_SIZE (mode));
22510 else
22511 asm_fprintf (stream, "[%r], #%s%d",
22512 REGNO (XEXP (x, 0)),
22513 GET_CODE (x) == POST_DEC ? "-" : "",
22514 GET_MODE_SIZE (mode));
22515 }
22516 else if (GET_CODE (x) == PRE_MODIFY)
22517 {
22518 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22519 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22520 asm_fprintf (stream, "#%wd]!",
22521 INTVAL (XEXP (XEXP (x, 1), 1)));
22522 else
22523 asm_fprintf (stream, "%r]!",
22524 REGNO (XEXP (XEXP (x, 1), 1)));
22525 }
22526 else if (GET_CODE (x) == POST_MODIFY)
22527 {
22528 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22529 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22530 asm_fprintf (stream, "#%wd",
22531 INTVAL (XEXP (XEXP (x, 1), 1)));
22532 else
22533 asm_fprintf (stream, "%r",
22534 REGNO (XEXP (XEXP (x, 1), 1)));
22535 }
22536 else output_addr_const (stream, x);
22537 }
22538 else
22539 {
22540 if (REG_P (x))
22541 asm_fprintf (stream, "[%r]", REGNO (x));
22542 else if (GET_CODE (x) == POST_INC)
22543 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22544 else if (GET_CODE (x) == PLUS)
22545 {
22546 gcc_assert (REG_P (XEXP (x, 0)));
22547 if (CONST_INT_P (XEXP (x, 1)))
22548 asm_fprintf (stream, "[%r, #%wd]",
22549 REGNO (XEXP (x, 0)),
22550 INTVAL (XEXP (x, 1)));
22551 else
22552 asm_fprintf (stream, "[%r, %r]",
22553 REGNO (XEXP (x, 0)),
22554 REGNO (XEXP (x, 1)));
22555 }
22556 else
22557 output_addr_const (stream, x);
22558 }
22559 }
22560 \f
22561 /* Target hook for indicating whether a punctuation character for
22562 TARGET_PRINT_OPERAND is valid. */
22563 static bool
22564 arm_print_operand_punct_valid_p (unsigned char code)
22565 {
22566 return (code == '@' || code == '|' || code == '.'
22567 || code == '(' || code == ')' || code == '#'
22568 || (TARGET_32BIT && (code == '?'))
22569 || (TARGET_THUMB2 && (code == '!'))
22570 || (TARGET_THUMB && (code == '_')));
22571 }
22572 \f
22573 /* Target hook for assembling integer objects. The ARM version needs to
22574 handle word-sized values specially. */
22575 static bool
22576 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22577 {
22578 machine_mode mode;
22579
22580 if (size == UNITS_PER_WORD && aligned_p)
22581 {
22582 fputs ("\t.word\t", asm_out_file);
22583 output_addr_const (asm_out_file, x);
22584
22585 /* Mark symbols as position independent. We only do this in the
22586 .text segment, not in the .data segment. */
22587 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22588 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22589 {
22590 /* See legitimize_pic_address for an explanation of the
22591 TARGET_VXWORKS_RTP check. */
22592 /* References to weak symbols cannot be resolved locally:
22593 they may be overridden by a non-weak definition at link
22594 time. */
22595 if (!arm_pic_data_is_text_relative
22596 || (GET_CODE (x) == SYMBOL_REF
22597 && (!SYMBOL_REF_LOCAL_P (x)
22598 || (SYMBOL_REF_DECL (x)
22599 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22600 fputs ("(GOT)", asm_out_file);
22601 else
22602 fputs ("(GOTOFF)", asm_out_file);
22603 }
22604 fputc ('\n', asm_out_file);
22605 return true;
22606 }
22607
22608 mode = GET_MODE (x);
22609
22610 if (arm_vector_mode_supported_p (mode))
22611 {
22612 int i, units;
22613
22614 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22615
22616 units = CONST_VECTOR_NUNITS (x);
22617 size = GET_MODE_UNIT_SIZE (mode);
22618
22619 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22620 for (i = 0; i < units; i++)
22621 {
22622 rtx elt = CONST_VECTOR_ELT (x, i);
22623 assemble_integer
22624 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22625 }
22626 else
22627 for (i = 0; i < units; i++)
22628 {
22629 rtx elt = CONST_VECTOR_ELT (x, i);
22630 assemble_real
22631 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22632 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22633 }
22634
22635 return true;
22636 }
22637
22638 return default_assemble_integer (x, size, aligned_p);
22639 }
22640
22641 static void
22642 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22643 {
22644 section *s;
22645
22646 if (!TARGET_AAPCS_BASED)
22647 {
22648 (is_ctor ?
22649 default_named_section_asm_out_constructor
22650 : default_named_section_asm_out_destructor) (symbol, priority);
22651 return;
22652 }
22653
22654 /* Put these in the .init_array section, using a special relocation. */
22655 if (priority != DEFAULT_INIT_PRIORITY)
22656 {
22657 char buf[18];
22658 sprintf (buf, "%s.%.5u",
22659 is_ctor ? ".init_array" : ".fini_array",
22660 priority);
22661 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22662 }
22663 else if (is_ctor)
22664 s = ctors_section;
22665 else
22666 s = dtors_section;
22667
22668 switch_to_section (s);
22669 assemble_align (POINTER_SIZE);
22670 fputs ("\t.word\t", asm_out_file);
22671 output_addr_const (asm_out_file, symbol);
22672 fputs ("(target1)\n", asm_out_file);
22673 }
22674
22675 /* Add a function to the list of static constructors. */
22676
22677 static void
22678 arm_elf_asm_constructor (rtx symbol, int priority)
22679 {
22680 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22681 }
22682
22683 /* Add a function to the list of static destructors. */
22684
22685 static void
22686 arm_elf_asm_destructor (rtx symbol, int priority)
22687 {
22688 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22689 }
22690 \f
22691 /* A finite state machine takes care of noticing whether or not instructions
22692 can be conditionally executed, and thus decrease execution time and code
22693 size by deleting branch instructions. The fsm is controlled by
22694 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22695
22696 /* The state of the fsm controlling condition codes are:
22697 0: normal, do nothing special
22698 1: make ASM_OUTPUT_OPCODE not output this instruction
22699 2: make ASM_OUTPUT_OPCODE not output this instruction
22700 3: make instructions conditional
22701 4: make instructions conditional
22702
22703 State transitions (state->state by whom under condition):
22704 0 -> 1 final_prescan_insn if the `target' is a label
22705 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22706 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22707 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22708 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22709 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22710 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22711 (the target insn is arm_target_insn).
22712
22713 If the jump clobbers the conditions then we use states 2 and 4.
22714
22715 A similar thing can be done with conditional return insns.
22716
22717 XXX In case the `target' is an unconditional branch, this conditionalising
22718 of the instructions always reduces code size, but not always execution
22719 time. But then, I want to reduce the code size to somewhere near what
22720 /bin/cc produces. */
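   /* An illustrative example (not taken from real compiler output): a
      branch that skips a single instruction, such as

          cmp   r0, #0
          beq   .L1
          add   r1, r1, #1
        .L1:

      is turned by this machinery into

          cmp   r0, #0
          addne r1, r1, #1

      i.e. the branch disappears and the skipped instruction becomes
      conditionally executed.  */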
22721
22722 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22723 instructions. When a COND_EXEC instruction is seen the subsequent
22724 instructions are scanned so that multiple conditional instructions can be
22725 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22726 specify the length and true/false mask for the IT block. These will be
22727 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22728
22729 /* Returns the index of the ARM condition code string in
22730 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22731 COMPARISON should be an rtx like `(eq (...) (...))'. */
22732
22733 enum arm_cond_code
22734 maybe_get_arm_condition_code (rtx comparison)
22735 {
22736 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22737 enum arm_cond_code code;
22738 enum rtx_code comp_code = GET_CODE (comparison);
22739
22740 if (GET_MODE_CLASS (mode) != MODE_CC)
22741 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22742 XEXP (comparison, 1));
22743
22744 switch (mode)
22745 {
22746 case CC_DNEmode: code = ARM_NE; goto dominance;
22747 case CC_DEQmode: code = ARM_EQ; goto dominance;
22748 case CC_DGEmode: code = ARM_GE; goto dominance;
22749 case CC_DGTmode: code = ARM_GT; goto dominance;
22750 case CC_DLEmode: code = ARM_LE; goto dominance;
22751 case CC_DLTmode: code = ARM_LT; goto dominance;
22752 case CC_DGEUmode: code = ARM_CS; goto dominance;
22753 case CC_DGTUmode: code = ARM_HI; goto dominance;
22754 case CC_DLEUmode: code = ARM_LS; goto dominance;
22755 case CC_DLTUmode: code = ARM_CC;
22756
22757 dominance:
22758 if (comp_code == EQ)
22759 return ARM_INVERSE_CONDITION_CODE (code);
22760 if (comp_code == NE)
22761 return code;
22762 return ARM_NV;
22763
22764 case CC_NOOVmode:
22765 switch (comp_code)
22766 {
22767 case NE: return ARM_NE;
22768 case EQ: return ARM_EQ;
22769 case GE: return ARM_PL;
22770 case LT: return ARM_MI;
22771 default: return ARM_NV;
22772 }
22773
22774 case CC_Zmode:
22775 switch (comp_code)
22776 {
22777 case NE: return ARM_NE;
22778 case EQ: return ARM_EQ;
22779 default: return ARM_NV;
22780 }
22781
22782 case CC_Nmode:
22783 switch (comp_code)
22784 {
22785 case NE: return ARM_MI;
22786 case EQ: return ARM_PL;
22787 default: return ARM_NV;
22788 }
22789
22790 case CCFPEmode:
22791 case CCFPmode:
22792 /* We can handle all cases except UNEQ and LTGT. */
22793 switch (comp_code)
22794 {
22795 case GE: return ARM_GE;
22796 case GT: return ARM_GT;
22797 case LE: return ARM_LS;
22798 case LT: return ARM_MI;
22799 case NE: return ARM_NE;
22800 case EQ: return ARM_EQ;
22801 case ORDERED: return ARM_VC;
22802 case UNORDERED: return ARM_VS;
22803 case UNLT: return ARM_LT;
22804 case UNLE: return ARM_LE;
22805 case UNGT: return ARM_HI;
22806 case UNGE: return ARM_PL;
22807 /* UNEQ and LTGT do not have a representation. */
22808 case UNEQ: /* Fall through. */
22809 case LTGT: /* Fall through. */
22810 default: return ARM_NV;
22811 }
22812
22813 case CC_SWPmode:
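       /* The flags were set by a comparison with its operands swapped, so
          each condition has to be tested in its swapped form, e.g. GE is
          tested as LE and GTU as CC (unsigned lower).  */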
22814 switch (comp_code)
22815 {
22816 case NE: return ARM_NE;
22817 case EQ: return ARM_EQ;
22818 case GE: return ARM_LE;
22819 case GT: return ARM_LT;
22820 case LE: return ARM_GE;
22821 case LT: return ARM_GT;
22822 case GEU: return ARM_LS;
22823 case GTU: return ARM_CC;
22824 case LEU: return ARM_CS;
22825 case LTU: return ARM_HI;
22826 default: return ARM_NV;
22827 }
22828
22829 case CC_Cmode:
22830 switch (comp_code)
22831 {
22832 case LTU: return ARM_CS;
22833 case GEU: return ARM_CC;
22834 case NE: return ARM_CS;
22835 case EQ: return ARM_CC;
22836 default: return ARM_NV;
22837 }
22838
22839 case CC_CZmode:
22840 switch (comp_code)
22841 {
22842 case NE: return ARM_NE;
22843 case EQ: return ARM_EQ;
22844 case GEU: return ARM_CS;
22845 case GTU: return ARM_HI;
22846 case LEU: return ARM_LS;
22847 case LTU: return ARM_CC;
22848 default: return ARM_NV;
22849 }
22850
22851 case CC_NCVmode:
22852 switch (comp_code)
22853 {
22854 case GE: return ARM_GE;
22855 case LT: return ARM_LT;
22856 case GEU: return ARM_CS;
22857 case LTU: return ARM_CC;
22858 default: return ARM_NV;
22859 }
22860
22861 case CC_Vmode:
22862 switch (comp_code)
22863 {
22864 case NE: return ARM_VS;
22865 case EQ: return ARM_VC;
22866 default: return ARM_NV;
22867 }
22868
22869 case CCmode:
22870 switch (comp_code)
22871 {
22872 case NE: return ARM_NE;
22873 case EQ: return ARM_EQ;
22874 case GE: return ARM_GE;
22875 case GT: return ARM_GT;
22876 case LE: return ARM_LE;
22877 case LT: return ARM_LT;
22878 case GEU: return ARM_CS;
22879 case GTU: return ARM_HI;
22880 case LEU: return ARM_LS;
22881 case LTU: return ARM_CC;
22882 default: return ARM_NV;
22883 }
22884
22885 default: gcc_unreachable ();
22886 }
22887 }
22888
22889 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22890 static enum arm_cond_code
22891 get_arm_condition_code (rtx comparison)
22892 {
22893 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22894 gcc_assert (code != ARM_NV);
22895 return code;
22896 }
22897
22898 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22899 instructions. */
22900 void
22901 thumb2_final_prescan_insn (rtx_insn *insn)
22902 {
22903 rtx_insn *first_insn = insn;
22904 rtx body = PATTERN (insn);
22905 rtx predicate;
22906 enum arm_cond_code code;
22907 int n;
22908 int mask;
22909 int max;
22910
22911 /* max_insns_skipped in the tune was already taken into account in the
22912 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22913 stage just emit the IT blocks as they are; it does not make sense to
22914 split them.  */
22915 max = MAX_INSN_PER_IT_BLOCK;
22916
22917 /* Remove the previous insn from the count of insns to be output. */
22918 if (arm_condexec_count)
22919 arm_condexec_count--;
22920
22921 /* Nothing to do if we are already inside a conditional block. */
22922 if (arm_condexec_count)
22923 return;
22924
22925 if (GET_CODE (body) != COND_EXEC)
22926 return;
22927
22928 /* Conditional jumps are implemented directly. */
22929 if (JUMP_P (insn))
22930 return;
22931
22932 predicate = COND_EXEC_TEST (body);
22933 arm_current_cc = get_arm_condition_code (predicate);
22934
22935 n = get_attr_ce_count (insn);
22936 arm_condexec_count = 1;
22937 arm_condexec_mask = (1 << n) - 1;
22938 arm_condexec_masklen = n;
22939 /* See if subsequent instructions can be combined into the same block. */
22940 for (;;)
22941 {
22942 insn = next_nonnote_insn (insn);
22943
22944 /* Jumping into the middle of an IT block is illegal, so a label or
22945 barrier terminates the block. */
22946 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22947 break;
22948
22949 body = PATTERN (insn);
22950 /* USE and CLOBBER aren't really insns, so just skip them. */
22951 if (GET_CODE (body) == USE
22952 || GET_CODE (body) == CLOBBER)
22953 continue;
22954
22955 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22956 if (GET_CODE (body) != COND_EXEC)
22957 break;
22958 /* Maximum number of conditionally executed instructions in a block. */
22959 n = get_attr_ce_count (insn);
22960 if (arm_condexec_masklen + n > max)
22961 break;
22962
22963 predicate = COND_EXEC_TEST (body);
22964 code = get_arm_condition_code (predicate);
22965 mask = (1 << n) - 1;
22966 if (arm_current_cc == code)
22967 arm_condexec_mask |= (mask << arm_condexec_masklen);
22968 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
22969 break;
22970
22971 arm_condexec_count++;
22972 arm_condexec_masklen += n;
22973
22974 /* A jump must be the last instruction in a conditional block. */
22975 if (JUMP_P (insn))
22976 break;
22977 }
22978 /* Restore recog_data (getting the attributes of other insns can
22979 destroy this array, but final.c assumes that it remains intact
22980 across this call). */
22981 extract_constrain_insn_cached (first_insn);
22982 }
22983
22984 void
22985 arm_final_prescan_insn (rtx_insn *insn)
22986 {
22987 /* BODY will hold the body of INSN. */
22988 rtx body = PATTERN (insn);
22989
22990 /* This will be 1 if trying to repeat the trick, and things need to be
22991 reversed if it appears to fail. */
22992 int reverse = 0;
22993
22994 /* If we start with a return insn, we only succeed if we find another one. */
22995 int seeking_return = 0;
22996 enum rtx_code return_code = UNKNOWN;
22997
22998 /* START_INSN will hold the insn from where we start looking. This is the
22999 first insn after the following code_label if REVERSE is true. */
23000 rtx_insn *start_insn = insn;
23001
23002 /* If in state 4, check if the target branch is reached, in order to
23003 change back to state 0. */
23004 if (arm_ccfsm_state == 4)
23005 {
23006 if (insn == arm_target_insn)
23007 {
23008 arm_target_insn = NULL;
23009 arm_ccfsm_state = 0;
23010 }
23011 return;
23012 }
23013
23014 /* If in state 3, it is possible to repeat the trick, if this insn is an
23015 unconditional branch to a label, and immediately following this branch
23016 is the previous target label which is only used once, and the label this
23017 branch jumps to is not too far off. */
23018 if (arm_ccfsm_state == 3)
23019 {
23020 if (simplejump_p (insn))
23021 {
23022 start_insn = next_nonnote_insn (start_insn);
23023 if (BARRIER_P (start_insn))
23024 {
23025 /* XXX Isn't this always a barrier? */
23026 start_insn = next_nonnote_insn (start_insn);
23027 }
23028 if (LABEL_P (start_insn)
23029 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23030 && LABEL_NUSES (start_insn) == 1)
23031 reverse = TRUE;
23032 else
23033 return;
23034 }
23035 else if (ANY_RETURN_P (body))
23036 {
23037 start_insn = next_nonnote_insn (start_insn);
23038 if (BARRIER_P (start_insn))
23039 start_insn = next_nonnote_insn (start_insn);
23040 if (LABEL_P (start_insn)
23041 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23042 && LABEL_NUSES (start_insn) == 1)
23043 {
23044 reverse = TRUE;
23045 seeking_return = 1;
23046 return_code = GET_CODE (body);
23047 }
23048 else
23049 return;
23050 }
23051 else
23052 return;
23053 }
23054
23055 gcc_assert (!arm_ccfsm_state || reverse);
23056 if (!JUMP_P (insn))
23057 return;
23058
23059 /* This jump might be paralleled with a clobber of the condition codes;
23060 the jump should always come first.  */
23061 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23062 body = XVECEXP (body, 0, 0);
23063
23064 if (reverse
23065 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23066 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23067 {
23068 int insns_skipped;
23069 int fail = FALSE, succeed = FALSE;
23070 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23071 int then_not_else = TRUE;
23072 rtx_insn *this_insn = start_insn;
23073 rtx label = 0;
23074
23075 /* Register the insn jumped to. */
23076 if (reverse)
23077 {
23078 if (!seeking_return)
23079 label = XEXP (SET_SRC (body), 0);
23080 }
23081 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23082 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23083 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23084 {
23085 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23086 then_not_else = FALSE;
23087 }
23088 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23089 {
23090 seeking_return = 1;
23091 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23092 }
23093 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23094 {
23095 seeking_return = 1;
23096 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23097 then_not_else = FALSE;
23098 }
23099 else
23100 gcc_unreachable ();
23101
23102 /* See how many insns this branch skips, and what kind of insns. If all
23103 insns are okay, and the label or unconditional branch to the same
23104 label is not too far away, succeed. */
23105 for (insns_skipped = 0;
23106 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23107 {
23108 rtx scanbody;
23109
23110 this_insn = next_nonnote_insn (this_insn);
23111 if (!this_insn)
23112 break;
23113
23114 switch (GET_CODE (this_insn))
23115 {
23116 case CODE_LABEL:
23117 /* Succeed if it is the target label, otherwise fail since
23118 control falls in from somewhere else. */
23119 if (this_insn == label)
23120 {
23121 arm_ccfsm_state = 1;
23122 succeed = TRUE;
23123 }
23124 else
23125 fail = TRUE;
23126 break;
23127
23128 case BARRIER:
23129 /* Succeed if the following insn is the target label.
23130 Otherwise fail.
23131 If return insns are used then the last insn in a function
23132 will be a barrier. */
23133 this_insn = next_nonnote_insn (this_insn);
23134 if (this_insn && this_insn == label)
23135 {
23136 arm_ccfsm_state = 1;
23137 succeed = TRUE;
23138 }
23139 else
23140 fail = TRUE;
23141 break;
23142
23143 case CALL_INSN:
23144 /* The AAPCS says that conditional calls should not be
23145 used since they make interworking inefficient (the
23146 linker can't transform BL<cond> into BLX). That's
23147 only a problem if the machine has BLX. */
23148 if (arm_arch5)
23149 {
23150 fail = TRUE;
23151 break;
23152 }
23153
23154 /* Succeed if the following insn is the target label, or
23155 if the following two insns are a barrier and the
23156 target label. */
23157 this_insn = next_nonnote_insn (this_insn);
23158 if (this_insn && BARRIER_P (this_insn))
23159 this_insn = next_nonnote_insn (this_insn);
23160
23161 if (this_insn && this_insn == label
23162 && insns_skipped < max_insns_skipped)
23163 {
23164 arm_ccfsm_state = 1;
23165 succeed = TRUE;
23166 }
23167 else
23168 fail = TRUE;
23169 break;
23170
23171 case JUMP_INSN:
23172 /* If this is an unconditional branch to the same label, succeed.
23173 If it is to another label, do nothing. If it is conditional,
23174 fail. */
23175 /* XXX Probably, the tests for SET and the PC are
23176 unnecessary. */
23177
23178 scanbody = PATTERN (this_insn);
23179 if (GET_CODE (scanbody) == SET
23180 && GET_CODE (SET_DEST (scanbody)) == PC)
23181 {
23182 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23183 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23184 {
23185 arm_ccfsm_state = 2;
23186 succeed = TRUE;
23187 }
23188 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23189 fail = TRUE;
23190 }
23191 /* Fail if a conditional return is undesirable (e.g. on a
23192 StrongARM), but still allow this if optimizing for size. */
23193 else if (GET_CODE (scanbody) == return_code
23194 && !use_return_insn (TRUE, NULL)
23195 && !optimize_size)
23196 fail = TRUE;
23197 else if (GET_CODE (scanbody) == return_code)
23198 {
23199 arm_ccfsm_state = 2;
23200 succeed = TRUE;
23201 }
23202 else if (GET_CODE (scanbody) == PARALLEL)
23203 {
23204 switch (get_attr_conds (this_insn))
23205 {
23206 case CONDS_NOCOND:
23207 break;
23208 default:
23209 fail = TRUE;
23210 break;
23211 }
23212 }
23213 else
23214 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23215
23216 break;
23217
23218 case INSN:
23219 /* Instructions using or affecting the condition codes make it
23220 fail. */
23221 scanbody = PATTERN (this_insn);
23222 if (!(GET_CODE (scanbody) == SET
23223 || GET_CODE (scanbody) == PARALLEL)
23224 || get_attr_conds (this_insn) != CONDS_NOCOND)
23225 fail = TRUE;
23226 break;
23227
23228 default:
23229 break;
23230 }
23231 }
23232 if (succeed)
23233 {
23234 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23235 arm_target_label = CODE_LABEL_NUMBER (label);
23236 else
23237 {
23238 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23239
23240 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23241 {
23242 this_insn = next_nonnote_insn (this_insn);
23243 gcc_assert (!this_insn
23244 || (!BARRIER_P (this_insn)
23245 && !LABEL_P (this_insn)));
23246 }
23247 if (!this_insn)
23248 {
23249 /* Oh, dear! We ran off the end; give up.  */
23250 extract_constrain_insn_cached (insn);
23251 arm_ccfsm_state = 0;
23252 arm_target_insn = NULL;
23253 return;
23254 }
23255 arm_target_insn = this_insn;
23256 }
23257
23258 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23259 what it was. */
23260 if (!reverse)
23261 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23262
23263 if (reverse || then_not_else)
23264 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23265 }
23266
23267 /* Restore recog_data (getting the attributes of other insns can
23268 destroy this array, but final.c assumes that it remains intact
23269 across this call).  */
23270 extract_constrain_insn_cached (insn);
23271 }
23272 }
23273
23274 /* Output IT instructions. */
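/* For example, with arm_condexec_masklen == 3, arm_condexec_mask == 0x5
   (binary 101) and arm_current_cc == ARM_EQ, this prints "itet eq": the
   first and third instructions use the EQ condition, the second uses NE.  */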
23275 void
23276 thumb2_asm_output_opcode (FILE * stream)
23277 {
23278 char buff[5];
23279 int n;
23280
23281 if (arm_condexec_mask)
23282 {
23283 for (n = 0; n < arm_condexec_masklen; n++)
23284 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23285 buff[n] = 0;
23286 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23287 arm_condition_codes[arm_current_cc]);
23288 arm_condexec_mask = 0;
23289 }
23290 }
23291
23292 /* Returns true if REGNO is a valid register
23293 for holding a quantity of type MODE. */
23294 int
23295 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23296 {
23297 if (GET_MODE_CLASS (mode) == MODE_CC)
23298 return (regno == CC_REGNUM
23299 || (TARGET_HARD_FLOAT
23300 && regno == VFPCC_REGNUM));
23301
23302 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23303 return false;
23304
23305 if (TARGET_THUMB1)
23306 /* For the Thumb we only allow values bigger than SImode in
23307 registers 0 - 6, so that there is always a second low
23308 register available to hold the upper part of the value.
23309 We probably ought to ensure that the register is the
23310 start of an even-numbered register pair.  */
23311 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23312
23313 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23314 {
23315 if (mode == SFmode || mode == SImode)
23316 return VFP_REGNO_OK_FOR_SINGLE (regno);
23317
23318 if (mode == DFmode)
23319 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23320
23321 if (mode == HFmode)
23322 return VFP_REGNO_OK_FOR_SINGLE (regno);
23323
23324 /* VFP registers can hold HImode values. */
23325 if (mode == HImode)
23326 return VFP_REGNO_OK_FOR_SINGLE (regno);
23327
23328 if (TARGET_NEON)
23329 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23330 || (VALID_NEON_QREG_MODE (mode)
23331 && NEON_REGNO_OK_FOR_QUAD (regno))
23332 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23333 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23334 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23335 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23336 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23337
23338 return FALSE;
23339 }
23340
23341 if (TARGET_REALLY_IWMMXT)
23342 {
23343 if (IS_IWMMXT_GR_REGNUM (regno))
23344 return mode == SImode;
23345
23346 if (IS_IWMMXT_REGNUM (regno))
23347 return VALID_IWMMXT_REG_MODE (mode);
23348 }
23349
23350 /* We allow almost any value to be stored in the general registers.
23351 Restrict doubleword quantities to even register pairs in ARM state
23352 so that we can use ldrd. Do not allow very large Neon structure
23353 opaque modes in general registers; they would use too many. */
23354 if (regno <= LAST_ARM_REGNUM)
23355 {
23356 if (ARM_NUM_REGS (mode) > 4)
23357 return FALSE;
23358
23359 if (TARGET_THUMB2)
23360 return TRUE;
23361
23362 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23363 }
23364
23365 if (regno == FRAME_POINTER_REGNUM
23366 || regno == ARG_POINTER_REGNUM)
23367 /* We only allow integers in the fake hard registers. */
23368 return GET_MODE_CLASS (mode) == MODE_INT;
23369
23370 return FALSE;
23371 }
23372
23373 /* Implement MODES_TIEABLE_P. */
23374
23375 bool
23376 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23377 {
23378 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23379 return true;
23380
23381 /* We specifically want to allow elements of "structure" modes to
23382 be tieable to the structure. This more general condition allows
23383 other rarer situations too. */
23384 if (TARGET_NEON
23385 && (VALID_NEON_DREG_MODE (mode1)
23386 || VALID_NEON_QREG_MODE (mode1)
23387 || VALID_NEON_STRUCT_MODE (mode1))
23388 && (VALID_NEON_DREG_MODE (mode2)
23389 || VALID_NEON_QREG_MODE (mode2)
23390 || VALID_NEON_STRUCT_MODE (mode2)))
23391 return true;
23392
23393 return false;
23394 }
23395
23396 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23397 not used in arm mode. */
23398
23399 enum reg_class
23400 arm_regno_class (int regno)
23401 {
23402 if (regno == PC_REGNUM)
23403 return NO_REGS;
23404
23405 if (TARGET_THUMB1)
23406 {
23407 if (regno == STACK_POINTER_REGNUM)
23408 return STACK_REG;
23409 if (regno == CC_REGNUM)
23410 return CC_REG;
23411 if (regno < 8)
23412 return LO_REGS;
23413 return HI_REGS;
23414 }
23415
23416 if (TARGET_THUMB2 && regno < 8)
23417 return LO_REGS;
23418
23419 if ( regno <= LAST_ARM_REGNUM
23420 || regno == FRAME_POINTER_REGNUM
23421 || regno == ARG_POINTER_REGNUM)
23422 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23423
23424 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23425 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23426
23427 if (IS_VFP_REGNUM (regno))
23428 {
23429 if (regno <= D7_VFP_REGNUM)
23430 return VFP_D0_D7_REGS;
23431 else if (regno <= LAST_LO_VFP_REGNUM)
23432 return VFP_LO_REGS;
23433 else
23434 return VFP_HI_REGS;
23435 }
23436
23437 if (IS_IWMMXT_REGNUM (regno))
23438 return IWMMXT_REGS;
23439
23440 if (IS_IWMMXT_GR_REGNUM (regno))
23441 return IWMMXT_GR_REGS;
23442
23443 return NO_REGS;
23444 }
23445
23446 /* Handle a special case when computing the offset
23447 of an argument from the frame pointer. */
23448 int
23449 arm_debugger_arg_offset (int value, rtx addr)
23450 {
23451 rtx_insn *insn;
23452
23453 /* We are only interested if dbxout_parms() failed to compute the offset. */
23454 if (value != 0)
23455 return 0;
23456
23457 /* We can only cope with the case where the address is held in a register. */
23458 if (!REG_P (addr))
23459 return 0;
23460
23461 /* If we are using the frame pointer to point at the argument, then
23462 an offset of 0 is correct. */
23463 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23464 return 0;
23465
23466 /* If we are using the stack pointer to point at the
23467 argument, then an offset of 0 is correct. */
23468 /* ??? Check this is consistent with thumb2 frame layout. */
23469 if ((TARGET_THUMB || !frame_pointer_needed)
23470 && REGNO (addr) == SP_REGNUM)
23471 return 0;
23472
23473 /* Oh dear. The argument is pointed to by a register rather
23474 than being held in a register, or being stored at a known
23475 offset from the frame pointer. Since GDB only understands
23476 those two kinds of argument we must translate the address
23477 held in the register into an offset from the frame pointer.
23478 We do this by searching through the insns for the function
23479 looking to see where this register gets its value. If the
23480 register is initialized from the frame pointer plus an offset
23481 then we are in luck and we can continue, otherwise we give up.
23482
23483 This code is exercised by producing debugging information
23484 for a function with arguments like this:
23485
23486 double func (double a, double b, int c, double d) {return d;}
23487
23488 Without this code the stab for parameter 'd' will be set to
23489 an offset of 0 from the frame pointer, rather than 8. */
23490
23491 /* The if() statement says:
23492
23493 If the insn is a normal instruction
23494 and if the insn is setting the value in a register
23495 and if the register being set is the register holding the address of the argument
23496 and if the address is computed by an addition
23497 that involves adding to a register
23498 which is the frame pointer
23499 a constant integer
23500
23501 then... */
23502
23503 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23504 {
23505 if ( NONJUMP_INSN_P (insn)
23506 && GET_CODE (PATTERN (insn)) == SET
23507 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23508 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23509 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23510 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23511 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23512 )
23513 {
23514 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23515
23516 break;
23517 }
23518 }
23519
23520 if (value == 0)
23521 {
23522 debug_rtx (addr);
23523 warning (0, "unable to compute real location of stacked parameter");
23524 value = 8; /* XXX magic hack */
23525 }
23526
23527 return value;
23528 }
23529 \f
23530 /* Implement TARGET_PROMOTED_TYPE. */
23531
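/* A note on the hook below: arm_fp16_type_node is GCC's __fp16, the ARM
   half-precision type.  Promoting it to float means arithmetic on __fp16
   values is carried out in single precision, treating __fp16 as a
   storage-only format.  */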
23532 static tree
23533 arm_promoted_type (const_tree t)
23534 {
23535 if (SCALAR_FLOAT_TYPE_P (t)
23536 && TYPE_PRECISION (t) == 16
23537 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23538 return float_type_node;
23539 return NULL_TREE;
23540 }
23541
23542 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23543 This simply adds HFmode as a supported mode; even though we don't
23544 implement arithmetic on this type directly, it's supported by
23545 optabs conversions, much the way the double-word arithmetic is
23546 special-cased in the default hook. */
23547
23548 static bool
23549 arm_scalar_mode_supported_p (machine_mode mode)
23550 {
23551 if (mode == HFmode)
23552 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23553 else if (ALL_FIXED_POINT_MODE_P (mode))
23554 return true;
23555 else
23556 return default_scalar_mode_supported_p (mode);
23557 }
23558
23559 /* Set the value of FLT_EVAL_METHOD.
23560 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23561
23562 0: evaluate all operations and constants, whose semantic type has at
23563 most the range and precision of type float, to the range and
23564 precision of float; evaluate all other operations and constants to
23565 the range and precision of the semantic type;
23566
23567 N, where _FloatN is a supported interchange floating type:
23568 evaluate all operations and constants, whose semantic type has at
23569 most the range and precision of _FloatN type, to the range and
23570 precision of the _FloatN type; evaluate all other operations and
23571 constants to the range and precision of the semantic type;
23572
23573 If we have the ARMv8.2-A extensions then we support _Float16 in native
23574 precision, so we should set this to 16. Otherwise, we support the type,
23575 but want to evaluate expressions in float precision, so set this to
23576 0. */
23577
23578 static enum flt_eval_method
23579 arm_excess_precision (enum excess_precision_type type)
23580 {
23581 switch (type)
23582 {
23583 case EXCESS_PRECISION_TYPE_FAST:
23584 case EXCESS_PRECISION_TYPE_STANDARD:
23585 /* We can calculate either in 16-bit range and precision or
23586 32-bit range and precision. Make that decision based on whether
23587 we have native support for the ARMv8.2-A 16-bit floating-point
23588 instructions or not. */
23589 return (TARGET_VFP_FP16INST
23590 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23591 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23592 case EXCESS_PRECISION_TYPE_IMPLICIT:
23593 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23594 default:
23595 gcc_unreachable ();
23596 }
23597 return FLT_EVAL_METHOD_UNPREDICTABLE;
23598 }
23599
23600
23601 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23602 _Float16 if we are using anything other than ieee format for 16-bit
23603 floating point. Otherwise, punt to the default implementation. */
23604 static machine_mode
23605 arm_floatn_mode (int n, bool extended)
23606 {
23607 if (!extended && n == 16)
23608 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23609
23610 return default_floatn_mode (n, extended);
23611 }
23612
23613
23614 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23615 not to early-clobber SRC registers in the process.
23616
23617 We assume that the operands described by SRC and DEST represent a
23618 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23619 number of components into which the copy has been decomposed. */
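/* If the destination overlaps the source and does not start at a lower
   register number, the component moves are emitted in reverse order so
   that no source register is clobbered before it has been read.  */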
23620 void
23621 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23622 {
23623 unsigned int i;
23624
23625 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23626 || REGNO (operands[0]) < REGNO (operands[1]))
23627 {
23628 for (i = 0; i < count; i++)
23629 {
23630 operands[2 * i] = dest[i];
23631 operands[2 * i + 1] = src[i];
23632 }
23633 }
23634 else
23635 {
23636 for (i = 0; i < count; i++)
23637 {
23638 operands[2 * i] = dest[count - i - 1];
23639 operands[2 * i + 1] = src[count - i - 1];
23640 }
23641 }
23642 }
23643
23644 /* Split operands into moves from op[1] + op[2] into op[0]. */
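/* op[0] is a double-width vector whose low half receives op[1] and whose
   high half receives op[2].  Halves that are already in the right place
   are skipped, and a swapped pair is emitted as a single parallel so that
   it can become a VSWP.  */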
23645
23646 void
23647 neon_split_vcombine (rtx operands[3])
23648 {
23649 unsigned int dest = REGNO (operands[0]);
23650 unsigned int src1 = REGNO (operands[1]);
23651 unsigned int src2 = REGNO (operands[2]);
23652 machine_mode halfmode = GET_MODE (operands[1]);
23653 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23654 rtx destlo, desthi;
23655
23656 if (src1 == dest && src2 == dest + halfregs)
23657 {
23658 /* No-op move. Can't split to nothing; emit something. */
23659 emit_note (NOTE_INSN_DELETED);
23660 return;
23661 }
23662
23663 /* Preserve register attributes for variable tracking. */
23664 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23665 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23666 GET_MODE_SIZE (halfmode));
23667
23668 /* Special case of reversed high/low parts. Use VSWP. */
23669 if (src2 == dest && src1 == dest + halfregs)
23670 {
23671 rtx x = gen_rtx_SET (destlo, operands[1]);
23672 rtx y = gen_rtx_SET (desthi, operands[2]);
23673 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23674 return;
23675 }
23676
23677 if (!reg_overlap_mentioned_p (operands[2], destlo))
23678 {
23679 /* Try to avoid unnecessary moves if part of the result
23680 is in the right place already. */
23681 if (src1 != dest)
23682 emit_move_insn (destlo, operands[1]);
23683 if (src2 != dest + halfregs)
23684 emit_move_insn (desthi, operands[2]);
23685 }
23686 else
23687 {
23688 if (src2 != dest + halfregs)
23689 emit_move_insn (desthi, operands[2]);
23690 if (src1 != dest)
23691 emit_move_insn (destlo, operands[1]);
23692 }
23693 }
23694 \f
23695 /* Return the number (counting from 0) of
23696 the least significant set bit in MASK. */
23697
23698 inline static int
23699 number_of_first_bit_set (unsigned mask)
23700 {
23701 return ctz_hwi (mask);
23702 }
23703
23704 /* Like emit_multi_reg_push, but allowing for a different set of
23705 registers to be described as saved. MASK is the set of registers
23706 to be saved; REAL_REGS is the set of registers to be described as
23707 saved. If REAL_REGS is 0, only describe the stack adjustment. */
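/* For example, when the Thumb-1 prologue saves high registers by first
   copying them into free low registers, MASK contains the low registers
   actually pushed while REAL_REGS contains the high registers whose
   values they hold, so the unwind information describes the real saves.  */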
23708
23709 static rtx_insn *
23710 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23711 {
23712 unsigned long regno;
23713 rtx par[10], tmp, reg;
23714 rtx_insn *insn;
23715 int i, j;
23716
23717 /* Build the parallel of the registers actually being stored. */
23718 for (i = 0; mask; ++i, mask &= mask - 1)
23719 {
23720 regno = ctz_hwi (mask);
23721 reg = gen_rtx_REG (SImode, regno);
23722
23723 if (i == 0)
23724 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23725 else
23726 tmp = gen_rtx_USE (VOIDmode, reg);
23727
23728 par[i] = tmp;
23729 }
23730
23731 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23732 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23733 tmp = gen_frame_mem (BLKmode, tmp);
23734 tmp = gen_rtx_SET (tmp, par[0]);
23735 par[0] = tmp;
23736
23737 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23738 insn = emit_insn (tmp);
23739
23740 /* Always build the stack adjustment note for unwind info. */
23741 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23742 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23743 par[0] = tmp;
23744
23745 /* Build the parallel of the registers recorded as saved for unwind. */
23746 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23747 {
23748 regno = ctz_hwi (real_regs);
23749 reg = gen_rtx_REG (SImode, regno);
23750
23751 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23752 tmp = gen_frame_mem (SImode, tmp);
23753 tmp = gen_rtx_SET (tmp, reg);
23754 RTX_FRAME_RELATED_P (tmp) = 1;
23755 par[j + 1] = tmp;
23756 }
23757
23758 if (j == 0)
23759 tmp = par[0];
23760 else
23761 {
23762 RTX_FRAME_RELATED_P (par[0]) = 1;
23763 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23764 }
23765
23766 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23767
23768 return insn;
23769 }
23770
23771 /* Emit code to pop registers from the stack.  F is the
23772 assembly file.  MASK is the registers to pop. */
23773 static void
23774 thumb_pop (FILE *f, unsigned long mask)
23775 {
23776 int regno;
23777 int lo_mask = mask & 0xFF;
23778 int pushed_words = 0;
23779
23780 gcc_assert (mask);
23781
23782 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23783 {
23784 /* Special case. Do not generate a POP PC statement here, do it in
23785 thumb_exit() */
23786 thumb_exit (f, -1);
23787 return;
23788 }
23789
23790 fprintf (f, "\tpop\t{");
23791
23792 /* Look at the low registers first. */
23793 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23794 {
23795 if (lo_mask & 1)
23796 {
23797 asm_fprintf (f, "%r", regno);
23798
23799 if ((lo_mask & ~1) != 0)
23800 fprintf (f, ", ");
23801
23802 pushed_words++;
23803 }
23804 }
23805
23806 if (mask & (1 << PC_REGNUM))
23807 {
23808 /* Catch popping the PC. */
23809 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23810 || IS_CMSE_ENTRY (arm_current_func_type ()))
23811 {
23812 /* The PC is never popped directly; instead
23813 it is popped into r3 and then BX is used. */
23814 fprintf (f, "}\n");
23815
23816 thumb_exit (f, -1);
23817
23818 return;
23819 }
23820 else
23821 {
23822 if (mask & 0xFF)
23823 fprintf (f, ", ");
23824
23825 asm_fprintf (f, "%r", PC_REGNUM);
23826 }
23827 }
23828
23829 fprintf (f, "}\n");
23830 }
23831
23832 /* Generate code to return from a thumb function.
23833 If 'reg_containing_return_addr' is -1, then the return address is
23834 actually on the stack, at the stack pointer. */
23835 static void
23836 thumb_exit (FILE *f, int reg_containing_return_addr)
23837 {
23838 unsigned regs_available_for_popping;
23839 unsigned regs_to_pop;
23840 int pops_needed;
23841 unsigned available;
23842 unsigned required;
23843 machine_mode mode;
23844 int size;
23845 int restore_a4 = FALSE;
23846
23847 /* Compute the registers we need to pop. */
23848 regs_to_pop = 0;
23849 pops_needed = 0;
23850
23851 if (reg_containing_return_addr == -1)
23852 {
23853 regs_to_pop |= 1 << LR_REGNUM;
23854 ++pops_needed;
23855 }
23856
23857 if (TARGET_BACKTRACE)
23858 {
23859 /* Restore the (ARM) frame pointer and stack pointer. */
23860 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23861 pops_needed += 2;
23862 }
23863
23864 /* If there is nothing to pop then just emit the BX instruction and
23865 return. */
23866 if (pops_needed == 0)
23867 {
23868 if (crtl->calls_eh_return)
23869 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23870
23871 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23872 {
23873 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23874 reg_containing_return_addr);
23875 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23876 }
23877 else
23878 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23879 return;
23880 }
23881 /* Otherwise if we are not supporting interworking and we have not created
23882 a backtrace structure and the function was not entered in ARM mode then
23883 just pop the return address straight into the PC. */
23884 else if (!TARGET_INTERWORK
23885 && !TARGET_BACKTRACE
23886 && !is_called_in_ARM_mode (current_function_decl)
23887 && !crtl->calls_eh_return
23888 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23889 {
23890 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23891 return;
23892 }
23893
23894 /* Find out how many of the (return) argument registers we can corrupt. */
23895 regs_available_for_popping = 0;
23896
23897 /* If returning via __builtin_eh_return, the bottom three registers
23898 all contain information needed for the return. */
23899 if (crtl->calls_eh_return)
23900 size = 12;
23901 else
23902 {
23903 /* If possible, deduce the registers used from the function's
23904 return value.  This is more reliable than examining
23905 df_regs_ever_live_p () because that will be set if the register is
23906 ever used in the function, not just if the register is used
23907 to hold a return value. */
23908
23909 if (crtl->return_rtx != 0)
23910 mode = GET_MODE (crtl->return_rtx);
23911 else
23912 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23913
23914 size = GET_MODE_SIZE (mode);
23915
23916 if (size == 0)
23917 {
23918 /* In a void function we can use any argument register.
23919 In a function that returns a structure on the stack
23920 we can use the second and third argument registers. */
23921 if (mode == VOIDmode)
23922 regs_available_for_popping =
23923 (1 << ARG_REGISTER (1))
23924 | (1 << ARG_REGISTER (2))
23925 | (1 << ARG_REGISTER (3));
23926 else
23927 regs_available_for_popping =
23928 (1 << ARG_REGISTER (2))
23929 | (1 << ARG_REGISTER (3));
23930 }
23931 else if (size <= 4)
23932 regs_available_for_popping =
23933 (1 << ARG_REGISTER (2))
23934 | (1 << ARG_REGISTER (3));
23935 else if (size <= 8)
23936 regs_available_for_popping =
23937 (1 << ARG_REGISTER (3));
23938 }
23939
23940 /* Match registers to be popped with registers into which we pop them. */
23941 for (available = regs_available_for_popping,
23942 required = regs_to_pop;
23943 required != 0 && available != 0;
23944 available &= ~(available & - available),
23945 required &= ~(required & - required))
23946 -- pops_needed;
23947
23948 /* If we have any popping registers left over, remove them. */
23949 if (available > 0)
23950 regs_available_for_popping &= ~available;
23951
23952 /* Otherwise if we need another popping register we can use
23953 the fourth argument register. */
23954 else if (pops_needed)
23955 {
23956 /* If we have not found any free argument registers and
23957 reg a4 contains the return address, we must move it. */
23958 if (regs_available_for_popping == 0
23959 && reg_containing_return_addr == LAST_ARG_REGNUM)
23960 {
23961 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23962 reg_containing_return_addr = LR_REGNUM;
23963 }
23964 else if (size > 12)
23965 {
23966 /* Register a4 is being used to hold part of the return value,
23967 but we have dire need of a free, low register. */
23968 restore_a4 = TRUE;
23969
23970 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23971 }
23972
23973 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23974 {
23975 /* The fourth argument register is available. */
23976 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23977
23978 --pops_needed;
23979 }
23980 }
23981
23982 /* Pop as many registers as we can. */
23983 thumb_pop (f, regs_available_for_popping);
23984
23985 /* Process the registers we popped. */
23986 if (reg_containing_return_addr == -1)
23987 {
23988 /* The return address was popped into the lowest numbered register. */
23989 regs_to_pop &= ~(1 << LR_REGNUM);
23990
23991 reg_containing_return_addr =
23992 number_of_first_bit_set (regs_available_for_popping);
23993
23994 /* Remove this register from the mask of available registers, so that
23995 the return address will not be corrupted by further pops. */
23996 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23997 }
23998
23999 /* If we popped other registers then handle them here. */
24000 if (regs_available_for_popping)
24001 {
24002 int frame_pointer;
24003
24004 /* Work out which register currently contains the frame pointer. */
24005 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24006
24007 /* Move it into the correct place. */
24008 asm_fprintf (f, "\tmov\t%r, %r\n",
24009 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24010
24011 /* (Temporarily) remove it from the mask of popped registers. */
24012 regs_available_for_popping &= ~(1 << frame_pointer);
24013 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24014
24015 if (regs_available_for_popping)
24016 {
24017 int stack_pointer;
24018
24019 /* We popped the stack pointer as well,
24020 find the register that contains it. */
24021 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24022
24023 /* Move it into the stack register. */
24024 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24025
24026 /* At this point we have popped all necessary registers, so
24027 do not worry about restoring regs_available_for_popping
24028 to its correct value:
24029
24030 assert (pops_needed == 0)
24031 assert (regs_available_for_popping == (1 << frame_pointer))
24032 assert (regs_to_pop == (1 << STACK_POINTER)) */
24033 }
24034 else
24035 {
24036 /* Since we have just moved the popped value into the frame
24037 pointer, the popping register is available for reuse, and
24038 we know that we still have the stack pointer left to pop. */
24039 regs_available_for_popping |= (1 << frame_pointer);
24040 }
24041 }
24042
24043 /* If we still have registers left on the stack, but we no longer have
24044 any registers into which we can pop them, then we must move the return
24045 address into the link register and make available the register that
24046 contained it. */
24047 if (regs_available_for_popping == 0 && pops_needed > 0)
24048 {
24049 regs_available_for_popping |= 1 << reg_containing_return_addr;
24050
24051 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24052 reg_containing_return_addr);
24053
24054 reg_containing_return_addr = LR_REGNUM;
24055 }
24056
24057 /* If we have registers left on the stack then pop some more.
24058 We know that at most we will want to pop FP and SP. */
24059 if (pops_needed > 0)
24060 {
24061 int popped_into;
24062 int move_to;
24063
24064 thumb_pop (f, regs_available_for_popping);
24065
24066 /* We have popped either FP or SP.
24067 Move whichever one it is into the correct register. */
24068 popped_into = number_of_first_bit_set (regs_available_for_popping);
24069 move_to = number_of_first_bit_set (regs_to_pop);
24070
24071 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24072
24073 regs_to_pop &= ~(1 << move_to);
24074
24075 --pops_needed;
24076 }
24077
24078 /* If we still have not popped everything then we must have only
24079 had one register available to us and we are now popping the SP. */
24080 if (pops_needed > 0)
24081 {
24082 int popped_into;
24083
24084 thumb_pop (f, regs_available_for_popping);
24085
24086 popped_into = number_of_first_bit_set (regs_available_for_popping);
24087
24088 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24089 /*
24090 assert (regs_to_pop == (1 << STACK_POINTER))
24091 assert (pops_needed == 1)
24092 */
24093 }
24094
24095 /* If necessary restore the a4 register. */
24096 if (restore_a4)
24097 {
24098 if (reg_containing_return_addr != LR_REGNUM)
24099 {
24100 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24101 reg_containing_return_addr = LR_REGNUM;
24102 }
24103
24104 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24105 }
24106
24107 if (crtl->calls_eh_return)
24108 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24109
24110 /* Return to caller. */
24111 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24112 {
24113 /* This is for the cases where LR is not being used to contain the return
24114 address. It may therefore contain information that we might not want
24115 to leak, hence it must be cleared. The value in R0 will never be a
24116 secret at this point, so it is safe to use it, see the clearing code
24117 in 'cmse_nonsecure_entry_clear_before_return'. */
24118 if (reg_containing_return_addr != LR_REGNUM)
24119 asm_fprintf (f, "\tmov\tlr, r0\n");
24120
24121 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24122 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24123 }
24124 else
24125 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24126 }
24127 \f
24128 /* Scan INSN just before assembler is output for it.
24129 For Thumb-1, we track the status of the condition codes; this
24130 information is used in the cbranchsi4_insn pattern. */
24131 void
24132 thumb1_final_prescan_insn (rtx_insn *insn)
24133 {
24134 if (flag_print_asm_name)
24135 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24136 INSN_ADDRESSES (INSN_UID (insn)));
24137 /* Don't overwrite the previous setter when we get to a cbranch. */
24138 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24139 {
24140 enum attr_conds conds;
24141
24142 if (cfun->machine->thumb1_cc_insn)
24143 {
24144 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24145 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24146 CC_STATUS_INIT;
24147 }
24148 conds = get_attr_conds (insn);
24149 if (conds == CONDS_SET)
24150 {
24151 rtx set = single_set (insn);
24152 cfun->machine->thumb1_cc_insn = insn;
24153 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24154 cfun->machine->thumb1_cc_op1 = const0_rtx;
24155 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24156 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24157 {
24158 rtx src1 = XEXP (SET_SRC (set), 1);
24159 if (src1 == const0_rtx)
24160 cfun->machine->thumb1_cc_mode = CCmode;
24161 }
24162 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24163 {
24164 /* Record the src register operand instead of dest because
24165 cprop_hardreg pass propagates src. */
24166 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24167 }
24168 }
24169 else if (conds != CONDS_NOCOND)
24170 cfun->machine->thumb1_cc_insn = NULL_RTX;
24171 }
24172
24173 /* Check if unexpected far jump is used. */
24174 if (cfun->machine->lr_save_eliminated
24175 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24176 internal_error("Unexpected thumb1 far jump");
24177 }
24178
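/* Return nonzero if VAL (viewed as a 32-bit value) is an 8-bit constant
   shifted left by between 0 and 24 bits, e.g. 0x00ff0000.  Such values
   can typically be synthesized with a move of the 8-bit constant followed
   by a left shift.  */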
24179 int
24180 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24181 {
24182 unsigned HOST_WIDE_INT mask = 0xff;
24183 int i;
24184
24185 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24186 if (val == 0) /* XXX */
24187 return 0;
24188
24189 for (i = 0; i < 25; i++)
24190 if ((val & (mask << i)) == val)
24191 return 1;
24192
24193 return 0;
24194 }
24195
24196 /* Returns nonzero if the current function contains,
24197 or might contain a far jump. */
24198 static int
24199 thumb_far_jump_used_p (void)
24200 {
24201 rtx_insn *insn;
24202 bool far_jump = false;
24203 unsigned int func_size = 0;
24204
24205 /* If we have already decided that far jumps may be used,
24206 do not bother checking again, and always return true even if
24207 it turns out that they are not being used. Once we have made
24208 the decision that far jumps are present (and that hence the link
24209 register will be pushed onto the stack) we cannot go back on it. */
24210 if (cfun->machine->far_jump_used)
24211 return 1;
24212
24213 /* If this function is not being called from the prologue/epilogue
24214 generation code then it must be being called from the
24215 INITIAL_ELIMINATION_OFFSET macro. */
24216 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24217 {
24218 /* In this case we know that we are being asked about the elimination
24219 of the arg pointer register. If that register is not being used,
24220 then there are no arguments on the stack, and we do not have to
24221 worry that a far jump might force the prologue to push the link
24222 register, changing the stack offsets. In this case we can just
24223 return false, since the presence of far jumps in the function will
24224 not affect stack offsets.
24225
24226 If the arg pointer is live (or if it was live, but has now been
24227 eliminated and so set to dead) then we do have to test to see if
24228 the function might contain a far jump. This test can lead to some
24229 false negatives, since before reload is completed, the length of
24230 branch instructions is not known, so gcc defaults to returning their
24231 longest length, which in turn sets the far jump attribute to true.
24232
24233 A false negative will not result in bad code being generated, but it
24234 will result in a needless push and pop of the link register. We
24235 hope that this does not occur too often.
24236
24237 If we need doubleword stack alignment this could affect the other
24238 elimination offsets so we can't risk getting it wrong. */
24239 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24240 cfun->machine->arg_pointer_live = 1;
24241 else if (!cfun->machine->arg_pointer_live)
24242 return 0;
24243 }
24244
24245 /* We should not change far_jump_used during or after reload, as there is
24246 no chance to change stack frame layout. */
24247 if (reload_in_progress || reload_completed)
24248 return 0;
24249
24250 /* Check to see if the function contains a branch
24251 insn with the far jump attribute set. */
24252 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24253 {
24254 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24255 {
24256 far_jump = true;
24257 }
24258 func_size += get_attr_length (insn);
24259 }
24260
24261 /* Attribute far_jump will always be true for thumb1 before
24262 shorten_branch pass.  So checking the far_jump attribute before
24263 shorten_branch isn't very useful.
24264
24265 Following heuristic tries to estimate more accurately if a far jump
24266 may finally be used. The heuristic is very conservative as there is
24267 no chance to roll-back the decision of not to use far jump.
24268
24269 Thumb1 long branch offset is -2048 to 2046.  In the worst case each
24270 2-byte insn is associated with a 4-byte constant pool entry, tripling
24271 the effective size, so function size 2048/3 is a conservative threshold. */
24272 if (far_jump)
24273 {
24274 if ((func_size * 3) >= 2048)
24275 {
24276 /* Record the fact that we have decided that
24277 the function does use far jumps. */
24278 cfun->machine->far_jump_used = 1;
24279 return 1;
24280 }
24281 }
24282
24283 return 0;
24284 }
24285
24286 /* Return nonzero if FUNC must be entered in ARM mode. */
24287 static bool
24288 is_called_in_ARM_mode (tree func)
24289 {
24290 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24291
24292 /* Ignore the problem about functions whose address is taken. */
24293 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24294 return true;
24295
24296 #ifdef ARM_PE
24297 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24298 #else
24299 return false;
24300 #endif
24301 }
24302
24303 /* Given the stack offsets and register mask in OFFSETS, decide how
24304 many additional registers to push instead of subtracting a constant
24305 from SP. For epilogues the principle is the same except we use pop.
24306 FOR_PROLOGUE indicates which we're generating. */
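/* For instance, with a 512-byte frame and one free low register, pushing
   that register reduces the remaining SP adjustment to 508 bytes, which
   fits the immediate of a single Thumb-1 SP-adjusting instruction and so
   avoids loading the offset from the constant pool.  */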
24307 static int
24308 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24309 {
24310 HOST_WIDE_INT amount;
24311 unsigned long live_regs_mask = offsets->saved_regs_mask;
24312 /* Extract a mask of the ones we can give to the Thumb's push/pop
24313 instruction. */
24314 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24315 /* Then count how many other high registers will need to be pushed. */
24316 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24317 int n_free, reg_base, size;
24318
24319 if (!for_prologue && frame_pointer_needed)
24320 amount = offsets->locals_base - offsets->saved_regs;
24321 else
24322 amount = offsets->outgoing_args - offsets->saved_regs;
24323
24324 /* If the stack frame size is 512 exactly, we can save one load
24325 instruction, which should make this a win even when optimizing
24326 for speed. */
24327 if (!optimize_size && amount != 512)
24328 return 0;
24329
24330 /* Can't do this if there are high registers to push. */
24331 if (high_regs_pushed != 0)
24332 return 0;
24333
24334 /* Shouldn't do it in the prologue if no registers would normally
24335 be pushed at all. In the epilogue, also allow it if we'll have
24336 a pop insn for the PC. */
24337 if (l_mask == 0
24338 && (for_prologue
24339 || TARGET_BACKTRACE
24340 || (live_regs_mask & 1 << LR_REGNUM) == 0
24341 || TARGET_INTERWORK
24342 || crtl->args.pretend_args_size != 0))
24343 return 0;
24344
24345 /* Don't do this if thumb_expand_prologue wants to emit instructions
24346 between the push and the stack frame allocation. */
24347 if (for_prologue
24348 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24349 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24350 return 0;
24351
24352 reg_base = 0;
24353 n_free = 0;
24354 if (!for_prologue)
24355 {
24356 size = arm_size_return_regs ();
24357 reg_base = ARM_NUM_INTS (size);
24358 live_regs_mask >>= reg_base;
24359 }
24360
24361 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24362 && (for_prologue || call_used_regs[reg_base + n_free]))
24363 {
24364 live_regs_mask >>= 1;
24365 n_free++;
24366 }
24367
24368 if (n_free == 0)
24369 return 0;
24370 gcc_assert (amount / 4 * 4 == amount);
24371
24372 if (amount >= 512 && (amount - n_free * 4) < 512)
24373 return (amount - 508) / 4;
24374 if (amount <= n_free * 4)
24375 return amount / 4;
24376 return 0;
24377 }
24378
24379 /* The bits which aren't usefully expanded as rtl. */
24380 const char *
24381 thumb1_unexpanded_epilogue (void)
24382 {
24383 arm_stack_offsets *offsets;
24384 int regno;
24385 unsigned long live_regs_mask = 0;
24386 int high_regs_pushed = 0;
24387 int extra_pop;
24388 int had_to_push_lr;
24389 int size;
24390
24391 if (cfun->machine->return_used_this_function != 0)
24392 return "";
24393
24394 if (IS_NAKED (arm_current_func_type ()))
24395 return "";
24396
24397 offsets = arm_get_frame_offsets ();
24398 live_regs_mask = offsets->saved_regs_mask;
24399 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24400
24401 /* If possible, deduce the registers used from the function's return value.
24402 This is more reliable than examining df_regs_ever_live_p () because that
24403 will be set if the register is ever used in the function, not just if
24404 the register is used to hold a return value. */
24405 size = arm_size_return_regs ();
24406
24407 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24408 if (extra_pop > 0)
24409 {
24410 unsigned long extra_mask = (1 << extra_pop) - 1;
24411 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24412 }
24413
24414 /* The prolog may have pushed some high registers to use as
24415 work registers. e.g. the testsuite file:
24416 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24417 compiles to produce:
24418 push {r4, r5, r6, r7, lr}
24419 mov r7, r9
24420 mov r6, r8
24421 push {r6, r7}
24422 as part of the prolog. We have to undo that pushing here. */
24423
24424 if (high_regs_pushed)
24425 {
24426 unsigned long mask = live_regs_mask & 0xff;
24427 int next_hi_reg;
24428
24429 /* The available low registers depend on the size of the value we are
24430 returning. */
24431 if (size <= 12)
24432 mask |= 1 << 3;
24433 if (size <= 8)
24434 mask |= 1 << 2;
24435
24436 if (mask == 0)
24437 /* Oh dear! We have no low registers into which we can pop
24438 high registers! */
24439 internal_error
24440 ("no low registers available for popping high registers");
24441
24442 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24443 if (live_regs_mask & (1 << next_hi_reg))
24444 break;
24445
24446 while (high_regs_pushed)
24447 {
24448 /* Find lo register(s) into which the high register(s) can
24449 be popped. */
24450 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24451 {
24452 if (mask & (1 << regno))
24453 high_regs_pushed--;
24454 if (high_regs_pushed == 0)
24455 break;
24456 }
24457
24458 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24459
24460 /* Pop the values into the low register(s). */
24461 thumb_pop (asm_out_file, mask);
24462
24463 /* Move the value(s) into the high registers. */
24464 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24465 {
24466 if (mask & (1 << regno))
24467 {
24468 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24469 regno);
24470
24471 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24472 if (live_regs_mask & (1 << next_hi_reg))
24473 break;
24474 }
24475 }
24476 }
24477 live_regs_mask &= ~0x0f00;
24478 }
24479
24480 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24481 live_regs_mask &= 0xff;
24482
24483 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24484 {
24485 /* Pop the return address into the PC. */
24486 if (had_to_push_lr)
24487 live_regs_mask |= 1 << PC_REGNUM;
24488
24489 /* Either no argument registers were pushed or a backtrace
24490 structure was created which includes an adjusted stack
24491 pointer, so just pop everything. */
24492 if (live_regs_mask)
24493 thumb_pop (asm_out_file, live_regs_mask);
24494
24495 /* We have either just popped the return address into the
24496 PC or it was kept in LR for the entire function.
24497 Note that thumb_pop has already called thumb_exit if the
24498 PC was in the list. */
24499 if (!had_to_push_lr)
24500 thumb_exit (asm_out_file, LR_REGNUM);
24501 }
24502 else
24503 {
24504 /* Pop everything but the return address. */
24505 if (live_regs_mask)
24506 thumb_pop (asm_out_file, live_regs_mask);
24507
24508 if (had_to_push_lr)
24509 {
24510 if (size > 12)
24511 {
24512 /* We have no free low regs, so save one. */
24513 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24514 LAST_ARG_REGNUM);
24515 }
24516
24517 /* Get the return address into a temporary register. */
24518 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24519
24520 if (size > 12)
24521 {
24522 /* Move the return address to lr. */
24523 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24524 LAST_ARG_REGNUM);
24525 /* Restore the low register. */
24526 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24527 IP_REGNUM);
24528 regno = LR_REGNUM;
24529 }
24530 else
24531 regno = LAST_ARG_REGNUM;
24532 }
24533 else
24534 regno = LR_REGNUM;
24535
24536 /* Remove the argument registers that were pushed onto the stack. */
24537 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24538 SP_REGNUM, SP_REGNUM,
24539 crtl->args.pretend_args_size);
24540
24541 thumb_exit (asm_out_file, regno);
24542 }
24543
24544 return "";
24545 }
24546
24547 /* Functions to save and restore machine-specific function data. */
24548 static struct machine_function *
24549 arm_init_machine_status (void)
24550 {
24551 struct machine_function *machine;
24552 machine = ggc_cleared_alloc<machine_function> ();
24553
24554 #if ARM_FT_UNKNOWN != 0
24555 machine->func_type = ARM_FT_UNKNOWN;
24556 #endif
24557 return machine;
24558 }
24559
24560 /* Return an RTX indicating where the return address to the
24561 calling function can be found. */
24562 rtx
24563 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24564 {
24565 if (count != 0)
24566 return NULL_RTX;
24567
24568 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24569 }
24570
24571 /* Do anything needed before RTL is emitted for each function. */
24572 void
24573 arm_init_expanders (void)
24574 {
24575 /* Arrange to initialize and mark the machine per-function status. */
24576 init_machine_status = arm_init_machine_status;
24577
24578 /* This is to stop the combine pass optimizing away the alignment
24579 adjustment of va_arg. */
24580 /* ??? It is claimed that this should not be necessary. */
24581 if (cfun)
24582 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24583 }
24584
24585 /* Check whether FUNC is compiled for a different instruction set (ARM vs Thumb) from the current mode. */
24586
24587 bool
24588 arm_change_mode_p (tree func)
24589 {
24590 if (TREE_CODE (func) != FUNCTION_DECL)
24591 return false;
24592
24593 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24594
24595 if (!callee_tree)
24596 callee_tree = target_option_default_node;
24597
24598 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24599 int flags = callee_opts->x_target_flags;
24600
24601 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24602 }
24603
24604 /* Like arm_compute_initial_elimination offset. Simpler because there
24605 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24606 to point at the base of the local variables after static stack
24607 space for a function has been allocated. */
24608
24609 HOST_WIDE_INT
24610 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24611 {
24612 arm_stack_offsets *offsets;
24613
24614 offsets = arm_get_frame_offsets ();
24615
24616 switch (from)
24617 {
24618 case ARG_POINTER_REGNUM:
24619 switch (to)
24620 {
24621 case STACK_POINTER_REGNUM:
24622 return offsets->outgoing_args - offsets->saved_args;
24623
24624 case FRAME_POINTER_REGNUM:
24625 return offsets->soft_frame - offsets->saved_args;
24626
24627 case ARM_HARD_FRAME_POINTER_REGNUM:
24628 return offsets->saved_regs - offsets->saved_args;
24629
24630 case THUMB_HARD_FRAME_POINTER_REGNUM:
24631 return offsets->locals_base - offsets->saved_args;
24632
24633 default:
24634 gcc_unreachable ();
24635 }
24636 break;
24637
24638 case FRAME_POINTER_REGNUM:
24639 switch (to)
24640 {
24641 case STACK_POINTER_REGNUM:
24642 return offsets->outgoing_args - offsets->soft_frame;
24643
24644 case ARM_HARD_FRAME_POINTER_REGNUM:
24645 return offsets->saved_regs - offsets->soft_frame;
24646
24647 case THUMB_HARD_FRAME_POINTER_REGNUM:
24648 return offsets->locals_base - offsets->soft_frame;
24649
24650 default:
24651 gcc_unreachable ();
24652 }
24653 break;
24654
24655 default:
24656 gcc_unreachable ();
24657 }
24658 }
24659
24660 /* Generate the function's prologue. */
24661
24662 void
24663 thumb1_expand_prologue (void)
24664 {
24665 rtx_insn *insn;
24666
24667 HOST_WIDE_INT amount;
24668 HOST_WIDE_INT size;
24669 arm_stack_offsets *offsets;
24670 unsigned long func_type;
24671 int regno;
24672 unsigned long live_regs_mask;
24673 unsigned long l_mask;
24674 unsigned high_regs_pushed = 0;
24675 bool lr_needs_saving;
24676
24677 func_type = arm_current_func_type ();
24678
24679 /* Naked functions don't have prologues. */
24680 if (IS_NAKED (func_type))
24681 {
24682 if (flag_stack_usage_info)
24683 current_function_static_stack_size = 0;
24684 return;
24685 }
24686
24687 if (IS_INTERRUPT (func_type))
24688 {
24689 error ("interrupt Service Routines cannot be coded in Thumb mode");
24690 return;
24691 }
24692
24693 if (is_called_in_ARM_mode (current_function_decl))
24694 emit_insn (gen_prologue_thumb1_interwork ());
24695
24696 offsets = arm_get_frame_offsets ();
24697 live_regs_mask = offsets->saved_regs_mask;
24698 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24699
24700 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24701 l_mask = live_regs_mask & 0x40ff;
24702 /* Then count how many other high registers will need to be pushed. */
24703 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24704
24705 if (crtl->args.pretend_args_size)
24706 {
24707 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24708
24709 if (cfun->machine->uses_anonymous_args)
24710 {
24711 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24712 unsigned long mask;
24713
24714 mask = 1ul << (LAST_ARG_REGNUM + 1);
24715 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24716
24717 insn = thumb1_emit_multi_reg_push (mask, 0);
24718 }
24719 else
24720 {
24721 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24722 stack_pointer_rtx, x));
24723 }
24724 RTX_FRAME_RELATED_P (insn) = 1;
24725 }
24726
24727 if (TARGET_BACKTRACE)
24728 {
24729 HOST_WIDE_INT offset = 0;
24730 unsigned work_register;
24731 rtx work_reg, x, arm_hfp_rtx;
24732
24733 /* We have been asked to create a stack backtrace structure.
24734 The code looks like this:
24735
24736 0 .align 2
24737 0 func:
24738 0 sub SP, #16 Reserve space for 4 registers.
24739 2 push {R7} Push low registers.
24740 4 add R7, SP, #20 Get the stack pointer before the push.
24741 6 str R7, [SP, #8] Store the stack pointer
24742 (before reserving the space).
24743 8 mov R7, PC Get hold of the start of this code + 12.
24744 10 str R7, [SP, #16] Store it.
24745 12 mov R7, FP Get hold of the current frame pointer.
24746 14 str R7, [SP, #4] Store it.
24747 16 mov R7, LR Get hold of the current return address.
24748 18 str R7, [SP, #12] Store it.
24749 20 add R7, SP, #16 Point at the start of the
24750 backtrace structure.
24751 22 mov FP, R7 Put this value into the frame pointer. */
24752
24753 work_register = thumb_find_work_register (live_regs_mask);
24754 work_reg = gen_rtx_REG (SImode, work_register);
24755 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24756
24757 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24758 stack_pointer_rtx, GEN_INT (-16)));
24759 RTX_FRAME_RELATED_P (insn) = 1;
24760
24761 if (l_mask)
24762 {
24763 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24764 RTX_FRAME_RELATED_P (insn) = 1;
24765 lr_needs_saving = false;
24766
24767 offset = bit_count (l_mask) * UNITS_PER_WORD;
24768 }
24769
24770 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24771 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24772
24773 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24774 x = gen_frame_mem (SImode, x);
24775 emit_move_insn (x, work_reg);
24776
24777 /* Make sure that the instruction fetching the PC is in the right place
24778 to calculate "start of backtrace creation code + 12". */
24779 /* ??? The stores using the common WORK_REG ought to be enough to
24780 prevent the scheduler from doing anything weird. Failing that
24781 we could always move all of the following into an UNSPEC_VOLATILE. */
24782 if (l_mask)
24783 {
24784 x = gen_rtx_REG (SImode, PC_REGNUM);
24785 emit_move_insn (work_reg, x);
24786
24787 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24788 x = gen_frame_mem (SImode, x);
24789 emit_move_insn (x, work_reg);
24790
24791 emit_move_insn (work_reg, arm_hfp_rtx);
24792
24793 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24794 x = gen_frame_mem (SImode, x);
24795 emit_move_insn (x, work_reg);
24796 }
24797 else
24798 {
24799 emit_move_insn (work_reg, arm_hfp_rtx);
24800
24801 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24802 x = gen_frame_mem (SImode, x);
24803 emit_move_insn (x, work_reg);
24804
24805 x = gen_rtx_REG (SImode, PC_REGNUM);
24806 emit_move_insn (work_reg, x);
24807
24808 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24809 x = gen_frame_mem (SImode, x);
24810 emit_move_insn (x, work_reg);
24811 }
24812
24813 x = gen_rtx_REG (SImode, LR_REGNUM);
24814 emit_move_insn (work_reg, x);
24815
24816 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24817 x = gen_frame_mem (SImode, x);
24818 emit_move_insn (x, work_reg);
24819
24820 x = GEN_INT (offset + 12);
24821 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24822
24823 emit_move_insn (arm_hfp_rtx, work_reg);
24824 }
24825 /* Optimization: If we are not pushing any low registers but we are going
24826 to push some high registers then delay our first push. This will just
24827 be a push of LR and we can combine it with the push of the first high
24828 register. */
24829 else if ((l_mask & 0xff) != 0
24830 || (high_regs_pushed == 0 && lr_needs_saving))
24831 {
24832 unsigned long mask = l_mask;
24833 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24834 insn = thumb1_emit_multi_reg_push (mask, mask);
24835 RTX_FRAME_RELATED_P (insn) = 1;
24836 lr_needs_saving = false;
24837 }
24838
24839 if (high_regs_pushed)
24840 {
24841 unsigned pushable_regs;
24842 unsigned next_hi_reg;
24843 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24844 : crtl->args.info.nregs;
24845 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24846
24847 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24848 if (live_regs_mask & (1 << next_hi_reg))
24849 break;
24850
24851 /* Here we need to mask out registers used for passing arguments
24852 even if they can be pushed.  This is to avoid using them to stash the
24853 high registers; such a stash may clobber the use of arguments. */
24854 pushable_regs = l_mask & (~arg_regs_mask);
24855 if (lr_needs_saving)
24856 pushable_regs &= ~(1 << LR_REGNUM);
24857
24858 if (pushable_regs == 0)
24859 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24860
24861 while (high_regs_pushed > 0)
24862 {
24863 unsigned long real_regs_mask = 0;
24864 unsigned long push_mask = 0;
24865
24866 for (regno = LR_REGNUM; regno >= 0; regno --)
24867 {
24868 if (pushable_regs & (1 << regno))
24869 {
24870 emit_move_insn (gen_rtx_REG (SImode, regno),
24871 gen_rtx_REG (SImode, next_hi_reg));
24872
24873 high_regs_pushed --;
24874 real_regs_mask |= (1 << next_hi_reg);
24875 push_mask |= (1 << regno);
24876
24877 if (high_regs_pushed)
24878 {
24879 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24880 next_hi_reg --)
24881 if (live_regs_mask & (1 << next_hi_reg))
24882 break;
24883 }
24884 else
24885 break;
24886 }
24887 }
24888
24889 /* If we had to find a work register and we have not yet
24890 saved the LR then add it to the list of regs to push. */
24891 if (lr_needs_saving)
24892 {
24893 push_mask |= 1 << LR_REGNUM;
24894 real_regs_mask |= 1 << LR_REGNUM;
24895 lr_needs_saving = false;
24896 }
24897
24898 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24899 RTX_FRAME_RELATED_P (insn) = 1;
24900 }
24901 }
24902
24903 /* Load the pic register before setting the frame pointer,
24904 so we can use r7 as a temporary work register. */
24905 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24906 arm_load_pic_register (live_regs_mask);
24907
24908 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24909 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24910 stack_pointer_rtx);
24911
24912 size = offsets->outgoing_args - offsets->saved_args;
24913 if (flag_stack_usage_info)
24914 current_function_static_stack_size = size;
24915
24916 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24917 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24918 sorry ("-fstack-check=specific for Thumb-1");
24919
24920 amount = offsets->outgoing_args - offsets->saved_regs;
24921 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24922 if (amount)
24923 {
24924 if (amount < 512)
24925 {
24926 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24927 GEN_INT (- amount)));
24928 RTX_FRAME_RELATED_P (insn) = 1;
24929 }
24930 else
24931 {
24932 rtx reg, dwarf;
24933
24934 /* The stack decrement is too big for an immediate value in a single
24935 insn. In theory we could issue multiple subtracts, but after
24936 three of them it becomes more space efficient to place the full
24937 value in the constant pool and load into a register. (Also the
24938 ARM debugger really likes to see only one stack decrement per
24939 function). So instead we look for a scratch register into which
24940 we can load the decrement, and then we subtract this from the
24941 stack pointer. Unfortunately on the thumb the only available
24942 scratch registers are the argument registers, and we cannot use
24943 these as they may hold arguments to the function. Instead we
24944 attempt to locate a call preserved register which is used by this
24945 function. If we can find one, then we know that it will have
24946 been pushed at the start of the prologue and so we can corrupt
24947 it now. */
24948 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24949 if (live_regs_mask & (1 << regno))
24950 break;
24951
24952 gcc_assert(regno <= LAST_LO_REGNUM);
24953
24954 reg = gen_rtx_REG (SImode, regno);
24955
24956 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24957
24958 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24959 stack_pointer_rtx, reg));
24960
24961 dwarf = gen_rtx_SET (stack_pointer_rtx,
24962 plus_constant (Pmode, stack_pointer_rtx,
24963 -amount));
24964 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24965 RTX_FRAME_RELATED_P (insn) = 1;
24966 }
24967 }
24968
24969 if (frame_pointer_needed)
24970 thumb_set_frame_pointer (offsets);
24971
24972 /* If we are profiling, make sure no instructions are scheduled before
24973 the call to mcount. Similarly if the user has requested no
24974 scheduling in the prolog. Similarly if we want non-call exceptions
24975 using the EABI unwinder, to prevent faulting instructions from being
24976 swapped with a stack adjustment. */
24977 if (crtl->profile || !TARGET_SCHED_PROLOG
24978 || (arm_except_unwind_info (&global_options) == UI_TARGET
24979 && cfun->can_throw_non_call_exceptions))
24980 emit_insn (gen_blockage ());
24981
24982 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24983 if (live_regs_mask & 0xff)
24984 cfun->machine->lr_save_eliminated = 0;
24985 }
24986
24987 /* Clear caller saved registers not used to pass return values and leaked
24988 condition flags before exiting a cmse_nonsecure_entry function. */
24989
24990 void
24991 cmse_nonsecure_entry_clear_before_return (void)
24992 {
24993 uint64_t to_clear_mask[2];
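  /* to_clear_mask is indexed by regno / 64: element 0 covers register
     numbers 0-63 (the core registers and the first VFP registers),
     element 1 covers register numbers 64 and above (the remaining VFP
     registers).  */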
24994 uint32_t padding_bits_to_clear = 0;
24995 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24996 int regno, maxregno = IP_REGNUM;
24997 tree result_type;
24998 rtx result_rtl;
24999
25000 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25001 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25002
25003 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25004 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25005 to make sure the instructions used to clear them are present. */
25006 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25007 {
25008 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25009 maxregno = LAST_VFP_REGNUM;
25010
25011 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25012 to_clear_mask[0] |= float_mask;
25013
25014 float_mask = (1ULL << (maxregno - 63)) - 1;
25015 to_clear_mask[1] = float_mask;
25016
25017 /* Make sure we don't clear the two scratch registers used to clear the
25018 relevant FPSCR bits in output_return_instruction. */
25019 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25020 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25021 emit_use (gen_rtx_REG (SImode, 4));
25022 to_clear_mask[0] &= ~(1ULL << 4);
25023 }
25024
25025 /* If the user has defined registers to be caller saved, these are no longer
25026 restored by the function before returning and must thus be cleared for
25027 security purposes. */
25028 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25029 {
25030 /* We do not touch registers that can be used to pass arguments as per
25031 the AAPCS, since these should never be made callee-saved by user
25032 options. */
25033 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25034 continue;
25035 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25036 continue;
25037 if (call_used_regs[regno])
25038 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25039 }
25040
25041 /* Make sure we do not clear the registers used to return the result in. */
25042 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25043 if (!VOID_TYPE_P (result_type))
25044 {
25045 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25046
25047 /* No need to check that we return in registers, because we don't
25048 support returning on stack yet. */
25049 to_clear_mask[0]
25050 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25051 padding_bits_to_clear_ptr);
25052 }
25053
25054 if (padding_bits_to_clear != 0)
25055 {
25056 rtx reg_rtx;
25057 /* Padding bits to clear is not 0, so we know we are dealing with
25058 returning a composite type, which only uses r0.  Make sure that
25059 r1-r3 are cleared too; we will use r1 as a scratch register. */
25060 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25061
25062 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25063
25064 /* Fill the lower half of the negated padding_bits_to_clear. */
25065 emit_move_insn (reg_rtx,
25066 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25067
25068 /* Also fill the top half of the negated padding_bits_to_clear. */
25069 if (((~padding_bits_to_clear) >> 16) > 0)
25070 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25071 GEN_INT (16),
25072 GEN_INT (16)),
25073 GEN_INT ((~padding_bits_to_clear) >> 16)));
25074
25075 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25076 gen_rtx_REG (SImode, R0_REGNUM),
25077 reg_rtx));
25078 }
25079
25080 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25081 {
25082 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25083 continue;
25084
25085 if (IS_VFP_REGNUM (regno))
25086 {
25087 /* If regno is an even vfp register and its successor is also to
25088 be cleared, use vmov. */
25089 if (TARGET_VFP_DOUBLE
25090 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25091 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25092 {
25093 emit_move_insn (gen_rtx_REG (DFmode, regno),
25094 CONST1_RTX (DFmode));
25095 emit_use (gen_rtx_REG (DFmode, regno));
25096 regno++;
25097 }
25098 else
25099 {
25100 emit_move_insn (gen_rtx_REG (SFmode, regno),
25101 CONST1_RTX (SFmode));
25102 emit_use (gen_rtx_REG (SFmode, regno));
25103 }
25104 }
25105 else
25106 {
25107 if (TARGET_THUMB1)
25108 {
25109 if (regno == R0_REGNUM)
25110 emit_move_insn (gen_rtx_REG (SImode, regno),
25111 const0_rtx);
25112 else
25113 /* R0 has either been cleared before (see code above) or it
25114 holds a return value; either way it is not secret
25115 information. */
25116 emit_move_insn (gen_rtx_REG (SImode, regno),
25117 gen_rtx_REG (SImode, R0_REGNUM));
25118 emit_use (gen_rtx_REG (SImode, regno));
25119 }
25120 else
25121 {
25122 emit_move_insn (gen_rtx_REG (SImode, regno),
25123 gen_rtx_REG (SImode, LR_REGNUM));
25124 emit_use (gen_rtx_REG (SImode, regno));
25125 }
25126 }
25127 }
25128 }
25129
25130 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25131 POP instruction can be generated.  LR should be replaced by PC.  All
25132 the checks required are already done by USE_RETURN_INSN ().  Hence,
25133 all we really need to check here is whether a single register or
25134 multiple registers are to be returned. */
25135 void
25136 thumb2_expand_return (bool simple_return)
25137 {
25138 int i, num_regs;
25139 unsigned long saved_regs_mask;
25140 arm_stack_offsets *offsets;
25141
25142 offsets = arm_get_frame_offsets ();
25143 saved_regs_mask = offsets->saved_regs_mask;
25144
25145 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25146 if (saved_regs_mask & (1 << i))
25147 num_regs++;
25148
25149 if (!simple_return && saved_regs_mask)
25150 {
25151 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25152 functions or adapt the code to handle it according to the ACLE. This
25153 path should not be reachable for cmse_nonsecure_entry functions, but we
25154 prefer to assert it for now to ensure that future code changes do not
25155 silently change this behavior. */
25156 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25157 if (num_regs == 1)
25158 {
25159 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25160 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25161 rtx addr = gen_rtx_MEM (SImode,
25162 gen_rtx_POST_INC (SImode,
25163 stack_pointer_rtx));
25164 set_mem_alias_set (addr, get_frame_alias_set ());
25165 XVECEXP (par, 0, 0) = ret_rtx;
25166 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25167 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25168 emit_jump_insn (par);
25169 }
25170 else
25171 {
25172 saved_regs_mask &= ~ (1 << LR_REGNUM);
25173 saved_regs_mask |= (1 << PC_REGNUM);
25174 arm_emit_multi_reg_pop (saved_regs_mask);
25175 }
25176 }
25177 else
25178 {
25179 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25180 cmse_nonsecure_entry_clear_before_return ();
25181 emit_jump_insn (simple_return_rtx);
25182 }
25183 }
25184
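/* For illustration only, the two shapes the non-simple path above expands
   to (exact register sets depend on what the prologue saved):

     pop     {r4, r5, r6, pc}   @ multi-register case: LR slot loaded into PC

   or, when LR is the only saved register, a single-register pop of the
   saved LR straight into PC (e.g. "ldr pc, [sp], #4").  The simple_return
   path emits no pop at all and falls back to a plain return such as
   "bx lr".  */
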
25185 void
25186 thumb1_expand_epilogue (void)
25187 {
25188 HOST_WIDE_INT amount;
25189 arm_stack_offsets *offsets;
25190 int regno;
25191
25192 /* Naked functions don't have prologues or epilogues. */
25193 if (IS_NAKED (arm_current_func_type ()))
25194 return;
25195
25196 offsets = arm_get_frame_offsets ();
25197 amount = offsets->outgoing_args - offsets->saved_regs;
25198
25199 if (frame_pointer_needed)
25200 {
25201 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25202 amount = offsets->locals_base - offsets->saved_regs;
25203 }
25204 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25205
25206 gcc_assert (amount >= 0);
25207 if (amount)
25208 {
25209 emit_insn (gen_blockage ());
25210
25211 if (amount < 512)
25212 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25213 GEN_INT (amount)));
25214 else
25215 {
25216 /* r3 is always free in the epilogue. */
25217 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25218
25219 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25220 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25221 }
25222 }
25223
25224 /* Emit a USE (stack_pointer_rtx), so that
25225 the stack adjustment will not be deleted. */
25226 emit_insn (gen_force_register_use (stack_pointer_rtx));
25227
25228 if (crtl->profile || !TARGET_SCHED_PROLOG)
25229 emit_insn (gen_blockage ());
25230
25231 /* Emit a clobber for each register that will be restored in the
25232 epilogue, so that flow2 will get register lifetimes correct. */
25233 for (regno = 0; regno < 13; regno++)
25234 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25235 emit_clobber (gen_rtx_REG (SImode, regno));
25236
25237 if (! df_regs_ever_live_p (LR_REGNUM))
25238 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25239
25240 /* Clear all caller-saved regs that are not used to return. */
25241 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25242 cmse_nonsecure_entry_clear_before_return ();
25243 }
25244
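/* A hypothetical example (not from the sources) of when the r3 path above
   is taken: a Thumb-1 function whose locals exceed 512 bytes, e.g.

     int
     use_big_buffer (int seed)
     {
       volatile int buf[200];
       buf[0] = seed;
       return buf[0];
     }

   needs roughly 800 bytes of frame, so the epilogue cannot release it with
   a single "add sp, sp, #imm" and instead materializes the adjustment in
   r3 (LAST_ARG_REGNUM) first.  */
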
25245 /* Epilogue code for APCS frame. */
25246 static void
25247 arm_expand_epilogue_apcs_frame (bool really_return)
25248 {
25249 unsigned long func_type;
25250 unsigned long saved_regs_mask;
25251 int num_regs = 0;
25252 int i;
25253 int floats_from_frame = 0;
25254 arm_stack_offsets *offsets;
25255
25256 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25257 func_type = arm_current_func_type ();
25258
25259 /* Get frame offsets for ARM. */
25260 offsets = arm_get_frame_offsets ();
25261 saved_regs_mask = offsets->saved_regs_mask;
25262
25263 /* Find the offset of the floating-point save area in the frame. */
25264 floats_from_frame
25265 = (offsets->saved_args
25266 + arm_compute_static_chain_stack_bytes ()
25267 - offsets->frame);
25268
25269 /* Compute how many core registers are saved and how far away the floats are. */
25270 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25271 if (saved_regs_mask & (1 << i))
25272 {
25273 num_regs++;
25274 floats_from_frame += 4;
25275 }
25276
25277 if (TARGET_HARD_FLOAT)
25278 {
25279 int start_reg;
25280 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25281
25282 /* The offset is from IP_REGNUM. */
25283 int saved_size = arm_get_vfp_saved_size ();
25284 if (saved_size > 0)
25285 {
25286 rtx_insn *insn;
25287 floats_from_frame += saved_size;
25288 insn = emit_insn (gen_addsi3 (ip_rtx,
25289 hard_frame_pointer_rtx,
25290 GEN_INT (-floats_from_frame)));
25291 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25292 ip_rtx, hard_frame_pointer_rtx);
25293 }
25294
25295 /* Generate VFP register multi-pop. */
25296 start_reg = FIRST_VFP_REGNUM;
25297
25298 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25299 /* Look for a case where a reg does not need restoring. */
25300 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25301 && (!df_regs_ever_live_p (i + 1)
25302 || call_used_regs[i + 1]))
25303 {
25304 if (start_reg != i)
25305 arm_emit_vfp_multi_reg_pop (start_reg,
25306 (i - start_reg) / 2,
25307 gen_rtx_REG (SImode,
25308 IP_REGNUM));
25309 start_reg = i + 2;
25310 }
25311
25312 /* Restore the remaining regs that we have discovered (or possibly
25313 even all of them, if the conditional in the for loop never
25314 fired). */
25315 if (start_reg != i)
25316 arm_emit_vfp_multi_reg_pop (start_reg,
25317 (i - start_reg) / 2,
25318 gen_rtx_REG (SImode, IP_REGNUM));
25319 }
25320
25321 if (TARGET_IWMMXT)
25322 {
25323 /* The frame pointer is guaranteed to be non-double-word aligned, as
25324 it is set to double-word-aligned old_stack_pointer - 4. */
25325 rtx_insn *insn;
25326 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25327
25328 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25329 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25330 {
25331 rtx addr = gen_frame_mem (V2SImode,
25332 plus_constant (Pmode, hard_frame_pointer_rtx,
25333 - lrm_count * 4));
25334 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25335 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25336 gen_rtx_REG (V2SImode, i),
25337 NULL_RTX);
25338 lrm_count += 2;
25339 }
25340 }
25341
25342 /* saved_regs_mask should contain IP, which held the old stack pointer
25343 at the time the activation record was created. Since SP and IP are
25344 adjacent registers, we can restore the value directly into SP. */
25345 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25346 saved_regs_mask &= ~(1 << IP_REGNUM);
25347 saved_regs_mask |= (1 << SP_REGNUM);
25348
25349 /* There are two registers left in saved_regs_mask - LR and PC. We
25350 only need to restore LR (the return address), but to
25351 save time we can load it directly into PC, unless we need a
25352 special function exit sequence, or we are not really returning. */
25353 if (really_return
25354 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25355 && !crtl->calls_eh_return)
25356 /* Delete LR from the register mask, so that LR on
25357 the stack is loaded into the PC in the register mask. */
25358 saved_regs_mask &= ~(1 << LR_REGNUM);
25359 else
25360 saved_regs_mask &= ~(1 << PC_REGNUM);
25361
25362 num_regs = bit_count (saved_regs_mask);
25363 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25364 {
25365 rtx_insn *insn;
25366 emit_insn (gen_blockage ());
25367 /* Unwind the stack to just below the saved registers. */
25368 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25369 hard_frame_pointer_rtx,
25370 GEN_INT (- 4 * num_regs)));
25371
25372 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25373 stack_pointer_rtx, hard_frame_pointer_rtx);
25374 }
25375
25376 arm_emit_multi_reg_pop (saved_regs_mask);
25377
25378 if (IS_INTERRUPT (func_type))
25379 {
25380 /* Interrupt handlers will have pushed the
25381 IP onto the stack, so restore it now. */
25382 rtx_insn *insn;
25383 rtx addr = gen_rtx_MEM (SImode,
25384 gen_rtx_POST_INC (SImode,
25385 stack_pointer_rtx));
25386 set_mem_alias_set (addr, get_frame_alias_set ());
25387 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25388 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25389 gen_rtx_REG (SImode, IP_REGNUM),
25390 NULL_RTX);
25391 }
25392
25393 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25394 return;
25395
25396 if (crtl->calls_eh_return)
25397 emit_insn (gen_addsi3 (stack_pointer_rtx,
25398 stack_pointer_rtx,
25399 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25400
25401 if (IS_STACKALIGN (func_type))
25402 /* Restore the original stack pointer. Before prologue, the stack was
25403 realigned and the original stack pointer saved in r0. For details,
25404 see comment in arm_expand_prologue. */
25405 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25406
25407 emit_jump_insn (simple_return_rtx);
25408 }
25409
25410 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25411 function is not a sibcall. */
25412 void
25413 arm_expand_epilogue (bool really_return)
25414 {
25415 unsigned long func_type;
25416 unsigned long saved_regs_mask;
25417 int num_regs = 0;
25418 int i;
25419 int amount;
25420 arm_stack_offsets *offsets;
25421
25422 func_type = arm_current_func_type ();
25423
25424 /* Naked functions don't have epilogues. Hence, generate a return pattern
25425 and let output_return_instruction take care of any instruction emission. */
25426 if (IS_NAKED (func_type)
25427 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25428 {
25429 if (really_return)
25430 emit_jump_insn (simple_return_rtx);
25431 return;
25432 }
25433
25434 /* If we are throwing an exception, then we really must be doing a
25435 return, so we can't tail-call. */
25436 gcc_assert (!crtl->calls_eh_return || really_return);
25437
25438 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25439 {
25440 arm_expand_epilogue_apcs_frame (really_return);
25441 return;
25442 }
25443
25444 /* Get frame offsets for ARM. */
25445 offsets = arm_get_frame_offsets ();
25446 saved_regs_mask = offsets->saved_regs_mask;
25447 num_regs = bit_count (saved_regs_mask);
25448
25449 if (frame_pointer_needed)
25450 {
25451 rtx_insn *insn;
25452 /* Restore stack pointer if necessary. */
25453 if (TARGET_ARM)
25454 {
25455 /* In ARM mode, the frame pointer points to the first saved register.
25456 Restore the stack pointer to the last saved register. */
25457 amount = offsets->frame - offsets->saved_regs;
25458
25459 /* Force out any pending memory operations that reference stacked data
25460 before stack de-allocation occurs. */
25461 emit_insn (gen_blockage ());
25462 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25463 hard_frame_pointer_rtx,
25464 GEN_INT (amount)));
25465 arm_add_cfa_adjust_cfa_note (insn, amount,
25466 stack_pointer_rtx,
25467 hard_frame_pointer_rtx);
25468
25469 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25470 deleted. */
25471 emit_insn (gen_force_register_use (stack_pointer_rtx));
25472 }
25473 else
25474 {
25475 /* In Thumb-2 mode, the frame pointer points to the last saved
25476 register. */
25477 amount = offsets->locals_base - offsets->saved_regs;
25478 if (amount)
25479 {
25480 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25481 hard_frame_pointer_rtx,
25482 GEN_INT (amount)));
25483 arm_add_cfa_adjust_cfa_note (insn, amount,
25484 hard_frame_pointer_rtx,
25485 hard_frame_pointer_rtx);
25486 }
25487
25488 /* Force out any pending memory operations that reference stacked data
25489 before stack de-allocation occurs. */
25490 emit_insn (gen_blockage ());
25491 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25492 hard_frame_pointer_rtx));
25493 arm_add_cfa_adjust_cfa_note (insn, 0,
25494 stack_pointer_rtx,
25495 hard_frame_pointer_rtx);
25496 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25497 deleted. */
25498 emit_insn (gen_force_register_use (stack_pointer_rtx));
25499 }
25500 }
25501 else
25502 {
25503 /* Pop off outgoing args and local frame to adjust stack pointer to
25504 last saved register. */
25505 amount = offsets->outgoing_args - offsets->saved_regs;
25506 if (amount)
25507 {
25508 rtx_insn *tmp;
25509 /* Force out any pending memory operations that reference stacked data
25510 before stack de-allocation occurs. */
25511 emit_insn (gen_blockage ());
25512 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25513 stack_pointer_rtx,
25514 GEN_INT (amount)));
25515 arm_add_cfa_adjust_cfa_note (tmp, amount,
25516 stack_pointer_rtx, stack_pointer_rtx);
25517 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25518 not deleted. */
25519 emit_insn (gen_force_register_use (stack_pointer_rtx));
25520 }
25521 }
25522
25523 if (TARGET_HARD_FLOAT)
25524 {
25525 /* Generate VFP register multi-pop. */
25526 int end_reg = LAST_VFP_REGNUM + 1;
25527
25528 /* Scan the registers in reverse order. We need to match
25529 any groupings made in the prologue and generate matching
25530 vldm operations. The need to match groups is because,
25531 unlike pop, vldm can only do consecutive regs. */
25532 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25533 /* Look for a case where a reg does not need restoring. */
25534 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25535 && (!df_regs_ever_live_p (i + 1)
25536 || call_used_regs[i + 1]))
25537 {
25538 /* Restore the regs discovered so far (from reg+2 to
25539 end_reg). */
25540 if (end_reg > i + 2)
25541 arm_emit_vfp_multi_reg_pop (i + 2,
25542 (end_reg - (i + 2)) / 2,
25543 stack_pointer_rtx);
25544 end_reg = i;
25545 }
25546
25547 /* Restore the remaining regs that we have discovered (or possibly
25548 even all of them, if the conditional in the for loop never
25549 fired). */
25550 if (end_reg > i + 2)
25551 arm_emit_vfp_multi_reg_pop (i + 2,
25552 (end_reg - (i + 2)) / 2,
25553 stack_pointer_rtx);
25554 }
25555
25556 if (TARGET_IWMMXT)
25557 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25558 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25559 {
25560 rtx_insn *insn;
25561 rtx addr = gen_rtx_MEM (V2SImode,
25562 gen_rtx_POST_INC (SImode,
25563 stack_pointer_rtx));
25564 set_mem_alias_set (addr, get_frame_alias_set ());
25565 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25566 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25567 gen_rtx_REG (V2SImode, i),
25568 NULL_RTX);
25569 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25570 stack_pointer_rtx, stack_pointer_rtx);
25571 }
25572
25573 if (saved_regs_mask)
25574 {
25575 rtx insn;
25576 bool return_in_pc = false;
25577
25578 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25579 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25580 && !IS_CMSE_ENTRY (func_type)
25581 && !IS_STACKALIGN (func_type)
25582 && really_return
25583 && crtl->args.pretend_args_size == 0
25584 && saved_regs_mask & (1 << LR_REGNUM)
25585 && !crtl->calls_eh_return)
25586 {
25587 saved_regs_mask &= ~(1 << LR_REGNUM);
25588 saved_regs_mask |= (1 << PC_REGNUM);
25589 return_in_pc = true;
25590 }
25591
25592 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25593 {
25594 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25595 if (saved_regs_mask & (1 << i))
25596 {
25597 rtx addr = gen_rtx_MEM (SImode,
25598 gen_rtx_POST_INC (SImode,
25599 stack_pointer_rtx));
25600 set_mem_alias_set (addr, get_frame_alias_set ());
25601
25602 if (i == PC_REGNUM)
25603 {
25604 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25605 XVECEXP (insn, 0, 0) = ret_rtx;
25606 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25607 addr);
25608 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25609 insn = emit_jump_insn (insn);
25610 }
25611 else
25612 {
25613 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25614 addr));
25615 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25616 gen_rtx_REG (SImode, i),
25617 NULL_RTX);
25618 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25619 stack_pointer_rtx,
25620 stack_pointer_rtx);
25621 }
25622 }
25623 }
25624 else
25625 {
25626 if (TARGET_LDRD
25627 && current_tune->prefer_ldrd_strd
25628 && !optimize_function_for_size_p (cfun))
25629 {
25630 if (TARGET_THUMB2)
25631 thumb2_emit_ldrd_pop (saved_regs_mask);
25632 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25633 arm_emit_ldrd_pop (saved_regs_mask);
25634 else
25635 arm_emit_multi_reg_pop (saved_regs_mask);
25636 }
25637 else
25638 arm_emit_multi_reg_pop (saved_regs_mask);
25639 }
25640
25641 if (return_in_pc)
25642 return;
25643 }
25644
25645 amount
25646 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25647 if (amount)
25648 {
25649 int i, j;
25650 rtx dwarf = NULL_RTX;
25651 rtx_insn *tmp =
25652 emit_insn (gen_addsi3 (stack_pointer_rtx,
25653 stack_pointer_rtx,
25654 GEN_INT (amount)));
25655
25656 RTX_FRAME_RELATED_P (tmp) = 1;
25657
25658 if (cfun->machine->uses_anonymous_args)
25659 {
25660 /* Restore pretend args. Refer to arm_expand_prologue for how the
25661 pretend args are saved on the stack. */
25662 int num_regs = crtl->args.pretend_args_size / 4;
25663 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25664 for (j = 0, i = 0; j < num_regs; i++)
25665 if (saved_regs_mask & (1 << i))
25666 {
25667 rtx reg = gen_rtx_REG (SImode, i);
25668 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25669 j++;
25670 }
25671 REG_NOTES (tmp) = dwarf;
25672 }
25673 arm_add_cfa_adjust_cfa_note (tmp, amount,
25674 stack_pointer_rtx, stack_pointer_rtx);
25675 }
25676
25677 /* Clear all caller-saved regs that are not used to return. */
25678 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25679 {
25680 /* CMSE_ENTRY always returns. */
25681 gcc_assert (really_return);
25682 cmse_nonsecure_entry_clear_before_return ();
25683 }
25684
25685 if (!really_return)
25686 return;
25687
25688 if (crtl->calls_eh_return)
25689 emit_insn (gen_addsi3 (stack_pointer_rtx,
25690 stack_pointer_rtx,
25691 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25692
25693 if (IS_STACKALIGN (func_type))
25694 /* Restore the original stack pointer. Before prologue, the stack was
25695 realigned and the original stack pointer saved in r0. For details,
25696 see comment in arm_expand_prologue. */
25697 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25698
25699 emit_jump_insn (simple_return_rtx);
25700 }
25701
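/* Illustrative only (assuming ARM state, no frame pointer, optimization
   enabled): for a simple non-leaf function like

     extern int helper (int);

     int
     wrapper (int x)
     {
       return helper (x) + 1;
     }

   the return_in_pc path above typically folds the saved LR into the return,
   giving an epilogue along the lines of

     pop     {r4, pc}

   where the exact register set depends on what the prologue had to save
   (r4 here only keeps the stack 8-byte aligned).  */
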
25702 /* Implementation of insn prologue_thumb1_interwork. This is the first
25703 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25704
25705 const char *
25706 thumb1_output_interwork (void)
25707 {
25708 const char * name;
25709 FILE *f = asm_out_file;
25710
25711 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25712 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25713 == SYMBOL_REF);
25714 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25715
25716 /* Generate code sequence to switch us into Thumb mode. */
25717 /* The .code 32 directive has already been emitted by
25718 ASM_DECLARE_FUNCTION_NAME. */
25719 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25720 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25721
25722 /* Generate a label, so that the debugger will notice the
25723 change in instruction sets. This label is also used by
25724 the assembler to bypass the ARM code when this function
25725 is called from a Thumb encoded function elsewhere in the
25726 same file. Hence the definition of STUB_NAME here must
25727 agree with the definition in gas/config/tc-arm.c. */
25728
25729 #define STUB_NAME ".real_start_of"
25730
25731 fprintf (f, "\t.code\t16\n");
25732 #ifdef ARM_PE
25733 if (arm_dllexport_name_p (name))
25734 name = arm_strip_name_encoding (name);
25735 #endif
25736 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25737 fprintf (f, "\t.thumb_func\n");
25738 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25739
25740 return "";
25741 }
25742
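/* For reference, the text printed above for a function FOO looks roughly
   like this (label spelling depends on the user label prefix and any name
   encoding):

     orr     ip, pc, #1
     bx      ip
     .code   16
     .globl  .real_start_ofFOO
     .thumb_func
   .real_start_ofFOO:
   */
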
25743 /* Handle the case of a double word load into a low register from
25744 a computed memory address. The computed address may involve a
25745 register which is overwritten by the load. */
25746 const char *
25747 thumb_load_double_from_address (rtx *operands)
25748 {
25749 rtx addr;
25750 rtx base;
25751 rtx offset;
25752 rtx arg1;
25753 rtx arg2;
25754
25755 gcc_assert (REG_P (operands[0]));
25756 gcc_assert (MEM_P (operands[1]));
25757
25758 /* Get the memory address. */
25759 addr = XEXP (operands[1], 0);
25760
25761 /* Work out how the memory address is computed. */
25762 switch (GET_CODE (addr))
25763 {
25764 case REG:
25765 operands[2] = adjust_address (operands[1], SImode, 4);
25766
25767 if (REGNO (operands[0]) == REGNO (addr))
25768 {
25769 output_asm_insn ("ldr\t%H0, %2", operands);
25770 output_asm_insn ("ldr\t%0, %1", operands);
25771 }
25772 else
25773 {
25774 output_asm_insn ("ldr\t%0, %1", operands);
25775 output_asm_insn ("ldr\t%H0, %2", operands);
25776 }
25777 break;
25778
25779 case CONST:
25780 /* Compute <address> + 4 for the high order load. */
25781 operands[2] = adjust_address (operands[1], SImode, 4);
25782
25783 output_asm_insn ("ldr\t%0, %1", operands);
25784 output_asm_insn ("ldr\t%H0, %2", operands);
25785 break;
25786
25787 case PLUS:
25788 arg1 = XEXP (addr, 0);
25789 arg2 = XEXP (addr, 1);
25790
25791 if (CONSTANT_P (arg1))
25792 base = arg2, offset = arg1;
25793 else
25794 base = arg1, offset = arg2;
25795
25796 gcc_assert (REG_P (base));
25797
25798 /* Catch the case of <address> = <reg> + <reg> */
25799 if (REG_P (offset))
25800 {
25801 int reg_offset = REGNO (offset);
25802 int reg_base = REGNO (base);
25803 int reg_dest = REGNO (operands[0]);
25804
25805 /* Add the base and offset registers together into the
25806 higher destination register. */
25807 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25808 reg_dest + 1, reg_base, reg_offset);
25809
25810 /* Load the lower destination register from the address in
25811 the higher destination register. */
25812 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25813 reg_dest, reg_dest + 1);
25814
25815 /* Load the higher destination register from its own address
25816 plus 4. */
25817 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25818 reg_dest + 1, reg_dest + 1);
25819 }
25820 else
25821 {
25822 /* Compute <address> + 4 for the high order load. */
25823 operands[2] = adjust_address (operands[1], SImode, 4);
25824
25825 /* If the computed address is held in the low order register
25826 then load the high order register first, otherwise always
25827 load the low order register first. */
25828 if (REGNO (operands[0]) == REGNO (base))
25829 {
25830 output_asm_insn ("ldr\t%H0, %2", operands);
25831 output_asm_insn ("ldr\t%0, %1", operands);
25832 }
25833 else
25834 {
25835 output_asm_insn ("ldr\t%0, %1", operands);
25836 output_asm_insn ("ldr\t%H0, %2", operands);
25837 }
25838 }
25839 break;
25840
25841 case LABEL_REF:
25842 /* With no registers to worry about we can just load the value
25843 directly. */
25844 operands[2] = adjust_address (operands[1], SImode, 4);
25845
25846 output_asm_insn ("ldr\t%H0, %2", operands);
25847 output_asm_insn ("ldr\t%0, %1", operands);
25848 break;
25849
25850 default:
25851 gcc_unreachable ();
25852 }
25853
25854 return "";
25855 }
25856
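/* Worked example with illustrative register choices: for a doubleword load
   with operands[0] = r2 and operands[1] = [r0 + r1], the REG-plus-REG case
   above prints

     add     r3, r0, r1
     ldr     r2, [r3, #0]
     ldr     r3, [r3, #4]

   The high half of the destination doubles as the address register, so the
   base is safe even when it overlaps the low destination register.  */
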
25857 const char *
25858 thumb_output_move_mem_multiple (int n, rtx *operands)
25859 {
25860 switch (n)
25861 {
25862 case 2:
25863 if (REGNO (operands[4]) > REGNO (operands[5]))
25864 std::swap (operands[4], operands[5]);
25865
25866 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25867 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25868 break;
25869
25870 case 3:
25871 if (REGNO (operands[4]) > REGNO (operands[5]))
25872 std::swap (operands[4], operands[5]);
25873 if (REGNO (operands[5]) > REGNO (operands[6]))
25874 std::swap (operands[5], operands[6]);
25875 if (REGNO (operands[4]) > REGNO (operands[5]))
25876 std::swap (operands[4], operands[5]);
25877
25878 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25879 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25880 break;
25881
25882 default:
25883 gcc_unreachable ();
25884 }
25885
25886 return "";
25887 }
25888
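/* Illustrative output for the n == 3 case once operands[4..6] have been
   sorted into ascending order (say r3, r4, r5), with the source pointer in
   operand 1 and the destination pointer in operand 0:

     ldmia   r1!, {r3, r4, r5}
     stmia   r0!, {r3, r4, r5}
   */
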
25889 /* Output a call-via instruction for thumb state. */
25890 const char *
25891 thumb_call_via_reg (rtx reg)
25892 {
25893 int regno = REGNO (reg);
25894 rtx *labelp;
25895
25896 gcc_assert (regno < LR_REGNUM);
25897
25898 /* If we are in the normal text section we can use a single instance
25899 per compilation unit. If we are doing function sections, then we need
25900 an entry per section, since we can't rely on reachability. */
25901 if (in_section == text_section)
25902 {
25903 thumb_call_reg_needed = 1;
25904
25905 if (thumb_call_via_label[regno] == NULL)
25906 thumb_call_via_label[regno] = gen_label_rtx ();
25907 labelp = thumb_call_via_label + regno;
25908 }
25909 else
25910 {
25911 if (cfun->machine->call_via[regno] == NULL)
25912 cfun->machine->call_via[regno] = gen_label_rtx ();
25913 labelp = cfun->machine->call_via + regno;
25914 }
25915
25916 output_asm_insn ("bl\t%a0", labelp);
25917 return "";
25918 }
25919
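/* Sketch of the generated code for a call through r4 in the ordinary text
   section (label number invented for the example):

     bl      .L99            @ .L99 == thumb_call_via_label[4]
     ...
   .L99:
     bx      r4              @ emitted once per file by arm_file_end
   */
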
25920 /* Routines for generating rtl. */
25921 void
25922 thumb_expand_movmemqi (rtx *operands)
25923 {
25924 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25925 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25926 HOST_WIDE_INT len = INTVAL (operands[2]);
25927 HOST_WIDE_INT offset = 0;
25928
25929 while (len >= 12)
25930 {
25931 emit_insn (gen_movmem12b (out, in, out, in));
25932 len -= 12;
25933 }
25934
25935 if (len >= 8)
25936 {
25937 emit_insn (gen_movmem8b (out, in, out, in));
25938 len -= 8;
25939 }
25940
25941 if (len >= 4)
25942 {
25943 rtx reg = gen_reg_rtx (SImode);
25944 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25945 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25946 len -= 4;
25947 offset += 4;
25948 }
25949
25950 if (len >= 2)
25951 {
25952 rtx reg = gen_reg_rtx (HImode);
25953 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25954 plus_constant (Pmode, in,
25955 offset))));
25956 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25957 offset)),
25958 reg));
25959 len -= 2;
25960 offset += 2;
25961 }
25962
25963 if (len)
25964 {
25965 rtx reg = gen_reg_rtx (QImode);
25966 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25967 plus_constant (Pmode, in,
25968 offset))));
25969 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25970 offset)),
25971 reg));
25972 }
25973 }
25974
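/* Example decomposition (illustrative): a 15-byte copy becomes one 12-byte
   block move followed by a halfword and a byte for the 3-byte tail, i.e.
   15 = 12 (movmem12b) + 2 (HImode move) + 1 (QImode move); the block moves
   post-increment the pointer registers, while the tail uses OFFSET.  */
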
25975 void
25976 thumb_reload_out_hi (rtx *operands)
25977 {
25978 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25979 }
25980
25981 /* Return the length of a function name prefix
25982 that starts with the character 'c'. */
25983 static int
25984 arm_get_strip_length (int c)
25985 {
25986 switch (c)
25987 {
25988 ARM_NAME_ENCODING_LENGTHS
25989 default: return 0;
25990 }
25991 }
25992
25993 /* Return a pointer to a function's name with any
25994 and all prefix encodings stripped from it. */
25995 const char *
25996 arm_strip_name_encoding (const char *name)
25997 {
25998 int skip;
25999
26000 while ((skip = arm_get_strip_length (* name)))
26001 name += skip;
26002
26003 return name;
26004 }
26005
26006 /* If there is a '*' anywhere in the name's prefix, then
26007 emit the stripped name verbatim, otherwise prepend an
26008 underscore if leading underscores are being used. */
26009 void
26010 arm_asm_output_labelref (FILE *stream, const char *name)
26011 {
26012 int skip;
26013 int verbatim = 0;
26014
26015 while ((skip = arm_get_strip_length (* name)))
26016 {
26017 verbatim |= (*name == '*');
26018 name += skip;
26019 }
26020
26021 if (verbatim)
26022 fputs (name, stream);
26023 else
26024 asm_fprintf (stream, "%U%s", name);
26025 }
26026
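/* Small worked example of the two routines above, assuming the usual
   ARM_NAME_ENCODING_LENGTHS entry that maps '*' to a length of 1:

     arm_strip_name_encoding ("*foo")         returns "foo"
     arm_asm_output_labelref (stream, "*foo") prints  foo   (verbatim)
     arm_asm_output_labelref (stream, "foo")  prints  the user label prefix
                                              (often empty or "_"), then foo
   */
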
26027 /* This function is used to emit an EABI tag and its associated value.
26028 We emit the numerical value of the tag in case the assembler does not
26029 support textual tags (e.g. gas prior to 2.20). If requested we include
26030 the tag name in a comment so that anyone reading the assembler output
26031 will know which tag is being set.
26032
26033 This function is not static because arm-c.c needs it too. */
26034
26035 void
26036 arm_emit_eabi_attribute (const char *name, int num, int val)
26037 {
26038 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26039 if (flag_verbose_asm || flag_debug_asm)
26040 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26041 asm_fprintf (asm_out_file, "\n");
26042 }
26043
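/* Example of what this prints with -fverbose-asm, where ASM_COMMENT_START
   is "@" on ARM targets:

     .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
   */
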
26044 /* This function is used to print CPU tuning information as comment
26045 in assembler file. Pointers are not printed for now. */
26046
26047 void
26048 arm_print_tune_info (void)
26049 {
26050 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26051 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26052 current_tune->constant_limit);
26053 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26054 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26055 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26056 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26057 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26058 "prefetch.l1_cache_size:\t%d\n",
26059 current_tune->prefetch.l1_cache_size);
26060 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26061 "prefetch.l1_cache_line_size:\t%d\n",
26062 current_tune->prefetch.l1_cache_line_size);
26063 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26064 "prefer_constant_pool:\t%d\n",
26065 (int) current_tune->prefer_constant_pool);
26066 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26067 "branch_cost:\t(s:speed, p:predictable)\n");
26068 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26069 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26070 current_tune->branch_cost (false, false));
26071 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26072 current_tune->branch_cost (false, true));
26073 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26074 current_tune->branch_cost (true, false));
26075 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26076 current_tune->branch_cost (true, true));
26077 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26078 "prefer_ldrd_strd:\t%d\n",
26079 (int) current_tune->prefer_ldrd_strd);
26080 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26081 "logical_op_non_short_circuit:\t[%d,%d]\n",
26082 (int) current_tune->logical_op_non_short_circuit_thumb,
26083 (int) current_tune->logical_op_non_short_circuit_arm);
26084 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26085 "prefer_neon_for_64bits:\t%d\n",
26086 (int) current_tune->prefer_neon_for_64bits);
26087 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26088 "disparage_flag_setting_t16_encodings:\t%d\n",
26089 (int) current_tune->disparage_flag_setting_t16_encodings);
26090 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26091 "string_ops_prefer_neon:\t%d\n",
26092 (int) current_tune->string_ops_prefer_neon);
26093 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26094 "max_insns_inline_memset:\t%d\n",
26095 current_tune->max_insns_inline_memset);
26096 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26097 current_tune->fusible_ops);
26098 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26099 (int) current_tune->sched_autopref);
26100 }
26101
26102 static void
26103 arm_file_start (void)
26104 {
26105 int val;
26106
26107 if (TARGET_BPABI)
26108 {
26109 /* We don't have a specified CPU. Use the architecture to
26110 generate the tags.
26111
26112 Note: it might be better to do this unconditionally, then the
26113 assembler would not need to know about all new CPU names as
26114 they are added. */
26115 if (!arm_active_target.core_name)
26116 {
26117 /* armv7ve doesn't support any extensions. */
26118 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26119 {
26120 /* Keep backward compatibility for assemblers
26121 which don't support armv7ve. */
26122 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26123 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26124 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26125 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26126 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26127 }
26128 else
26129 {
26130 const char* pos = strchr (arm_active_target.arch_name, '+');
26131 if (pos)
26132 {
26133 char buf[32];
26134 gcc_assert (strlen (arm_active_target.arch_name)
26135 <= sizeof (buf) / sizeof (*pos));
26136 strncpy (buf, arm_active_target.arch_name,
26137 (pos - arm_active_target.arch_name) * sizeof (*pos));
26138 buf[pos - arm_active_target.arch_name] = '\0';
26139 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26140 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26141 }
26142 else
26143 asm_fprintf (asm_out_file, "\t.arch %s\n",
26144 arm_active_target.arch_name);
26145 }
26146 }
26147 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26148 asm_fprintf (asm_out_file, "\t.arch %s\n",
26149 arm_active_target.core_name + 8);
26150 else
26151 {
26152 const char* truncated_name
26153 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26154 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26155 }
26156
26157 if (print_tune_info)
26158 arm_print_tune_info ();
26159
26160 if (! TARGET_SOFT_FLOAT)
26161 {
26162 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26163 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26164
26165 if (TARGET_HARD_FLOAT_ABI)
26166 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26167 }
26168
26169 /* Some of these attributes only apply when the corresponding features
26170 are used. However we don't have any easy way of figuring this out.
26171 Conservatively record the setting that would have been used. */
26172
26173 if (flag_rounding_math)
26174 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26175
26176 if (!flag_unsafe_math_optimizations)
26177 {
26178 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26179 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26180 }
26181 if (flag_signaling_nans)
26182 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26183
26184 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26185 flag_finite_math_only ? 1 : 3);
26186
26187 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26188 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26189 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26190 flag_short_enums ? 1 : 2);
26191
26192 /* Tag_ABI_optimization_goals. */
26193 if (optimize_size)
26194 val = 4;
26195 else if (optimize >= 2)
26196 val = 2;
26197 else if (optimize)
26198 val = 1;
26199 else
26200 val = 6;
26201 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26202
26203 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26204 unaligned_access);
26205
26206 if (arm_fp16_format)
26207 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26208 (int) arm_fp16_format);
26209
26210 if (arm_lang_output_object_attributes_hook)
26211 arm_lang_output_object_attributes_hook();
26212 }
26213
26214 default_file_start ();
26215 }
26216
26217 static void
26218 arm_file_end (void)
26219 {
26220 int regno;
26221
26222 if (NEED_INDICATE_EXEC_STACK)
26223 /* Add .note.GNU-stack. */
26224 file_end_indicate_exec_stack ();
26225
26226 if (! thumb_call_reg_needed)
26227 return;
26228
26229 switch_to_section (text_section);
26230 asm_fprintf (asm_out_file, "\t.code 16\n");
26231 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26232
26233 for (regno = 0; regno < LR_REGNUM; regno++)
26234 {
26235 rtx label = thumb_call_via_label[regno];
26236
26237 if (label != 0)
26238 {
26239 targetm.asm_out.internal_label (asm_out_file, "L",
26240 CODE_LABEL_NUMBER (label));
26241 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26242 }
26243 }
26244 }
26245
26246 #ifndef ARM_PE
26247 /* Symbols in the text segment can be accessed without indirecting via the
26248 constant pool; it may take an extra binary operation, but this is still
26249 faster than indirecting via memory. Don't do this when not optimizing,
26250 since we won't be calculating all of the offsets necessary to do this
26251 simplification. */
26252
26253 static void
26254 arm_encode_section_info (tree decl, rtx rtl, int first)
26255 {
26256 if (optimize > 0 && TREE_CONSTANT (decl))
26257 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26258
26259 default_encode_section_info (decl, rtl, first);
26260 }
26261 #endif /* !ARM_PE */
26262
26263 static void
26264 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26265 {
26266 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26267 && !strcmp (prefix, "L"))
26268 {
26269 arm_ccfsm_state = 0;
26270 arm_target_insn = NULL;
26271 }
26272 default_internal_label (stream, prefix, labelno);
26273 }
26274
26275 /* Output code to add DELTA to the first argument, and then jump
26276 to FUNCTION. Used for C++ multiple inheritance. */
26277
26278 static void
26279 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26280 HOST_WIDE_INT, tree function)
26281 {
26282 static int thunk_label = 0;
26283 char label[256];
26284 char labelpc[256];
26285 int mi_delta = delta;
26286 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26287 int shift = 0;
26288 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26289 ? 1 : 0);
26290 if (mi_delta < 0)
26291 mi_delta = - mi_delta;
26292
26293 final_start_function (emit_barrier (), file, 1);
26294
26295 if (TARGET_THUMB1)
26296 {
26297 int labelno = thunk_label++;
26298 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26299 /* Thunks are entered in arm mode when available. */
26300 if (TARGET_THUMB1_ONLY)
26301 {
26302 /* push r3 so we can use it as a temporary. */
26303 /* TODO: Omit this save if r3 is not used. */
26304 fputs ("\tpush {r3}\n", file);
26305 fputs ("\tldr\tr3, ", file);
26306 }
26307 else
26308 {
26309 fputs ("\tldr\tr12, ", file);
26310 }
26311 assemble_name (file, label);
26312 fputc ('\n', file);
26313 if (flag_pic)
26314 {
26315 /* If we are generating PIC, the ldr instruction below loads
26316 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26317 the address of the add + 8, so we have:
26318
26319 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26320 = target + 1.
26321
26322 Note that we have "+ 1" because some versions of GNU ld
26323 don't set the low bit of the result for R_ARM_REL32
26324 relocations against thumb function symbols.
26325 On ARMv6M this is +4, not +8. */
26326 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26327 assemble_name (file, labelpc);
26328 fputs (":\n", file);
26329 if (TARGET_THUMB1_ONLY)
26330 {
26331 /* This is 2 insns after the start of the thunk, so we know it
26332 is 4-byte aligned. */
26333 fputs ("\tadd\tr3, pc, r3\n", file);
26334 fputs ("\tmov r12, r3\n", file);
26335 }
26336 else
26337 fputs ("\tadd\tr12, pc, r12\n", file);
26338 }
26339 else if (TARGET_THUMB1_ONLY)
26340 fputs ("\tmov r12, r3\n", file);
26341 }
26342 if (TARGET_THUMB1_ONLY)
26343 {
26344 if (mi_delta > 255)
26345 {
26346 fputs ("\tldr\tr3, ", file);
26347 assemble_name (file, label);
26348 fputs ("+4\n", file);
26349 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26350 mi_op, this_regno, this_regno);
26351 }
26352 else if (mi_delta != 0)
26353 {
26354 /* Thumb1 unified syntax requires the 's' suffix in the instruction name
26355 when one of the operands is an immediate. */
26356 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26357 mi_op, this_regno, this_regno,
26358 mi_delta);
26359 }
26360 }
26361 else
26362 {
26363 /* TODO: Use movw/movt for large constants when available. */
26364 while (mi_delta != 0)
26365 {
26366 if ((mi_delta & (3 << shift)) == 0)
26367 shift += 2;
26368 else
26369 {
26370 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26371 mi_op, this_regno, this_regno,
26372 mi_delta & (0xff << shift));
26373 mi_delta &= ~(0xff << shift);
26374 shift += 8;
26375 }
26376 }
26377 }
26378 if (TARGET_THUMB1)
26379 {
26380 if (TARGET_THUMB1_ONLY)
26381 fputs ("\tpop\t{r3}\n", file);
26382
26383 fprintf (file, "\tbx\tr12\n");
26384 ASM_OUTPUT_ALIGN (file, 2);
26385 assemble_name (file, label);
26386 fputs (":\n", file);
26387 if (flag_pic)
26388 {
26389 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26390 rtx tem = XEXP (DECL_RTL (function), 0);
26391 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26392 pipeline offset is four rather than eight. Adjust the offset
26393 accordingly. */
26394 tem = plus_constant (GET_MODE (tem), tem,
26395 TARGET_THUMB1_ONLY ? -3 : -7);
26396 tem = gen_rtx_MINUS (GET_MODE (tem),
26397 tem,
26398 gen_rtx_SYMBOL_REF (Pmode,
26399 ggc_strdup (labelpc)));
26400 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26401 }
26402 else
26403 /* Output ".word .LTHUNKn". */
26404 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26405
26406 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26407 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26408 }
26409 else
26410 {
26411 fputs ("\tb\t", file);
26412 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26413 if (NEED_PLT_RELOC)
26414 fputs ("(PLT)", file);
26415 fputc ('\n', file);
26416 }
26417
26418 final_end_function ();
26419 }
26420
26421 /* MI thunk handling for TARGET_32BIT. */
26422
26423 static void
26424 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26425 HOST_WIDE_INT vcall_offset, tree function)
26426 {
26427 /* On ARM, this_regno is R0 or R1 depending on
26428 whether the function returns an aggregate or not.
26429 */
26430 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26431 function)
26432 ? R1_REGNUM : R0_REGNUM);
26433
26434 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26435 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26436 reload_completed = 1;
26437 emit_note (NOTE_INSN_PROLOGUE_END);
26438
26439 /* Add DELTA to THIS_RTX. */
26440 if (delta != 0)
26441 arm_split_constant (PLUS, Pmode, NULL_RTX,
26442 delta, this_rtx, this_rtx, false);
26443
26444 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26445 if (vcall_offset != 0)
26446 {
26447 /* Load *THIS_RTX. */
26448 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26449 /* Compute *THIS_RTX + VCALL_OFFSET. */
26450 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26451 false);
26452 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26453 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26454 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26455 }
26456
26457 /* Generate a tail call to the target function. */
26458 if (!TREE_USED (function))
26459 {
26460 assemble_external (function);
26461 TREE_USED (function) = 1;
26462 }
26463 rtx funexp = XEXP (DECL_RTL (function), 0);
26464 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26465 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26466 SIBLING_CALL_P (insn) = 1;
26467
26468 insn = get_insns ();
26469 shorten_branches (insn);
26470 final_start_function (insn, file, 1);
26471 final (insn, file, 1);
26472 final_end_function ();
26473
26474 /* Stop pretending this is a post-reload pass. */
26475 reload_completed = 0;
26476 }
26477
26478 /* Output code to add DELTA to the first argument, and then jump
26479 to FUNCTION. Used for C++ multiple inheritance. */
26480
26481 static void
26482 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26483 HOST_WIDE_INT vcall_offset, tree function)
26484 {
26485 if (TARGET_32BIT)
26486 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26487 else
26488 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26489 }
26490
26491 int
26492 arm_emit_vector_const (FILE *file, rtx x)
26493 {
26494 int i;
26495 const char * pattern;
26496
26497 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26498
26499 switch (GET_MODE (x))
26500 {
26501 case V2SImode: pattern = "%08x"; break;
26502 case V4HImode: pattern = "%04x"; break;
26503 case V8QImode: pattern = "%02x"; break;
26504 default: gcc_unreachable ();
26505 }
26506
26507 fprintf (file, "0x");
26508 for (i = CONST_VECTOR_NUNITS (x); i--;)
26509 {
26510 rtx element;
26511
26512 element = CONST_VECTOR_ELT (x, i);
26513 fprintf (file, pattern, INTVAL (element));
26514 }
26515
26516 return 1;
26517 }
26518
26519 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26520 HFmode constant pool entries are actually loaded with ldr. */
26521 void
26522 arm_emit_fp16_const (rtx c)
26523 {
26524 long bits;
26525
26526 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26527 if (WORDS_BIG_ENDIAN)
26528 assemble_zeros (2);
26529 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26530 if (!WORDS_BIG_ENDIAN)
26531 assemble_zeros (2);
26532 }
26533
26534 const char *
26535 arm_output_load_gr (rtx *operands)
26536 {
26537 rtx reg;
26538 rtx offset;
26539 rtx wcgr;
26540 rtx sum;
26541
26542 if (!MEM_P (operands [1])
26543 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26544 || !REG_P (reg = XEXP (sum, 0))
26545 || !CONST_INT_P (offset = XEXP (sum, 1))
26546 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26547 return "wldrw%?\t%0, %1";
26548
26549 /* Fix up an out-of-range load of a GR register. */
26550 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26551 wcgr = operands[0];
26552 operands[0] = reg;
26553 output_asm_insn ("ldr%?\t%0, %1", operands);
26554
26555 operands[0] = wcgr;
26556 operands[1] = reg;
26557 output_asm_insn ("tmcr%?\t%0, %1", operands);
26558 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26559
26560 return "";
26561 }
26562
26563 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26564
26565 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26566 named arg and all anonymous args onto the stack.
26567 XXX I know the prologue shouldn't be pushing registers, but it is faster
26568 that way. */
26569
26570 static void
26571 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26572 machine_mode mode,
26573 tree type,
26574 int *pretend_size,
26575 int second_time ATTRIBUTE_UNUSED)
26576 {
26577 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26578 int nregs;
26579
26580 cfun->machine->uses_anonymous_args = 1;
26581 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26582 {
26583 nregs = pcum->aapcs_ncrn;
26584 if (nregs & 1)
26585 {
26586 int res = arm_needs_doubleword_align (mode, type);
26587 if (res < 0 && warn_psabi)
26588 inform (input_location, "parameter passing for argument of "
26589 "type %qT changed in GCC 7.1", type);
26590 else if (res > 0)
26591 nregs++;
26592 }
26593 }
26594 else
26595 nregs = pcum->nregs;
26596
26597 if (nregs < NUM_ARG_REGS)
26598 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26599 }
26600
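/* Worked example (a sketch under the AAPCS rules above): for

     int sum_rest (int first, ...);

   the single named argument occupies r0, so nregs is 1; an int needs no
   doubleword alignment, so nregs stays 1 and *pretend_size becomes
   (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12, making the prologue push r1-r3
   next to any stacked anonymous arguments.  */
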
26601 /* We can't rely on the caller doing the proper promotion when
26602 using APCS or ATPCS. */
26603
26604 static bool
26605 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26606 {
26607 return !TARGET_AAPCS_BASED;
26608 }
26609
26610 static machine_mode
26611 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26612 machine_mode mode,
26613 int *punsignedp ATTRIBUTE_UNUSED,
26614 const_tree fntype ATTRIBUTE_UNUSED,
26615 int for_return ATTRIBUTE_UNUSED)
26616 {
26617 if (GET_MODE_CLASS (mode) == MODE_INT
26618 && GET_MODE_SIZE (mode) < 4)
26619 return SImode;
26620
26621 return mode;
26622 }
26623
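/* Consequence (illustrative): a narrow integer value such as the argument
   and result of

     short bump (short x) { return x + 1; }

   is promoted to SImode, so it travels in a full 32-bit register.  */
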
26624
26625 static bool
26626 arm_default_short_enums (void)
26627 {
26628 return ARM_DEFAULT_SHORT_ENUMS;
26629 }
26630
26631
26632 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26633
26634 static bool
26635 arm_align_anon_bitfield (void)
26636 {
26637 return TARGET_AAPCS_BASED;
26638 }
26639
26640
26641 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26642
26643 static tree
26644 arm_cxx_guard_type (void)
26645 {
26646 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26647 }
26648
26649
26650 /* The EABI says test the least significant bit of a guard variable. */
26651
26652 static bool
26653 arm_cxx_guard_mask_bit (void)
26654 {
26655 return TARGET_AAPCS_BASED;
26656 }
26657
26658
26659 /* The EABI specifies that all array cookies are 8 bytes long. */
26660
26661 static tree
26662 arm_get_cookie_size (tree type)
26663 {
26664 tree size;
26665
26666 if (!TARGET_AAPCS_BASED)
26667 return default_cxx_get_cookie_size (type);
26668
26669 size = build_int_cst (sizetype, 8);
26670 return size;
26671 }
26672
26673
26674 /* The EABI says that array cookies should also contain the element size. */
26675
26676 static bool
26677 arm_cookie_has_size (void)
26678 {
26679 return TARGET_AAPCS_BASED;
26680 }
26681
26682
26683 /* The EABI says constructors and destructors should return a pointer to
26684 the object constructed/destroyed. */
26685
26686 static bool
26687 arm_cxx_cdtor_returns_this (void)
26688 {
26689 return TARGET_AAPCS_BASED;
26690 }
26691
26692 /* The EABI says that an inline function may never be the key
26693 method. */
26694
26695 static bool
26696 arm_cxx_key_method_may_be_inline (void)
26697 {
26698 return !TARGET_AAPCS_BASED;
26699 }
26700
26701 static void
26702 arm_cxx_determine_class_data_visibility (tree decl)
26703 {
26704 if (!TARGET_AAPCS_BASED
26705 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26706 return;
26707
26708 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26709 is exported. However, on systems without dynamic vague linkage,
26710 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26711 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26712 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26713 else
26714 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26715 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26716 }
26717
26718 static bool
26719 arm_cxx_class_data_always_comdat (void)
26720 {
26721 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26722 vague linkage if the class has no key function. */
26723 return !TARGET_AAPCS_BASED;
26724 }
26725
26726
26727 /* The EABI says __aeabi_atexit should be used to register static
26728 destructors. */
26729
26730 static bool
26731 arm_cxx_use_aeabi_atexit (void)
26732 {
26733 return TARGET_AAPCS_BASED;
26734 }
26735
26736
26737 void
26738 arm_set_return_address (rtx source, rtx scratch)
26739 {
26740 arm_stack_offsets *offsets;
26741 HOST_WIDE_INT delta;
26742 rtx addr;
26743 unsigned long saved_regs;
26744
26745 offsets = arm_get_frame_offsets ();
26746 saved_regs = offsets->saved_regs_mask;
26747
26748 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26749 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26750 else
26751 {
26752 if (frame_pointer_needed)
26753 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26754 else
26755 {
26756 /* LR will be the first saved register. */
26757 delta = offsets->outgoing_args - (offsets->frame + 4);
26758
26759
26760 if (delta >= 4096)
26761 {
26762 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26763 GEN_INT (delta & ~4095)));
26764 addr = scratch;
26765 delta &= 4095;
26766 }
26767 else
26768 addr = stack_pointer_rtx;
26769
26770 addr = plus_constant (Pmode, addr, delta);
26771 }
26772 /* The store needs to be marked as frame related in order to prevent
26773 DSE from deleting it as dead if it is based on fp. */
26774 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26775 RTX_FRAME_RELATED_P (insn) = 1;
26776 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26777 }
26778 }
26779
26780
26781 void
26782 thumb_set_return_address (rtx source, rtx scratch)
26783 {
26784 arm_stack_offsets *offsets;
26785 HOST_WIDE_INT delta;
26786 HOST_WIDE_INT limit;
26787 int reg;
26788 rtx addr;
26789 unsigned long mask;
26790
26791 emit_use (source);
26792
26793 offsets = arm_get_frame_offsets ();
26794 mask = offsets->saved_regs_mask;
26795 if (mask & (1 << LR_REGNUM))
26796 {
26797 limit = 1024;
26798 /* Find the saved regs. */
26799 if (frame_pointer_needed)
26800 {
26801 delta = offsets->soft_frame - offsets->saved_args;
26802 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26803 if (TARGET_THUMB1)
26804 limit = 128;
26805 }
26806 else
26807 {
26808 delta = offsets->outgoing_args - offsets->saved_args;
26809 reg = SP_REGNUM;
26810 }
26811 /* Allow for the stack frame. */
26812 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26813 delta -= 16;
26814 /* The link register is always the first saved register. */
26815 delta -= 4;
26816
26817 /* Construct the address. */
26818 addr = gen_rtx_REG (SImode, reg);
26819 if (delta > limit)
26820 {
26821 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26822 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26823 addr = scratch;
26824 }
26825 else
26826 addr = plus_constant (Pmode, addr, delta);
26827
26828 /* The store needs to be marked as frame related in order to prevent
26829 DSE from deleting it as dead if it is based on fp. */
26830 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26831 RTX_FRAME_RELATED_P (insn) = 1;
26832 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26833 }
26834 else
26835 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26836 }
26837
26838 /* Implements target hook vector_mode_supported_p. */
26839 bool
26840 arm_vector_mode_supported_p (machine_mode mode)
26841 {
26842 /* Neon also supports V2SImode, etc. listed in the clause below. */
26843 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26844 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26845 || mode == V2DImode || mode == V8HFmode))
26846 return true;
26847
26848 if ((TARGET_NEON || TARGET_IWMMXT)
26849 && ((mode == V2SImode)
26850 || (mode == V4HImode)
26851 || (mode == V8QImode)))
26852 return true;
26853
26854 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26855 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26856 || mode == V2HAmode))
26857 return true;
26858
26859 return false;
26860 }
26861
26862 /* Implements target hook array_mode_supported_p. */
26863
26864 static bool
26865 arm_array_mode_supported_p (machine_mode mode,
26866 unsigned HOST_WIDE_INT nelems)
26867 {
26868 if (TARGET_NEON
26869 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26870 && (nelems >= 2 && nelems <= 4))
26871 return true;
26872
26873 return false;
26874 }
26875
26876 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26877 registers when autovectorizing for Neon, at least until multiple vector
26878 widths are supported properly by the middle-end. */
26879
26880 static machine_mode
26881 arm_preferred_simd_mode (machine_mode mode)
26882 {
26883 if (TARGET_NEON)
26884 switch (mode)
26885 {
26886 case SFmode:
26887 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26888 case SImode:
26889 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26890 case HImode:
26891 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26892 case QImode:
26893 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26894 case DImode:
26895 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26896 return V2DImode;
26897 break;
26898
26899 default:;
26900 }
26901
26902 if (TARGET_REALLY_IWMMXT)
26903 switch (mode)
26904 {
26905 case SImode:
26906 return V2SImode;
26907 case HImode:
26908 return V4HImode;
26909 case QImode:
26910 return V8QImode;
26911
26912 default:;
26913 }
26914
26915 return word_mode;
26916 }
26917
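/* Illustrative only: with Neon enabled (and -funsafe-math-optimizations,
   since Neon float arithmetic is not fully IEEE), a loop such as

     void
     scale (float *a, float k, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] *= k;
     }

   is normally vectorized in V4SFmode; -mvectorize-with-neon-double flips
   the preference to V2SFmode, and iWMMXt targets fall back to the
   doubleword integer modes above.  */
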
26918 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26919
26920 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26921 using r0-r4 for function arguments, r7 for the stack frame and don't have
26922 enough left over to do doubleword arithmetic. For Thumb-2 all the
26923 potentially problematic instructions accept high registers so this is not
26924 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26925 that require many low registers. */
26926 static bool
26927 arm_class_likely_spilled_p (reg_class_t rclass)
26928 {
26929 if ((TARGET_THUMB1 && rclass == LO_REGS)
26930 || rclass == CC_REG)
26931 return true;
26932
26933 return false;
26934 }
26935
26936 /* Implements target hook small_register_classes_for_mode_p. */
26937 bool
26938 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26939 {
26940 return TARGET_THUMB1;
26941 }
26942
26943 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26944 ARM insns and therefore guarantee that the shift count is modulo 256.
26945 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26946 guarantee no particular behavior for out-of-range counts. */
26947
26948 static unsigned HOST_WIDE_INT
26949 arm_shift_truncation_mask (machine_mode mode)
26950 {
26951 return mode == SImode ? 255 : 0;
26952 }
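
/* Illustrative note, not part of the original sources: because the SImode
   shift count is truncated modulo 256 by the hardware, an explicit AND of
   the count with 255 feeding an SImode shift is redundant and the
   middle-end is allowed to remove it.  A source-level sketch:

       unsigned int
       shift_sketch (unsigned int x, unsigned int n)
       {
         return x << (n & 255);
       }

   Here the "& 255" can be dropped once the truncation mask of 255 is
   reported.  For DImode shifts no such guarantee is given, so the mask of
   0 keeps any explicit AND in place.  */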
26953
26954
26955 /* Map internal gcc register numbers to DWARF2 register numbers. */
26956
26957 unsigned int
26958 arm_dbx_register_number (unsigned int regno)
26959 {
26960 if (regno < 16)
26961 return regno;
26962
26963 if (IS_VFP_REGNUM (regno))
26964 {
26965 /* See comment in arm_dwarf_register_span. */
26966 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26967 return 64 + regno - FIRST_VFP_REGNUM;
26968 else
26969 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26970 }
26971
26972 if (IS_IWMMXT_GR_REGNUM (regno))
26973 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26974
26975 if (IS_IWMMXT_REGNUM (regno))
26976 return 112 + regno - FIRST_IWMMXT_REGNUM;
26977
26978 return DWARF_FRAME_REGISTERS;
26979 }
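
/* Worked example, illustrative only and based on the mapping above: the
   core registers r0-r15 keep their own numbers; the first VFP register
   (s0) maps to DWARF number 64 under the legacy single-precision
   numbering, while a double-only register such as d16 falls into the
   256-based range, giving 256 + 16 = 272.  Registers with no DWARF
   equivalent return DWARF_FRAME_REGISTERS as an invalid marker.  */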
26980
26981 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26982 GCC models them as 64 32-bit registers, so we need to describe this to
26983 the DWARF generation code. Other registers can use the default. */
26984 static rtx
26985 arm_dwarf_register_span (rtx rtl)
26986 {
26987 machine_mode mode;
26988 unsigned regno;
26989 rtx parts[16];
26990 int nregs;
26991 int i;
26992
26993 regno = REGNO (rtl);
26994 if (!IS_VFP_REGNUM (regno))
26995 return NULL_RTX;
26996
26997 /* XXX FIXME: The EABI defines two VFP register ranges:
26998 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26999 256-287: D0-D31
27000 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27001 corresponding D register. Until GDB supports this, we shall use the
27002 legacy encodings. We also use these encodings for D0-D15 for
27003 compatibility with older debuggers. */
27004 mode = GET_MODE (rtl);
27005 if (GET_MODE_SIZE (mode) < 8)
27006 return NULL_RTX;
27007
27008 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27009 {
27010 nregs = GET_MODE_SIZE (mode) / 4;
27011 for (i = 0; i < nregs; i += 2)
27012 if (TARGET_BIG_END)
27013 {
27014 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27015 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27016 }
27017 else
27018 {
27019 parts[i] = gen_rtx_REG (SImode, regno + i);
27020 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27021 }
27022 }
27023 else
27024 {
27025 nregs = GET_MODE_SIZE (mode) / 8;
27026 for (i = 0; i < nregs; i++)
27027 parts[i] = gen_rtx_REG (DImode, regno + i);
27028 }
27029
27030 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27031 }
27032
27033 #if ARM_UNWIND_INFO
27034 /* Emit unwind directives for a store-multiple instruction or stack pointer
27035 push during alignment.
27036 These should only ever be generated by the function prologue code, so
27037 expect them to have a particular form.
27038 The store-multiple instruction sometimes pushes pc as the last register,
27039 although it should not be tracked in the unwind information, or for -Os
27040 sometimes pushes some dummy registers before the first register that needs
27041 to be tracked in the unwind information; such dummy registers are there just
27042 to avoid a separate stack adjustment, and will not be restored in the
27043 epilogue. */
27044
27045 static void
27046 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27047 {
27048 int i;
27049 HOST_WIDE_INT offset;
27050 HOST_WIDE_INT nregs;
27051 int reg_size;
27052 unsigned reg;
27053 unsigned lastreg;
27054 unsigned padfirst = 0, padlast = 0;
27055 rtx e;
27056
27057 e = XVECEXP (p, 0, 0);
27058 gcc_assert (GET_CODE (e) == SET);
27059
27060 /* First insn will adjust the stack pointer. */
27061 gcc_assert (GET_CODE (e) == SET
27062 && REG_P (SET_DEST (e))
27063 && REGNO (SET_DEST (e)) == SP_REGNUM
27064 && GET_CODE (SET_SRC (e)) == PLUS);
27065
27066 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27067 nregs = XVECLEN (p, 0) - 1;
27068 gcc_assert (nregs);
27069
27070 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27071 if (reg < 16)
27072 {
27073 /* For -Os dummy registers can be pushed at the beginning to
27074 avoid separate stack pointer adjustment. */
27075 e = XVECEXP (p, 0, 1);
27076 e = XEXP (SET_DEST (e), 0);
27077 if (GET_CODE (e) == PLUS)
27078 padfirst = INTVAL (XEXP (e, 1));
27079 gcc_assert (padfirst == 0 || optimize_size);
27080 /* The function prologue may also push pc, but not annotate it as it is
27081 never restored. We turn this into a stack pointer adjustment. */
27082 e = XVECEXP (p, 0, nregs);
27083 e = XEXP (SET_DEST (e), 0);
27084 if (GET_CODE (e) == PLUS)
27085 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27086 else
27087 padlast = offset - 4;
27088 gcc_assert (padlast == 0 || padlast == 4);
27089 if (padlast == 4)
27090 fprintf (asm_out_file, "\t.pad #4\n");
27091 reg_size = 4;
27092 fprintf (asm_out_file, "\t.save {");
27093 }
27094 else if (IS_VFP_REGNUM (reg))
27095 {
27096 reg_size = 8;
27097 fprintf (asm_out_file, "\t.vsave {");
27098 }
27099 else
27100 /* Unknown register type. */
27101 gcc_unreachable ();
27102
27103 /* If the stack increment doesn't match the size of the saved registers,
27104 something has gone horribly wrong. */
27105 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27106
27107 offset = padfirst;
27108 lastreg = 0;
27109 /* The remaining insns will describe the stores. */
27110 for (i = 1; i <= nregs; i++)
27111 {
27112 /* Expect (set (mem <addr>) (reg)).
27113 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27114 e = XVECEXP (p, 0, i);
27115 gcc_assert (GET_CODE (e) == SET
27116 && MEM_P (SET_DEST (e))
27117 && REG_P (SET_SRC (e)));
27118
27119 reg = REGNO (SET_SRC (e));
27120 gcc_assert (reg >= lastreg);
27121
27122 if (i != 1)
27123 fprintf (asm_out_file, ", ");
27124 /* We can't use %r for vfp because we need to use the
27125 double precision register names. */
27126 if (IS_VFP_REGNUM (reg))
27127 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27128 else
27129 asm_fprintf (asm_out_file, "%r", reg);
27130
27131 if (flag_checking)
27132 {
27133 /* Check that the addresses are consecutive. */
27134 e = XEXP (SET_DEST (e), 0);
27135 if (GET_CODE (e) == PLUS)
27136 gcc_assert (REG_P (XEXP (e, 0))
27137 && REGNO (XEXP (e, 0)) == SP_REGNUM
27138 && CONST_INT_P (XEXP (e, 1))
27139 && offset == INTVAL (XEXP (e, 1)));
27140 else
27141 gcc_assert (i == 1
27142 && REG_P (e)
27143 && REGNO (e) == SP_REGNUM);
27144 offset += reg_size;
27145 }
27146 }
27147 fprintf (asm_out_file, "}\n");
27148 if (padfirst)
27149 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27150 }
27151
27152 /* Emit unwind directives for a SET. */
27153
27154 static void
27155 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27156 {
27157 rtx e0;
27158 rtx e1;
27159 unsigned reg;
27160
27161 e0 = XEXP (p, 0);
27162 e1 = XEXP (p, 1);
27163 switch (GET_CODE (e0))
27164 {
27165 case MEM:
27166 /* Pushing a single register. */
27167 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27168 || !REG_P (XEXP (XEXP (e0, 0), 0))
27169 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27170 abort ();
27171
27172 asm_fprintf (asm_out_file, "\t.save ");
27173 if (IS_VFP_REGNUM (REGNO (e1)))
27174 asm_fprintf(asm_out_file, "{d%d}\n",
27175 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27176 else
27177 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27178 break;
27179
27180 case REG:
27181 if (REGNO (e0) == SP_REGNUM)
27182 {
27183 /* A stack increment. */
27184 if (GET_CODE (e1) != PLUS
27185 || !REG_P (XEXP (e1, 0))
27186 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27187 || !CONST_INT_P (XEXP (e1, 1)))
27188 abort ();
27189
27190 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27191 -INTVAL (XEXP (e1, 1)));
27192 }
27193 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27194 {
27195 HOST_WIDE_INT offset;
27196
27197 if (GET_CODE (e1) == PLUS)
27198 {
27199 if (!REG_P (XEXP (e1, 0))
27200 || !CONST_INT_P (XEXP (e1, 1)))
27201 abort ();
27202 reg = REGNO (XEXP (e1, 0));
27203 offset = INTVAL (XEXP (e1, 1));
27204 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27205 HARD_FRAME_POINTER_REGNUM, reg,
27206 offset);
27207 }
27208 else if (REG_P (e1))
27209 {
27210 reg = REGNO (e1);
27211 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27212 HARD_FRAME_POINTER_REGNUM, reg);
27213 }
27214 else
27215 abort ();
27216 }
27217 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27218 {
27219 /* Move from sp to reg. */
27220 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27221 }
27222 else if (GET_CODE (e1) == PLUS
27223 && REG_P (XEXP (e1, 0))
27224 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27225 && CONST_INT_P (XEXP (e1, 1)))
27226 {
27227 /* Set reg to offset from sp. */
27228 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27229 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27230 }
27231 else
27232 abort ();
27233 break;
27234
27235 default:
27236 abort ();
27237 }
27238 }
27239
27240
27241 /* Emit unwind directives for the given insn. */
27242
27243 static void
27244 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27245 {
27246 rtx note, pat;
27247 bool handled_one = false;
27248
27249 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27250 return;
27251
27252 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27253 && (TREE_NOTHROW (current_function_decl)
27254 || crtl->all_throwers_are_sibcalls))
27255 return;
27256
27257 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27258 return;
27259
27260 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27261 {
27262 switch (REG_NOTE_KIND (note))
27263 {
27264 case REG_FRAME_RELATED_EXPR:
27265 pat = XEXP (note, 0);
27266 goto found;
27267
27268 case REG_CFA_REGISTER:
27269 pat = XEXP (note, 0);
27270 if (pat == NULL)
27271 {
27272 pat = PATTERN (insn);
27273 if (GET_CODE (pat) == PARALLEL)
27274 pat = XVECEXP (pat, 0, 0);
27275 }
27276
27277 /* Only emitted for IS_STACKALIGN re-alignment. */
27278 {
27279 rtx dest, src;
27280 unsigned reg;
27281
27282 src = SET_SRC (pat);
27283 dest = SET_DEST (pat);
27284
27285 gcc_assert (src == stack_pointer_rtx);
27286 reg = REGNO (dest);
27287 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27288 reg + 0x90, reg);
27289 }
27290 handled_one = true;
27291 break;
27292
27293 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
27294 to get correct DWARF information for shrink-wrapping. We should not
27295 emit unwind information for it because these notes are used either for
27296 pretend arguments or to adjust sp and restore registers from the
27297 stack. */
27298 case REG_CFA_DEF_CFA:
27299 case REG_CFA_ADJUST_CFA:
27300 case REG_CFA_RESTORE:
27301 return;
27302
27303 case REG_CFA_EXPRESSION:
27304 case REG_CFA_OFFSET:
27305 /* ??? Only handling here what we actually emit. */
27306 gcc_unreachable ();
27307
27308 default:
27309 break;
27310 }
27311 }
27312 if (handled_one)
27313 return;
27314 pat = PATTERN (insn);
27315 found:
27316
27317 switch (GET_CODE (pat))
27318 {
27319 case SET:
27320 arm_unwind_emit_set (asm_out_file, pat);
27321 break;
27322
27323 case SEQUENCE:
27324 /* Store multiple. */
27325 arm_unwind_emit_sequence (asm_out_file, pat);
27326 break;
27327
27328 default:
27329 abort();
27330 }
27331 }
27332
27333
27334 /* Output a reference from a function exception table to the type_info
27335 object X. The EABI specifies that the symbol should be relocated by
27336 an R_ARM_TARGET2 relocation. */
27337
27338 static bool
27339 arm_output_ttype (rtx x)
27340 {
27341 fputs ("\t.word\t", asm_out_file);
27342 output_addr_const (asm_out_file, x);
27343 /* Use special relocations for symbol references. */
27344 if (!CONST_INT_P (x))
27345 fputs ("(TARGET2)", asm_out_file);
27346 fputc ('\n', asm_out_file);
27347
27348 return TRUE;
27349 }
27350
27351 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27352
27353 static void
27354 arm_asm_emit_except_personality (rtx personality)
27355 {
27356 fputs ("\t.personality\t", asm_out_file);
27357 output_addr_const (asm_out_file, personality);
27358 fputc ('\n', asm_out_file);
27359 }
27360 #endif /* ARM_UNWIND_INFO */
27361
27362 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27363
27364 static void
27365 arm_asm_init_sections (void)
27366 {
27367 #if ARM_UNWIND_INFO
27368 exception_section = get_unnamed_section (0, output_section_asm_op,
27369 "\t.handlerdata");
27370 #endif /* ARM_UNWIND_INFO */
27371
27372 #ifdef OBJECT_FORMAT_ELF
27373 if (target_pure_code)
27374 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27375 #endif
27376 }
27377
27378 /* Output unwind directives for the start/end of a function. */
27379
27380 void
27381 arm_output_fn_unwind (FILE * f, bool prologue)
27382 {
27383 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27384 return;
27385
27386 if (prologue)
27387 fputs ("\t.fnstart\n", f);
27388 else
27389 {
27390 /* If this function will never be unwound, then mark it as such.
27391 The same condition is used in arm_unwind_emit to suppress
27392 the frame annotations. */
27393 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27394 && (TREE_NOTHROW (current_function_decl)
27395 || crtl->all_throwers_are_sibcalls))
27396 fputs("\t.cantunwind\n", f);
27397
27398 fputs ("\t.fnend\n", f);
27399 }
27400 }
27401
27402 static bool
27403 arm_emit_tls_decoration (FILE *fp, rtx x)
27404 {
27405 enum tls_reloc reloc;
27406 rtx val;
27407
27408 val = XVECEXP (x, 0, 0);
27409 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27410
27411 output_addr_const (fp, val);
27412
27413 switch (reloc)
27414 {
27415 case TLS_GD32:
27416 fputs ("(tlsgd)", fp);
27417 break;
27418 case TLS_LDM32:
27419 fputs ("(tlsldm)", fp);
27420 break;
27421 case TLS_LDO32:
27422 fputs ("(tlsldo)", fp);
27423 break;
27424 case TLS_IE32:
27425 fputs ("(gottpoff)", fp);
27426 break;
27427 case TLS_LE32:
27428 fputs ("(tpoff)", fp);
27429 break;
27430 case TLS_DESCSEQ:
27431 fputs ("(tlsdesc)", fp);
27432 break;
27433 default:
27434 gcc_unreachable ();
27435 }
27436
27437 switch (reloc)
27438 {
27439 case TLS_GD32:
27440 case TLS_LDM32:
27441 case TLS_IE32:
27442 case TLS_DESCSEQ:
27443 fputs (" + (. - ", fp);
27444 output_addr_const (fp, XVECEXP (x, 0, 2));
27445 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27446 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27447 output_addr_const (fp, XVECEXP (x, 0, 3));
27448 fputc (')', fp);
27449 break;
27450 default:
27451 break;
27452 }
27453
27454 return TRUE;
27455 }
27456
27457 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27458
27459 static void
27460 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27461 {
27462 gcc_assert (size == 4);
27463 fputs ("\t.word\t", file);
27464 output_addr_const (file, x);
27465 fputs ("(tlsldo)", file);
27466 }
27467
27468 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27469
27470 static bool
27471 arm_output_addr_const_extra (FILE *fp, rtx x)
27472 {
27473 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27474 return arm_emit_tls_decoration (fp, x);
27475 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27476 {
27477 char label[256];
27478 int labelno = INTVAL (XVECEXP (x, 0, 0));
27479
27480 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27481 assemble_name_raw (fp, label);
27482
27483 return TRUE;
27484 }
27485 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27486 {
27487 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27488 if (GOT_PCREL)
27489 fputs ("+.", fp);
27490 fputs ("-(", fp);
27491 output_addr_const (fp, XVECEXP (x, 0, 0));
27492 fputc (')', fp);
27493 return TRUE;
27494 }
27495 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27496 {
27497 output_addr_const (fp, XVECEXP (x, 0, 0));
27498 if (GOT_PCREL)
27499 fputs ("+.", fp);
27500 fputs ("-(", fp);
27501 output_addr_const (fp, XVECEXP (x, 0, 1));
27502 fputc (')', fp);
27503 return TRUE;
27504 }
27505 else if (GET_CODE (x) == CONST_VECTOR)
27506 return arm_emit_vector_const (fp, x);
27507
27508 return FALSE;
27509 }
27510
27511 /* Output assembly for a shift instruction.
27512 SET_FLAGS determines how the instruction modifies the condition codes.
27513 0 - Do not set condition codes.
27514 1 - Set condition codes.
27515 2 - Use smallest instruction. */
27516 const char *
27517 arm_output_shift(rtx * operands, int set_flags)
27518 {
27519 char pattern[100];
27520 static const char flag_chars[3] = {'?', '.', '!'};
27521 const char *shift;
27522 HOST_WIDE_INT val;
27523 char c;
27524
27525 c = flag_chars[set_flags];
27526 shift = shift_op(operands[3], &val);
27527 if (shift)
27528 {
27529 if (val != -1)
27530 operands[2] = GEN_INT(val);
27531 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27532 }
27533 else
27534 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27535
27536 output_asm_insn (pattern, operands);
27537 return "";
27538 }
27539
27540 /* Output assembly for a WMMX immediate shift instruction. */
27541 const char *
27542 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27543 {
27544 int shift = INTVAL (operands[2]);
27545 char templ[50];
27546 machine_mode opmode = GET_MODE (operands[0]);
27547
27548 gcc_assert (shift >= 0);
27549
27550 /* Handle the case where the shift value exceeds 63 (for the D qualifier),
27551 31 (for the W qualifier) or 15 (for the H qualifier). */
27552 if (((opmode == V4HImode) && (shift > 15))
27553 || ((opmode == V2SImode) && (shift > 31))
27554 || ((opmode == DImode) && (shift > 63)))
27555 {
27556 if (wror_or_wsra)
27557 {
27558 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27559 output_asm_insn (templ, operands);
27560 if (opmode == DImode)
27561 {
27562 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27563 output_asm_insn (templ, operands);
27564 }
27565 }
27566 else
27567 {
27568 /* The destination register will contain all zeros. */
27569 sprintf (templ, "wzero\t%%0");
27570 output_asm_insn (templ, operands);
27571 }
27572 return "";
27573 }
27574
27575 if ((opmode == DImode) && (shift > 32))
27576 {
27577 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27578 output_asm_insn (templ, operands);
27579 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27580 output_asm_insn (templ, operands);
27581 }
27582 else
27583 {
27584 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27585 output_asm_insn (templ, operands);
27586 }
27587 return "";
27588 }
27589
27590 /* Output assembly for a WMMX tinsr instruction. */
27591 const char *
27592 arm_output_iwmmxt_tinsr (rtx *operands)
27593 {
27594 int mask = INTVAL (operands[3]);
27595 int i;
27596 char templ[50];
27597 int units = mode_nunits[GET_MODE (operands[0])];
27598 gcc_assert ((mask & (mask - 1)) == 0);
27599 for (i = 0; i < units; ++i)
27600 {
27601 if ((mask & 0x01) == 1)
27602 {
27603 break;
27604 }
27605 mask >>= 1;
27606 }
27607 gcc_assert (i < units);
27608 {
27609 switch (GET_MODE (operands[0]))
27610 {
27611 case V8QImode:
27612 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27613 break;
27614 case V4HImode:
27615 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27616 break;
27617 case V2SImode:
27618 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27619 break;
27620 default:
27621 gcc_unreachable ();
27622 break;
27623 }
27624 output_asm_insn (templ, operands);
27625 }
27626 return "";
27627 }
27628
27629 /* Output a Thumb-1 casesi dispatch sequence. */
27630 const char *
27631 thumb1_output_casesi (rtx *operands)
27632 {
27633 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27634
27635 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27636
27637 switch (GET_MODE(diff_vec))
27638 {
27639 case QImode:
27640 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27641 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27642 case HImode:
27643 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27644 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27645 case SImode:
27646 return "bl\t%___gnu_thumb1_case_si";
27647 default:
27648 gcc_unreachable ();
27649 }
27650 }
27651
27652 /* Output a Thumb-2 casesi instruction. */
27653 const char *
27654 thumb2_output_casesi (rtx *operands)
27655 {
27656 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27657
27658 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27659
27660 output_asm_insn ("cmp\t%0, %1", operands);
27661 output_asm_insn ("bhi\t%l3", operands);
27662 switch (GET_MODE(diff_vec))
27663 {
27664 case QImode:
27665 return "tbb\t[%|pc, %0]";
27666 case HImode:
27667 return "tbh\t[%|pc, %0, lsl #1]";
27668 case SImode:
27669 if (flag_pic)
27670 {
27671 output_asm_insn ("adr\t%4, %l2", operands);
27672 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27673 output_asm_insn ("add\t%4, %4, %5", operands);
27674 return "bx\t%4";
27675 }
27676 else
27677 {
27678 output_asm_insn ("adr\t%4, %l2", operands);
27679 return "ldr\t%|pc, [%4, %0, lsl #2]";
27680 }
27681 default:
27682 gcc_unreachable ();
27683 }
27684 }
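
/* Illustrative sketch, not the literal compiler output: for a QImode
   dispatch table the routine above emits a sequence along the lines of

       cmp   r0, r1         @ index against the number of cases
       bhi   .Ldefault      @ out of range, take the default label
       tbb   [pc, r0]       @ byte-offset table branch

   while the SImode, non-PIC case loads the jump target straight into the
   pc with an adr/ldr pair.  The register names here stand in for the
   operands of the pattern.  */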
27685
27686 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27687 per-core tuning structs. */
27688 static int
27689 arm_issue_rate (void)
27690 {
27691 return current_tune->issue_rate;
27692 }
27693
27694 /* Return how many instructions the scheduler should look ahead to choose the
27695 best one. */
27696 static int
27697 arm_first_cycle_multipass_dfa_lookahead (void)
27698 {
27699 int issue_rate = arm_issue_rate ();
27700
27701 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27702 }
27703
27704 /* Enable modeling of L2 auto-prefetcher. */
27705 static int
27706 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27707 {
27708 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27709 }
27710
27711 const char *
27712 arm_mangle_type (const_tree type)
27713 {
27714 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27715 has to be mangled as if it is in the "std" namespace. */
27716 if (TARGET_AAPCS_BASED
27717 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27718 return "St9__va_list";
27719
27720 /* Half-precision float. */
27721 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27722 return "Dh";
27723
27724 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27725 builtin type. */
27726 if (TYPE_NAME (type) != NULL)
27727 return arm_mangle_builtin_type (type);
27728
27729 /* Use the default mangling. */
27730 return NULL;
27731 }
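
/* Example, illustrative only: on an AAPCS-based target a declaration such
   as

       void f (__builtin_va_list);

   is mangled as if va_list lived in namespace std, giving
   _Z1fSt9__va_list, and the half-precision floating-point type mangles
   to "Dh" as returned above.  */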
27732
27733 /* Order of allocation of core registers for Thumb: this allocation is
27734 written over the corresponding initial entries of the array
27735 initialized with REG_ALLOC_ORDER. We allocate all low registers
27736 first. Saving and restoring a low register is usually cheaper than
27737 using a call-clobbered high register. */
27738
27739 static const int thumb_core_reg_alloc_order[] =
27740 {
27741 3, 2, 1, 0, 4, 5, 6, 7,
27742 12, 14, 8, 9, 10, 11
27743 };
27744
27745 /* Adjust register allocation order when compiling for Thumb. */
27746
27747 void
27748 arm_order_regs_for_local_alloc (void)
27749 {
27750 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27751 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27752 if (TARGET_THUMB)
27753 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27754 sizeof (thumb_core_reg_alloc_order));
27755 }
27756
27757 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27758
27759 bool
27760 arm_frame_pointer_required (void)
27761 {
27762 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27763 return true;
27764
27765 /* If the function receives nonlocal gotos, it needs to save the frame
27766 pointer in the nonlocal_goto_save_area object. */
27767 if (cfun->has_nonlocal_label)
27768 return true;
27769
27770 /* The frame pointer is required for non-leaf APCS frames. */
27771 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27772 return true;
27773
27774 /* If we are probing the stack in the prologue, we will have a faulting
27775 instruction prior to the stack adjustment and this requires a frame
27776 pointer if we want to catch the exception using the EABI unwinder. */
27777 if (!IS_INTERRUPT (arm_current_func_type ())
27778 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27779 && arm_except_unwind_info (&global_options) == UI_TARGET
27780 && cfun->can_throw_non_call_exceptions)
27781 {
27782 HOST_WIDE_INT size = get_frame_size ();
27783
27784 /* That's irrelevant if there is no stack adjustment. */
27785 if (size <= 0)
27786 return false;
27787
27788 /* That's relevant only if there is a stack probe. */
27789 if (crtl->is_leaf && !cfun->calls_alloca)
27790 {
27791 /* We don't have the final size of the frame so adjust. */
27792 size += 32 * UNITS_PER_WORD;
27793 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27794 return true;
27795 }
27796 else
27797 return true;
27798 }
27799
27800 return false;
27801 }
27802
27803 /* Thumb-1 is the only target that cannot support conditional execution,
27804 so return true if the target is not Thumb-1. */
27805 static bool
27806 arm_have_conditional_execution (void)
27807 {
27808 return !TARGET_THUMB1;
27809 }
27810
27811 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27812 static HOST_WIDE_INT
27813 arm_vector_alignment (const_tree type)
27814 {
27815 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27816
27817 if (TARGET_AAPCS_BASED)
27818 align = MIN (align, 64);
27819
27820 return align;
27821 }
27822
27823 static unsigned int
27824 arm_autovectorize_vector_sizes (void)
27825 {
27826 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27827 }
27828
27829 static bool
27830 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27831 {
27832 /* Vectors which aren't in packed structures will not be less aligned than
27833 the natural alignment of their element type, so this is safe. */
27834 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27835 return !is_packed;
27836
27837 return default_builtin_vector_alignment_reachable (type, is_packed);
27838 }
27839
27840 static bool
27841 arm_builtin_support_vector_misalignment (machine_mode mode,
27842 const_tree type, int misalignment,
27843 bool is_packed)
27844 {
27845 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27846 {
27847 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27848
27849 if (is_packed)
27850 return align == 1;
27851
27852 /* If the misalignment is unknown, we should be able to handle the access
27853 so long as it is not to a member of a packed data structure. */
27854 if (misalignment == -1)
27855 return true;
27856
27857 /* Return true if the misalignment is a multiple of the natural alignment
27858 of the vector's element type. This is probably always going to be
27859 true in practice, since we've already established that this isn't a
27860 packed access. */
27861 return ((misalignment % align) == 0);
27862 }
27863
27864 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27865 is_packed);
27866 }
27867
27868 static void
27869 arm_conditional_register_usage (void)
27870 {
27871 int regno;
27872
27873 if (TARGET_THUMB1 && optimize_size)
27874 {
27875 /* When optimizing for size on Thumb-1, it's better not
27876 to use the HI regs, because of the overhead of
27877 stacking them. */
27878 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27879 fixed_regs[regno] = call_used_regs[regno] = 1;
27880 }
27881
27882 /* The link register can be clobbered by any branch insn,
27883 but we have no way to track that at present, so mark
27884 it as unavailable. */
27885 if (TARGET_THUMB1)
27886 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27887
27888 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27889 {
27890 /* VFPv3 registers are disabled when earlier VFP
27891 versions are selected due to the definition of
27892 LAST_VFP_REGNUM. */
27893 for (regno = FIRST_VFP_REGNUM;
27894 regno <= LAST_VFP_REGNUM; ++ regno)
27895 {
27896 fixed_regs[regno] = 0;
27897 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27898 || regno >= FIRST_VFP_REGNUM + 32;
27899 }
27900 }
27901
27902 if (TARGET_REALLY_IWMMXT)
27903 {
27904 regno = FIRST_IWMMXT_GR_REGNUM;
27905 /* The 2002/10/09 revision of the XScale ABI has wCG0
27906 and wCG1 as call-preserved registers. The 2002/11/21
27907 revision changed this so that all wCG registers are
27908 scratch registers. */
27909 for (regno = FIRST_IWMMXT_GR_REGNUM;
27910 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27911 fixed_regs[regno] = 0;
27912 /* The XScale ABI has wR0 - wR9 as scratch registers,
27913 the rest as call-preserved registers. */
27914 for (regno = FIRST_IWMMXT_REGNUM;
27915 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27916 {
27917 fixed_regs[regno] = 0;
27918 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27919 }
27920 }
27921
27922 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27923 {
27924 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27925 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27926 }
27927 else if (TARGET_APCS_STACK)
27928 {
27929 fixed_regs[10] = 1;
27930 call_used_regs[10] = 1;
27931 }
27932 /* -mcaller-super-interworking reserves r11 for calls to
27933 _interwork_r11_call_via_rN(). Making the register global
27934 is an easy way of ensuring that it remains valid for all
27935 calls. */
27936 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27937 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27938 {
27939 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27940 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27941 if (TARGET_CALLER_INTERWORKING)
27942 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27943 }
27944 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27945 }
27946
27947 static reg_class_t
27948 arm_preferred_rename_class (reg_class_t rclass)
27949 {
27950 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27951 using GENERAL_REGS. During the register rename pass we prefer LO_REGS
27952 so that code size can be reduced. */
27953 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27954 return LO_REGS;
27955 else
27956 return NO_REGS;
27957 }
27958
27959 /* Compute the attribute "length" of insn "*push_multi".
27960 This function MUST therefore be kept in sync with that insn pattern. */
27961 int
27962 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27963 {
27964 int i, regno, hi_reg;
27965 int num_saves = XVECLEN (parallel_op, 0);
27966
27967 /* ARM mode. */
27968 if (TARGET_ARM)
27969 return 4;
27970 /* Thumb1 mode. */
27971 if (TARGET_THUMB1)
27972 return 2;
27973
27974 /* Thumb2 mode. */
27975 regno = REGNO (first_op);
27976 /* For PUSH/STM under Thumb-2, we can use a 16-bit encoding if the register
27977 list fits in 8 bits. Normally this means all registers in the list must be
27978 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use the
27979 32-bit encoding. The one exception is PUSH, where LR (a HI_REG) can still
27980 be used with the 16-bit encoding. */
27981 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27982 for (i = 1; i < num_saves && !hi_reg; i++)
27983 {
27984 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27985 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27986 }
27987
27988 if (!hi_reg)
27989 return 2;
27990 return 4;
27991 }
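
/* Illustrative examples of the rule above, assuming Thumb-2 (ARM mode is
   always 4 bytes and Thumb-1 always 2):

       push {r0-r7}       -> 2 bytes, all LO_REGS
       push {r4-r7, lr}   -> 2 bytes, LR is the permitted high register
       push {r4, r8}      -> 4 bytes, r8 is a high register other than LR  */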
27992
27993 /* Compute the attribute "length" of insn. Currently, this function is used
27994 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27995 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27996 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27997 true if OPERANDS contains an insn which explicitly updates the base register. */
27998
27999 int
28000 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28001 {
28002 /* ARM mode. */
28003 if (TARGET_ARM)
28004 return 4;
28005 /* Thumb1 mode. */
28006 if (TARGET_THUMB1)
28007 return 2;
28008
28009 rtx parallel_op = operands[0];
28010 /* Initialize to elements number of PARALLEL. */
28011 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28012 /* Initialize the value to base register. */
28013 unsigned regno = REGNO (operands[1]);
28014 /* Skip return and write back pattern.
28015 We only need register pop pattern for later analysis. */
28016 unsigned first_indx = 0;
28017 first_indx += return_pc ? 1 : 0;
28018 first_indx += write_back_p ? 1 : 0;
28019
28020 /* A pop operation can be done through LDM or POP. If the base register is SP
28021 and write-back is used, then LDM is an alias of POP. */
28022 bool pop_p = (regno == SP_REGNUM && write_back_p);
28023 bool ldm_p = !pop_p;
28024
28025 /* Check base register for LDM. */
28026 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28027 return 4;
28028
28029 /* Check each register in the list. */
28030 for (; indx >= first_indx; indx--)
28031 {
28032 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28033 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28034 comment in arm_attr_length_push_multi. */
28035 if (REGNO_REG_CLASS (regno) == HI_REGS
28036 && (regno != PC_REGNUM || ldm_p))
28037 return 4;
28038 }
28039
28040 return 2;
28041 }
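
/* Illustrative examples for the length computation above, assuming
   Thumb-2:

       pop {r4-r7, pc}       -> 2 bytes, PC is allowed in the 16-bit POP
       ldm r8!, {r0-r3}      -> 4 bytes, a high base register forces 32 bits
       ldm r0, {r4-r7, pc}   -> 4 bytes, PC only gets 16 bits with POP  */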
28042
28043 /* Compute the number of instructions emitted by output_move_double. */
28044 int
28045 arm_count_output_move_double_insns (rtx *operands)
28046 {
28047 int count;
28048 rtx ops[2];
28049 /* output_move_double may modify the operands array, so call it
28050 here on a copy of the array. */
28051 ops[0] = operands[0];
28052 ops[1] = operands[1];
28053 output_move_double (ops, false, &count);
28054 return count;
28055 }
28056
28057 int
28058 vfp3_const_double_for_fract_bits (rtx operand)
28059 {
28060 REAL_VALUE_TYPE r0;
28061
28062 if (!CONST_DOUBLE_P (operand))
28063 return 0;
28064
28065 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28066 if (exact_real_inverse (DFmode, &r0)
28067 && !REAL_VALUE_NEGATIVE (r0))
28068 {
28069 if (exact_real_truncate (DFmode, &r0))
28070 {
28071 HOST_WIDE_INT value = real_to_integer (&r0);
28072 value = value & 0xffffffff;
28073 if ((value != 0) && ( (value & (value - 1)) == 0))
28074 {
28075 int ret = exact_log2 (value);
28076 gcc_assert (IN_RANGE (ret, 0, 31));
28077 return ret;
28078 }
28079 }
28080 }
28081 return 0;
28082 }
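
/* Worked example, illustrative only: a constant of 0.125 has the exact
   reciprocal 8.0 = 2^3, so the function above returns 3, the number of
   fractional bits used by the fixed-point conversion patterns; any value
   whose reciprocal is not an exact power of two, or which is negative,
   yields 0.  */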
28083
28084 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28085 log2 is in [1, 32], return that log2. Otherwise return -1.
28086 This is used in the patterns for vcvt.s32.f32 floating-point to
28087 fixed-point conversions. */
28088
28089 int
28090 vfp3_const_double_for_bits (rtx x)
28091 {
28092 const REAL_VALUE_TYPE *r;
28093
28094 if (!CONST_DOUBLE_P (x))
28095 return -1;
28096
28097 r = CONST_DOUBLE_REAL_VALUE (x);
28098
28099 if (REAL_VALUE_NEGATIVE (*r)
28100 || REAL_VALUE_ISNAN (*r)
28101 || REAL_VALUE_ISINF (*r)
28102 || !real_isinteger (r, SFmode))
28103 return -1;
28104
28105 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28106
28107 /* The exact_log2 above will have returned -1 if this is
28108 not an exact log2. */
28109 if (!IN_RANGE (hwint, 1, 32))
28110 return -1;
28111
28112 return hwint;
28113 }
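
/* Worked example, illustrative only: for the constant 8.0 the integer
   value is 8 and exact_log2 gives 3, which lies in [1, 32], so 3 is
   returned; 1.0 gives a log2 of 0 and is rejected, as are negative
   values, NaNs, infinities and non-integers.  */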
28114
28115 \f
28116 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28117
28118 static void
28119 arm_pre_atomic_barrier (enum memmodel model)
28120 {
28121 if (need_atomic_barrier_p (model, true))
28122 emit_insn (gen_memory_barrier ());
28123 }
28124
28125 static void
28126 arm_post_atomic_barrier (enum memmodel model)
28127 {
28128 if (need_atomic_barrier_p (model, false))
28129 emit_insn (gen_memory_barrier ());
28130 }
28131
28132 /* Emit the load-exclusive and store-exclusive instructions.
28133 Use acquire and release versions if necessary. */
28134
28135 static void
28136 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28137 {
28138 rtx (*gen) (rtx, rtx);
28139
28140 if (acq)
28141 {
28142 switch (mode)
28143 {
28144 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28145 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28146 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28147 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28148 default:
28149 gcc_unreachable ();
28150 }
28151 }
28152 else
28153 {
28154 switch (mode)
28155 {
28156 case QImode: gen = gen_arm_load_exclusiveqi; break;
28157 case HImode: gen = gen_arm_load_exclusivehi; break;
28158 case SImode: gen = gen_arm_load_exclusivesi; break;
28159 case DImode: gen = gen_arm_load_exclusivedi; break;
28160 default:
28161 gcc_unreachable ();
28162 }
28163 }
28164
28165 emit_insn (gen (rval, mem));
28166 }
28167
28168 static void
28169 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28170 rtx mem, bool rel)
28171 {
28172 rtx (*gen) (rtx, rtx, rtx);
28173
28174 if (rel)
28175 {
28176 switch (mode)
28177 {
28178 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28179 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28180 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28181 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28182 default:
28183 gcc_unreachable ();
28184 }
28185 }
28186 else
28187 {
28188 switch (mode)
28189 {
28190 case QImode: gen = gen_arm_store_exclusiveqi; break;
28191 case HImode: gen = gen_arm_store_exclusivehi; break;
28192 case SImode: gen = gen_arm_store_exclusivesi; break;
28193 case DImode: gen = gen_arm_store_exclusivedi; break;
28194 default:
28195 gcc_unreachable ();
28196 }
28197 }
28198
28199 emit_insn (gen (bval, rval, mem));
28200 }
28201
28202 /* Emit the jump instruction INSN and mark it as very unlikely to be taken. */
28203
28204 static void
28205 emit_unlikely_jump (rtx insn)
28206 {
28207 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28208
28209 rtx_insn *jump = emit_jump_insn (insn);
28210 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28211 }
28212
28213 /* Expand a compare and swap pattern. */
28214
28215 void
28216 arm_expand_compare_and_swap (rtx operands[])
28217 {
28218 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28219 machine_mode mode;
28220 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28221
28222 bval = operands[0];
28223 rval = operands[1];
28224 mem = operands[2];
28225 oldval = operands[3];
28226 newval = operands[4];
28227 is_weak = operands[5];
28228 mod_s = operands[6];
28229 mod_f = operands[7];
28230 mode = GET_MODE (mem);
28231
28232 /* Normally the succ memory model must be stronger than fail, but in the
28233 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28234 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28235
28236 if (TARGET_HAVE_LDACQ
28237 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28238 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28239 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28240
28241 switch (mode)
28242 {
28243 case QImode:
28244 case HImode:
28245 /* For narrow modes, we're going to perform the comparison in SImode,
28246 so do the zero-extension now. */
28247 rval = gen_reg_rtx (SImode);
28248 oldval = convert_modes (SImode, mode, oldval, true);
28249 /* FALLTHRU */
28250
28251 case SImode:
28252 /* Force the value into a register if needed. We waited until after
28253 the zero-extension above to do this properly. */
28254 if (!arm_add_operand (oldval, SImode))
28255 oldval = force_reg (SImode, oldval);
28256 break;
28257
28258 case DImode:
28259 if (!cmpdi_operand (oldval, mode))
28260 oldval = force_reg (mode, oldval);
28261 break;
28262
28263 default:
28264 gcc_unreachable ();
28265 }
28266
28267 if (TARGET_THUMB1)
28268 {
28269 switch (mode)
28270 {
28271 case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28272 case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28273 case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28274 case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28275 default:
28276 gcc_unreachable ();
28277 }
28278 }
28279 else
28280 {
28281 switch (mode)
28282 {
28283 case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28284 case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28285 case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28286 case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28287 default:
28288 gcc_unreachable ();
28289 }
28290 }
28291
28292 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28293 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28294
28295 if (mode == QImode || mode == HImode)
28296 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28297
28298 /* In all cases, we arrange for success to be signaled by Z set.
28299 This arrangement allows for the boolean result to be used directly
28300 in a subsequent branch, post optimization. For Thumb-1 targets, the
28301 boolean negation of the result is also stored in bval because the Thumb-1
28302 backend lacks dependency tracking for the CC flag, since flag setting is
28303 not represented at the RTL level. */
28304 if (TARGET_THUMB1)
28305 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28306 else
28307 {
28308 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28309 emit_insn (gen_rtx_SET (bval, x));
28310 }
28311 }
28312
28313 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28314 another memory store between the load-exclusive and store-exclusive can
28315 reset the monitor from Exclusive to Open state. This means we must wait
28316 until after reload to split the pattern, lest we get a register spill in
28317 the middle of the atomic sequence. Success of the compare and swap is
28318 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28319 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28320 the atomic_compare_and_swapmode standard pattern in operand 0). */
28321
28322 void
28323 arm_split_compare_and_swap (rtx operands[])
28324 {
28325 rtx rval, mem, oldval, newval, neg_bval;
28326 machine_mode mode;
28327 enum memmodel mod_s, mod_f;
28328 bool is_weak;
28329 rtx_code_label *label1, *label2;
28330 rtx x, cond;
28331
28332 rval = operands[1];
28333 mem = operands[2];
28334 oldval = operands[3];
28335 newval = operands[4];
28336 is_weak = (operands[5] != const0_rtx);
28337 mod_s = memmodel_from_int (INTVAL (operands[6]));
28338 mod_f = memmodel_from_int (INTVAL (operands[7]));
28339 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28340 mode = GET_MODE (mem);
28341
28342 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28343
28344 bool use_acquire = TARGET_HAVE_LDACQ
28345 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28346 || is_mm_release (mod_s));
28347
28348 bool use_release = TARGET_HAVE_LDACQ
28349 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28350 || is_mm_acquire (mod_s));
28351
28352 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28353 a full barrier is emitted after the store-release. */
28354 if (is_armv8_sync)
28355 use_acquire = false;
28356
28357 /* Checks whether a barrier is needed and emits one accordingly. */
28358 if (!(use_acquire || use_release))
28359 arm_pre_atomic_barrier (mod_s);
28360
28361 label1 = NULL;
28362 if (!is_weak)
28363 {
28364 label1 = gen_label_rtx ();
28365 emit_label (label1);
28366 }
28367 label2 = gen_label_rtx ();
28368
28369 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28370
28371 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28372 as required to communicate with arm_expand_compare_and_swap. */
28373 if (TARGET_32BIT)
28374 {
28375 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28376 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28377 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28378 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28379 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28380 }
28381 else
28382 {
28383 emit_move_insn (neg_bval, const1_rtx);
28384 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28385 if (thumb1_cmpneg_operand (oldval, SImode))
28386 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28387 label2, cond));
28388 else
28389 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28390 }
28391
28392 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28393
28394 /* Weak or strong, we want EQ to be true for success, so that we
28395 match the flags that we got from the compare above. */
28396 if (TARGET_32BIT)
28397 {
28398 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28399 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28400 emit_insn (gen_rtx_SET (cond, x));
28401 }
28402
28403 if (!is_weak)
28404 {
28405 /* Z is set to boolean value of !neg_bval, as required to communicate
28406 with arm_expand_compare_and_swap. */
28407 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28408 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28409 }
28410
28411 if (!is_mm_relaxed (mod_f))
28412 emit_label (label2);
28413
28414 /* Checks whether a barrier is needed and emits one accordingly. */
28415 if (is_armv8_sync
28416 || !(use_acquire || use_release))
28417 arm_post_atomic_barrier (mod_s);
28418
28419 if (is_mm_relaxed (mod_f))
28420 emit_label (label2);
28421 }
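
/* Illustrative sketch, not the literal output of the splitter above: for
   a strong SImode compare-and-swap with no acquire/release instructions,
   the generated sequence is conceptually

     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Ldone                @ unlikely: comparison failed
       strex   neg_bval, newval, [mem]
       cmp     neg_bval, #0
       bne     .Lretry               @ unlikely: monitor cleared, retry
     .Ldone:

   with barriers added before and/or after according to the memory model,
   as handled by arm_pre_atomic_barrier and arm_post_atomic_barrier.  */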
28422
28423 /* Split an atomic operation pattern. Operation is given by CODE and is one
28424 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28425 operation). Operation is performed on the content at MEM and on VALUE
28426 following the memory model MODEL_RTX. The content at MEM before and after
28427 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28428 success of the operation is returned in COND. Using a scratch register or
28429 an operand register for these determines what result is returned for that
28430 pattern. */
28431
28432 void
28433 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28434 rtx value, rtx model_rtx, rtx cond)
28435 {
28436 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28437 machine_mode mode = GET_MODE (mem);
28438 machine_mode wmode = (mode == DImode ? DImode : SImode);
28439 rtx_code_label *label;
28440 bool all_low_regs, bind_old_new;
28441 rtx x;
28442
28443 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28444
28445 bool use_acquire = TARGET_HAVE_LDACQ
28446 && !(is_mm_relaxed (model) || is_mm_consume (model)
28447 || is_mm_release (model));
28448
28449 bool use_release = TARGET_HAVE_LDACQ
28450 && !(is_mm_relaxed (model) || is_mm_consume (model)
28451 || is_mm_acquire (model));
28452
28453 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28454 a full barrier is emitted after the store-release. */
28455 if (is_armv8_sync)
28456 use_acquire = false;
28457
28458 /* Checks whether a barrier is needed and emits one accordingly. */
28459 if (!(use_acquire || use_release))
28460 arm_pre_atomic_barrier (model);
28461
28462 label = gen_label_rtx ();
28463 emit_label (label);
28464
28465 if (new_out)
28466 new_out = gen_lowpart (wmode, new_out);
28467 if (old_out)
28468 old_out = gen_lowpart (wmode, old_out);
28469 else
28470 old_out = new_out;
28471 value = simplify_gen_subreg (wmode, value, mode, 0);
28472
28473 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28474
28475 /* Does the operation require the destination and the first operand to use
28476 the same register? This is decided by the register constraints of the
28477 relevant insn patterns in thumb1.md. */
28478 gcc_assert (!new_out || REG_P (new_out));
28479 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28480 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28481 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28482 bind_old_new =
28483 (TARGET_THUMB1
28484 && code != SET
28485 && code != MINUS
28486 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28487
28488 /* We want to return the old value while putting the result of the operation
28489 in the same register as the old value, so copy the old value over to the
28490 destination register and use that register for the operation. */
28491 if (old_out && bind_old_new)
28492 {
28493 emit_move_insn (new_out, old_out);
28494 old_out = new_out;
28495 }
28496
28497 switch (code)
28498 {
28499 case SET:
28500 new_out = value;
28501 break;
28502
28503 case NOT:
28504 x = gen_rtx_AND (wmode, old_out, value);
28505 emit_insn (gen_rtx_SET (new_out, x));
28506 x = gen_rtx_NOT (wmode, new_out);
28507 emit_insn (gen_rtx_SET (new_out, x));
28508 break;
28509
28510 case MINUS:
28511 if (CONST_INT_P (value))
28512 {
28513 value = GEN_INT (-INTVAL (value));
28514 code = PLUS;
28515 }
28516 /* FALLTHRU */
28517
28518 case PLUS:
28519 if (mode == DImode)
28520 {
28521 /* DImode plus/minus need to clobber flags. */
28522 /* The adddi3 and subdi3 patterns are incorrectly written so that
28523 they require matching operands, even when we could easily support
28524 three operands. Thankfully, this can be fixed up post-splitting,
28525 as the individual add+adc patterns do accept three operands and
28526 post-reload cprop can make these moves go away. */
28527 emit_move_insn (new_out, old_out);
28528 if (code == PLUS)
28529 x = gen_adddi3 (new_out, new_out, value);
28530 else
28531 x = gen_subdi3 (new_out, new_out, value);
28532 emit_insn (x);
28533 break;
28534 }
28535 /* FALLTHRU */
28536
28537 default:
28538 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28539 emit_insn (gen_rtx_SET (new_out, x));
28540 break;
28541 }
28542
28543 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28544 use_release);
28545
28546 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28547 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28548
28549 /* Checks whether a barrier is needed and emits one accordingly. */
28550 if (is_armv8_sync
28551 || !(use_acquire || use_release))
28552 arm_post_atomic_barrier (model);
28553 }
28554 \f
28555 #define MAX_VECT_LEN 16
28556
28557 struct expand_vec_perm_d
28558 {
28559 rtx target, op0, op1;
28560 unsigned char perm[MAX_VECT_LEN];
28561 machine_mode vmode;
28562 unsigned char nelt;
28563 bool one_vector_p;
28564 bool testing_p;
28565 };
28566
28567 /* Generate a variable permutation. */
28568
28569 static void
28570 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28571 {
28572 machine_mode vmode = GET_MODE (target);
28573 bool one_vector_p = rtx_equal_p (op0, op1);
28574
28575 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28576 gcc_checking_assert (GET_MODE (op0) == vmode);
28577 gcc_checking_assert (GET_MODE (op1) == vmode);
28578 gcc_checking_assert (GET_MODE (sel) == vmode);
28579 gcc_checking_assert (TARGET_NEON);
28580
28581 if (one_vector_p)
28582 {
28583 if (vmode == V8QImode)
28584 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28585 else
28586 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28587 }
28588 else
28589 {
28590 rtx pair;
28591
28592 if (vmode == V8QImode)
28593 {
28594 pair = gen_reg_rtx (V16QImode);
28595 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28596 pair = gen_lowpart (TImode, pair);
28597 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28598 }
28599 else
28600 {
28601 pair = gen_reg_rtx (OImode);
28602 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28603 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28604 }
28605 }
28606 }
28607
28608 void
28609 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28610 {
28611 machine_mode vmode = GET_MODE (target);
28612 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28613 bool one_vector_p = rtx_equal_p (op0, op1);
28614 rtx rmask[MAX_VECT_LEN], mask;
28615
28616 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28617 numbering of elements for big-endian, we must reverse the order. */
28618 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28619
28620 /* The VTBL instruction does not use a modulo index, so we must take care
28621 of that ourselves. */
28622 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28623 for (i = 0; i < nelt; ++i)
28624 rmask[i] = mask;
28625 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28626 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28627
28628 arm_expand_vec_perm_1 (target, op0, op1, sel);
28629 }
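
/* Worked example, illustrative only: for a two-operand V8QImode
   permutation nelt is 8, so the mask is 2 * 8 - 1 = 15 and a selector
   element of 19 is reduced to 19 & 15 = 3 before the VTBL is emitted.
   This matches the modulo semantics GCC expects, whereas VTBL itself
   would have produced 0 for the out-of-range index.  */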
28630
28631 /* Map lane ordering between the architectural lane order and GCC's lane order,
28632 taking the ABI into account. See the comment above output_move_neon for details. */
28633
28634 static int
28635 neon_endian_lane_map (machine_mode mode, int lane)
28636 {
28637 if (BYTES_BIG_ENDIAN)
28638 {
28639 int nelems = GET_MODE_NUNITS (mode);
28640 /* Reverse lane order. */
28641 lane = (nelems - 1 - lane);
28642 /* Reverse D register order, to match ABI. */
28643 if (GET_MODE_SIZE (mode) == 16)
28644 lane = lane ^ (nelems / 2);
28645 }
28646 return lane;
28647 }
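
/* Worked example, illustrative only: for V4SImode on a big-endian target
   the mapping above first reverses the lane order (0,1,2,3 becomes
   3,2,1,0) and then swaps the two D halves of the Q register by xor-ing
   with 2, giving the overall map 0->1, 1->0, 2->3, 3->2; on little-endian
   targets lanes are returned unchanged.  */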
28648
28649 /* Some permutations index into pairs of vectors; this is a helper function
28650 to map indices into those pairs of vectors. */
28651
28652 static int
28653 neon_pair_endian_lane_map (machine_mode mode, int lane)
28654 {
28655 int nelem = GET_MODE_NUNITS (mode);
28656 if (BYTES_BIG_ENDIAN)
28657 lane =
28658 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28659 return lane;
28660 }
28661
28662 /* Generate or test for an insn that supports a constant permutation. */
28663
28664 /* Recognize patterns for the VUZP insns. */
28665
28666 static bool
28667 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28668 {
28669 unsigned int i, odd, mask, nelt = d->nelt;
28670 rtx out0, out1, in0, in1;
28671 rtx (*gen)(rtx, rtx, rtx, rtx);
28672 int first_elem;
28673 int swap_nelt;
28674
28675 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28676 return false;
28677
28678 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28679 big-endian pattern on 64-bit vectors, so we correct for that. */
28680 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28681 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28682
28683 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28684
28685 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28686 odd = 0;
28687 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28688 odd = 1;
28689 else
28690 return false;
28691 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28692
28693 for (i = 0; i < nelt; i++)
28694 {
28695 unsigned elt =
28696 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28697 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28698 return false;
28699 }
28700
28701 /* Success! */
28702 if (d->testing_p)
28703 return true;
28704
28705 switch (d->vmode)
28706 {
28707 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28708 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28709 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28710 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28711 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28712 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28713 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28714 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28715 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28716 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28717 default:
28718 gcc_unreachable ();
28719 }
28720
28721 in0 = d->op0;
28722 in1 = d->op1;
28723 if (swap_nelt != 0)
28724 std::swap (in0, in1);
28725
28726 out0 = d->target;
28727 out1 = gen_reg_rtx (d->vmode);
28728 if (odd)
28729 std::swap (out0, out1);
28730
28731 emit_insn (gen (out0, in0, in1, out1));
28732 return true;
28733 }
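
/* Illustrative example of the permutations matched above: on a
   little-endian target, where the lane maps are the identity, a
   two-operand V4SImode VUZP corresponds to the even selector {0, 2, 4, 6}
   (odd == 0) or the odd selector {1, 3, 5, 7} (odd == 1), i.e. the
   classic unzip of interleaved data.  */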
28734
28735 /* Recognize patterns for the VZIP insns. */
28736
28737 static bool
28738 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28739 {
28740 unsigned int i, high, mask, nelt = d->nelt;
28741 rtx out0, out1, in0, in1;
28742 rtx (*gen)(rtx, rtx, rtx, rtx);
28743 int first_elem;
28744 bool is_swapped;
28745
28746 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28747 return false;
28748
28749 is_swapped = BYTES_BIG_ENDIAN;
28750
28751 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28752
28753 high = nelt / 2;
28754 if (first_elem == neon_endian_lane_map (d->vmode, high))
28755 ;
28756 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28757 high = 0;
28758 else
28759 return false;
28760 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28761
28762 for (i = 0; i < nelt / 2; i++)
28763 {
28764 unsigned elt =
28765 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28766 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28767 != elt)
28768 return false;
28769 elt =
28770 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28771 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28772 != elt)
28773 return false;
28774 }
28775
28776 /* Success! */
28777 if (d->testing_p)
28778 return true;
28779
28780 switch (d->vmode)
28781 {
28782 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28783 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28784 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28785 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28786 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28787 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28788 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28789 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28790 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28791 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28792 default:
28793 gcc_unreachable ();
28794 }
28795
28796 in0 = d->op0;
28797 in1 = d->op1;
28798 if (is_swapped)
28799 std::swap (in0, in1);
28800
28801 out0 = d->target;
28802 out1 = gen_reg_rtx (d->vmode);
28803 if (high)
28804 std::swap (out0, out1);
28805
28806 emit_insn (gen (out0, in0, in1, out1));
28807 return true;
28808 }
28809
28810 /* Recognize patterns for the VREV insns. */
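/* For example (illustrative): a single-operand V8HImode selector of
   {3 2 1 0 7 6 5 4} reverses the half-words within each 64-bit group,
   which is VREV64.16, and {1 0 3 2} on V4SImode maps to VREV64.32.
   The value of d->perm[0] (7, 3 or 1) selects the reversal width
   checked below.  */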
28811
28812 static bool
28813 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28814 {
28815 unsigned int i, j, diff, nelt = d->nelt;
28816 rtx (*gen)(rtx, rtx);
28817
28818 if (!d->one_vector_p)
28819 return false;
28820
28821 diff = d->perm[0];
28822 switch (diff)
28823 {
28824 case 7:
28825 switch (d->vmode)
28826 {
28827 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28828 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28829 default:
28830 return false;
28831 }
28832 break;
28833 case 3:
28834 switch (d->vmode)
28835 {
28836 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28837 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28838 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28839 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28840 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28841 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28842 default:
28843 return false;
28844 }
28845 break;
28846 case 1:
28847 switch (d->vmode)
28848 {
28849 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28850 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28851 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28852 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28853 case V4SImode: gen = gen_neon_vrev64v4si; break;
28854 case V2SImode: gen = gen_neon_vrev64v2si; break;
28855 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28856 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28857 default:
28858 return false;
28859 }
28860 break;
28861 default:
28862 return false;
28863 }
28864
28865 for (i = 0; i < nelt ; i += diff + 1)
28866 for (j = 0; j <= diff; j += 1)
28867 {
28868 /* This is guaranteed to be true as the value of diff
28869 is 7, 3, 1 and we should have enough elements in the
28870 queue to generate this. Getting a vector mask with a
28871 value of diff other than these values implies that
28872 something is wrong by the time we get here. */
28873 gcc_assert (i + j < nelt);
28874 if (d->perm[i + j] != i + diff - j)
28875 return false;
28876 }
28877
28878 /* Success! */
28879 if (d->testing_p)
28880 return true;
28881
28882 emit_insn (gen (d->target, d->op0));
28883 return true;
28884 }
28885
28886 /* Recognize patterns for the VTRN insns. */
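/* For example (illustrative, little-endian lane numbering): with V4SImode
   operands OP0 = {a0 a1 a2 a3} and OP1 = {b0 b1 b2 b3}, the selector
   {0 4 2 6} yields {a0 b0 a2 b2} and {1 5 3 7} yields {a1 b1 a3 b3};
   these are the two results of a single VTRN (odd == 0 and odd == 1
   below).  */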
28887
28888 static bool
28889 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28890 {
28891 unsigned int i, odd, mask, nelt = d->nelt;
28892 rtx out0, out1, in0, in1;
28893 rtx (*gen)(rtx, rtx, rtx, rtx);
28894
28895 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28896 return false;
28897
28898 /* Note that these are little-endian tests. Adjust for big-endian later. */
28899 if (d->perm[0] == 0)
28900 odd = 0;
28901 else if (d->perm[0] == 1)
28902 odd = 1;
28903 else
28904 return false;
28905 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28906
28907 for (i = 0; i < nelt; i += 2)
28908 {
28909 if (d->perm[i] != i + odd)
28910 return false;
28911 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28912 return false;
28913 }
28914
28915 /* Success! */
28916 if (d->testing_p)
28917 return true;
28918
28919 switch (d->vmode)
28920 {
28921 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28922 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28923 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28924 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28925 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28926 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28927 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28928 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28929 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28930 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28931 default:
28932 gcc_unreachable ();
28933 }
28934
28935 in0 = d->op0;
28936 in1 = d->op1;
28937 if (BYTES_BIG_ENDIAN)
28938 {
28939 std::swap (in0, in1);
28940 odd = !odd;
28941 }
28942
28943 out0 = d->target;
28944 out1 = gen_reg_rtx (d->vmode);
28945 if (odd)
28946 std::swap (out0, out1);
28947
28948 emit_insn (gen (out0, in0, in1, out1));
28949 return true;
28950 }
28951
28952 /* Recognize patterns for the VEXT insns. */
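/* For example (illustrative): with V8QImode operands, the selector
   {3 4 5 6 7 8 9 10} takes a contiguous run starting at byte 3 of the
   concatenation of OP0 and OP1, i.e. VEXT.8 with #3.  A rotation of a
   single operand such as {2 3 0 1} on V4HImode is the one_vector_p
   case, where NEXT wraps back to 0.  */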
28953
28954 static bool
28955 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28956 {
28957 unsigned int i, nelt = d->nelt;
28958 rtx (*gen) (rtx, rtx, rtx, rtx);
28959 rtx offset;
28960
28961 unsigned int location;
28962
28963 unsigned int next = d->perm[0] + 1;
28964
28965 /* TODO: Handle GCC's numbering of elements for big-endian. */
28966 if (BYTES_BIG_ENDIAN)
28967 return false;
28968
28969 /* Check if the extracted indexes are increasing by one. */
28970 for (i = 1; i < nelt; next++, i++)
28971 {
28972 /* If we hit the most significant element of the 2nd vector in
28973 the previous iteration, no need to test further. */
28974 if (next == 2 * nelt)
28975 return false;
28976
28977 /* If we are operating on only one vector: it could be a
28978 rotation. If there are only two elements of size < 64, let
28979 arm_evpc_neon_vrev catch it. */
28980 if (d->one_vector_p && (next == nelt))
28981 {
28982 if ((nelt == 2) && (d->vmode != V2DImode))
28983 return false;
28984 else
28985 next = 0;
28986 }
28987
28988 if (d->perm[i] != next)
28989 return false;
28990 }
28991
28992 location = d->perm[0];
28993
28994 switch (d->vmode)
28995 {
28996 case V16QImode: gen = gen_neon_vextv16qi; break;
28997 case V8QImode: gen = gen_neon_vextv8qi; break;
28998 case V4HImode: gen = gen_neon_vextv4hi; break;
28999 case V8HImode: gen = gen_neon_vextv8hi; break;
29000 case V2SImode: gen = gen_neon_vextv2si; break;
29001 case V4SImode: gen = gen_neon_vextv4si; break;
29002 case V4HFmode: gen = gen_neon_vextv4hf; break;
29003 case V8HFmode: gen = gen_neon_vextv8hf; break;
29004 case V2SFmode: gen = gen_neon_vextv2sf; break;
29005 case V4SFmode: gen = gen_neon_vextv4sf; break;
29006 case V2DImode: gen = gen_neon_vextv2di; break;
29007 default:
29008 return false;
29009 }
29010
29011 /* Success! */
29012 if (d->testing_p)
29013 return true;
29014
29015 offset = GEN_INT (location);
29016 emit_insn (gen (d->target, d->op0, d->op1, offset));
29017 return true;
29018 }
29019
29020 /* The NEON VTBL instruction is a fully variable permutation that's even
29021 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29022 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29023 can do slightly better by expanding this as a constant where we don't
29024 have to apply a mask. */
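/* For example (illustrative): an arbitrary V8QImode selector such as
   {0 9 2 11 4 13 6 15} has no matching structured permute, but its
   indices have already been reduced modulo 2 * nelt by the caller, so
   it can simply be materialized as a constant vector and used directly
   as the VTBL index register.  */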
29025
29026 static bool
29027 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29028 {
29029 rtx rperm[MAX_VECT_LEN], sel;
29030 machine_mode vmode = d->vmode;
29031 unsigned int i, nelt = d->nelt;
29032
29033 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29034 numbering of elements for big-endian, we must reverse the order. */
29035 if (BYTES_BIG_ENDIAN)
29036 return false;
29037
29038 if (d->testing_p)
29039 return true;
29040
29041 /* Generic code will try constant permutation twice. Once with the
29042 original mode and again with the elements lowered to QImode.
29043 So wait and don't do the selector expansion ourselves. */
29044 if (vmode != V8QImode && vmode != V16QImode)
29045 return false;
29046
29047 for (i = 0; i < nelt; ++i)
29048 rperm[i] = GEN_INT (d->perm[i]);
29049 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29050 sel = force_reg (vmode, sel);
29051
29052 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29053 return true;
29054 }
29055
29056 static bool
29057 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29058 {
29059 /* Check if the input mask matches vext before reordering the
29060 operands. */
29061 if (TARGET_NEON)
29062 if (arm_evpc_neon_vext (d))
29063 return true;
29064
29065 /* The pattern matching functions above are written to look for a small
29066 number to begin the sequence (0, 1, N/2). If we begin with an index
29067 from the second operand, we can swap the operands. */
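  /* For example (illustrative): with nelt == 4, the selector {4 0 5 1}
     starts with an index from the second operand; adding nelt modulo
     2 * nelt turns it into {0 4 1 5} with OP0 and OP1 swapped, which
     the VZIP recognizer can then match.  */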
29068 if (d->perm[0] >= d->nelt)
29069 {
29070 unsigned i, nelt = d->nelt;
29071
29072 for (i = 0; i < nelt; ++i)
29073 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29074
29075 std::swap (d->op0, d->op1);
29076 }
29077
29078 if (TARGET_NEON)
29079 {
29080 if (arm_evpc_neon_vuzp (d))
29081 return true;
29082 if (arm_evpc_neon_vzip (d))
29083 return true;
29084 if (arm_evpc_neon_vrev (d))
29085 return true;
29086 if (arm_evpc_neon_vtrn (d))
29087 return true;
29088 return arm_evpc_neon_vtbl (d);
29089 }
29090 return false;
29091 }
29092
29093 /* Expand a vec_perm_const pattern. */
29094
29095 bool
29096 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29097 {
29098 struct expand_vec_perm_d d;
29099 int i, nelt, which;
29100
29101 d.target = target;
29102 d.op0 = op0;
29103 d.op1 = op1;
29104
29105 d.vmode = GET_MODE (target);
29106 gcc_assert (VECTOR_MODE_P (d.vmode));
29107 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29108 d.testing_p = false;
29109
29110 for (i = which = 0; i < nelt; ++i)
29111 {
29112 rtx e = XVECEXP (sel, 0, i);
29113 int ei = INTVAL (e) & (2 * nelt - 1);
29114 which |= (ei < nelt ? 1 : 2);
29115 d.perm[i] = ei;
29116 }
29117
29118 switch (which)
29119 {
29120 default:
29121 gcc_unreachable();
29122
29123 case 3:
29124 d.one_vector_p = false;
29125 if (!rtx_equal_p (op0, op1))
29126 break;
29127
29128 /* The elements of PERM do not suggest that only the first operand
29129 is used, but both operands are identical. Allow easier matching
29130 of the permutation by folding the permutation into the single
29131 input vector. */
29132 /* FALLTHRU */
29133 case 2:
29134 for (i = 0; i < nelt; ++i)
29135 d.perm[i] &= nelt - 1;
29136 d.op0 = op1;
29137 d.one_vector_p = true;
29138 break;
29139
29140 case 1:
29141 d.op1 = op0;
29142 d.one_vector_p = true;
29143 break;
29144 }
29145
29146 return arm_expand_vec_perm_const_1 (&d);
29147 }
29148
29149 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29150
29151 static bool
29152 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29153 const unsigned char *sel)
29154 {
29155 struct expand_vec_perm_d d;
29156 unsigned int i, nelt, which;
29157 bool ret;
29158
29159 d.vmode = vmode;
29160 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29161 d.testing_p = true;
29162 memcpy (d.perm, sel, nelt);
29163
29164 /* Categorize the set of elements in the selector. */
29165 for (i = which = 0; i < nelt; ++i)
29166 {
29167 unsigned char e = d.perm[i];
29168 gcc_assert (e < 2 * nelt);
29169 which |= (e < nelt ? 1 : 2);
29170 }
29171
29172 /* For all elements from second vector, fold the elements to first. */
29173 if (which == 2)
29174 for (i = 0; i < nelt; ++i)
29175 d.perm[i] -= nelt;
29176
29177 /* Check whether the mask can be applied to the vector type. */
29178 d.one_vector_p = (which != 3);
29179
29180 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29181 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29182 if (!d.one_vector_p)
29183 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29184
29185 start_sequence ();
29186 ret = arm_expand_vec_perm_const_1 (&d);
29187 end_sequence ();
29188
29189 return ret;
29190 }
29191
29192 bool
29193 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29194 {
29195 /* If we are soft float and either have ldrd or the mode is no wider
29196 than a word, then all auto increment forms are ok. */
29197 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29198 return true;
29199
29200 switch (code)
29201 {
29202 /* Post-increment and pre-decrement are supported for all instruction
29203 forms, except that pre-decrement is not supported for vector modes. */
29204 case ARM_POST_INC:
29205 case ARM_PRE_DEC:
29206 if (VECTOR_MODE_P (mode))
29207 {
29208 if (code != ARM_PRE_DEC)
29209 return true;
29210 else
29211 return false;
29212 }
29213
29214 return true;
29215
29216 case ARM_POST_DEC:
29217 case ARM_PRE_INC:
29218 /* Without LDRD and mode size greater than
29219 word size, there is no point in auto-incrementing
29220 because ldm and stm will not have these forms. */
29221 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29222 return false;
29223
29224 /* Vector and floating point modes do not support
29225 these auto increment forms. */
29226 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29227 return false;
29228
29229 return true;
29230
29231 default:
29232 return false;
29233
29234 }
29235
29236 return false;
29237 }
29238
29239 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29240 on ARM, since we know that shifts by negative amounts are no-ops.
29241 Additionally, the default expansion code is not available or suitable
29242 for post-reload insn splits (this can occur when the register allocator
29243 chooses not to do a shift in NEON).
29244
29245 This function is used in both initial expand and post-reload splits, and
29246 handles all kinds of 64-bit shifts.
29247
29248 Input requirements:
29249 - It is safe for the input and output to be the same register, but
29250 early-clobber rules apply for the shift amount and scratch registers.
29251 - Shift by register requires both scratch registers. In all other cases
29252 the scratch registers may be NULL.
29253 - Ashiftrt by a register also clobbers the CC register. */
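/* For example (illustrative register names): a constant left shift of a
   DImode value by 5 is emitted roughly as
       lsl  out_hi, in_hi, #5
       orr  out_hi, out_hi, in_lo, lsr #27
       lsl  out_lo, in_lo, #5
   with the bits that cross the word boundary OR-ed into the high word.
   Shifts by register instead use the scratch registers to build the
   "amount - 32" and "32 - amount" values.  */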
29254 void
29255 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29256 rtx amount, rtx scratch1, rtx scratch2)
29257 {
29258 rtx out_high = gen_highpart (SImode, out);
29259 rtx out_low = gen_lowpart (SImode, out);
29260 rtx in_high = gen_highpart (SImode, in);
29261 rtx in_low = gen_lowpart (SImode, in);
29262
29263 /* Terminology:
29264 in = the register pair containing the input value.
29265 out = the destination register pair.
29266 up = the high- or low-part of each pair.
29267 down = the opposite part to "up".
29268 In a shift, we can consider bits to shift from "up"-stream to
29269 "down"-stream, so in a left-shift "up" is the low-part and "down"
29270 is the high-part of each register pair. */
29271
29272 rtx out_up = code == ASHIFT ? out_low : out_high;
29273 rtx out_down = code == ASHIFT ? out_high : out_low;
29274 rtx in_up = code == ASHIFT ? in_low : in_high;
29275 rtx in_down = code == ASHIFT ? in_high : in_low;
29276
29277 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29278 gcc_assert (out
29279 && (REG_P (out) || GET_CODE (out) == SUBREG)
29280 && GET_MODE (out) == DImode);
29281 gcc_assert (in
29282 && (REG_P (in) || GET_CODE (in) == SUBREG)
29283 && GET_MODE (in) == DImode);
29284 gcc_assert (amount
29285 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29286 && GET_MODE (amount) == SImode)
29287 || CONST_INT_P (amount)));
29288 gcc_assert (scratch1 == NULL
29289 || (GET_CODE (scratch1) == SCRATCH)
29290 || (GET_MODE (scratch1) == SImode
29291 && REG_P (scratch1)));
29292 gcc_assert (scratch2 == NULL
29293 || (GET_CODE (scratch2) == SCRATCH)
29294 || (GET_MODE (scratch2) == SImode
29295 && REG_P (scratch2)));
29296 gcc_assert (!REG_P (out) || !REG_P (amount)
29297 || !HARD_REGISTER_P (out)
29298 || (REGNO (out) != REGNO (amount)
29299 && REGNO (out) + 1 != REGNO (amount)));
29300
29301 /* Macros to make the following code more readable. */
29302 #define SUB_32(DEST,SRC) \
29303 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29304 #define RSB_32(DEST,SRC) \
29305 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29306 #define SUB_S_32(DEST,SRC) \
29307 gen_addsi3_compare0 ((DEST), (SRC), \
29308 GEN_INT (-32))
29309 #define SET(DEST,SRC) \
29310 gen_rtx_SET ((DEST), (SRC))
29311 #define SHIFT(CODE,SRC,AMOUNT) \
29312 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29313 #define LSHIFT(CODE,SRC,AMOUNT) \
29314 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29315 SImode, (SRC), (AMOUNT))
29316 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29317 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29318 SImode, (SRC), (AMOUNT))
29319 #define ORR(A,B) \
29320 gen_rtx_IOR (SImode, (A), (B))
29321 #define BRANCH(COND,LABEL) \
29322 gen_arm_cond_branch ((LABEL), \
29323 gen_rtx_ ## COND (CCmode, cc_reg, \
29324 const0_rtx), \
29325 cc_reg)
29326
29327 /* Shifts by register and shifts by constant are handled separately. */
29328 if (CONST_INT_P (amount))
29329 {
29330 /* We have a shift-by-constant. */
29331
29332 /* First, handle out-of-range shift amounts.
29333 In both cases we try to match the result that an ARM instruction in a
29334 shift-by-register would give. This helps reduce execution
29335 differences between optimization levels, but it won't stop other
29336 parts of the compiler doing different things. This is undefined
29337 behavior, in any case. */
29338 if (INTVAL (amount) <= 0)
29339 emit_insn (gen_movdi (out, in));
29340 else if (INTVAL (amount) >= 64)
29341 {
29342 if (code == ASHIFTRT)
29343 {
29344 rtx const31_rtx = GEN_INT (31);
29345 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29346 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29347 }
29348 else
29349 emit_insn (gen_movdi (out, const0_rtx));
29350 }
29351
29352 /* Now handle valid shifts. */
29353 else if (INTVAL (amount) < 32)
29354 {
29355 /* Shifts by a constant less than 32. */
29356 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29357
29358 /* Clearing the out register in DImode first avoids lots
29359 of spilling and results in less stack usage.
29360 Later this redundant insn is completely removed.
29361 Do that only if "in" and "out" are different registers. */
29362 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29363 emit_insn (SET (out, const0_rtx));
29364 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29365 emit_insn (SET (out_down,
29366 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29367 out_down)));
29368 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29369 }
29370 else
29371 {
29372 /* Shifts by a constant greater than 31. */
29373 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29374
29375 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29376 emit_insn (SET (out, const0_rtx));
29377 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29378 if (code == ASHIFTRT)
29379 emit_insn (gen_ashrsi3 (out_up, in_up,
29380 GEN_INT (31)));
29381 else
29382 emit_insn (SET (out_up, const0_rtx));
29383 }
29384 }
29385 else
29386 {
29387 /* We have a shift-by-register. */
29388 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29389
29390 /* This alternative requires the scratch registers. */
29391 gcc_assert (scratch1 && REG_P (scratch1));
29392 gcc_assert (scratch2 && REG_P (scratch2));
29393
29394 /* We will need the values "amount-32" and "32-amount" later.
29395 Swapping them around now allows the later code to be more general. */
29396 switch (code)
29397 {
29398 case ASHIFT:
29399 emit_insn (SUB_32 (scratch1, amount));
29400 emit_insn (RSB_32 (scratch2, amount));
29401 break;
29402 case ASHIFTRT:
29403 emit_insn (RSB_32 (scratch1, amount));
29404 /* Also set CC = amount > 32. */
29405 emit_insn (SUB_S_32 (scratch2, amount));
29406 break;
29407 case LSHIFTRT:
29408 emit_insn (RSB_32 (scratch1, amount));
29409 emit_insn (SUB_32 (scratch2, amount));
29410 break;
29411 default:
29412 gcc_unreachable ();
29413 }
29414
29415 /* Emit code like this:
29416
29417 arithmetic-left:
29418 out_down = in_down << amount;
29419 out_down = (in_up << (amount - 32)) | out_down;
29420 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29421 out_up = in_up << amount;
29422
29423 arithmetic-right:
29424 out_down = in_down >> amount;
29425 out_down = (in_up << (32 - amount)) | out_down;
29426 if (amount < 32)
29427 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29428 out_up = in_up << amount;
29429
29430 logical-right:
29431 out_down = in_down >> amount;
29432 out_down = (in_up << (32 - amount)) | out_down;
29433 if (amount < 32)
29434 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29435 out_up = in_up << amount;
29436
29437 The ARM and Thumb2 variants are the same but implemented slightly
29438 differently. If this were only called during expand we could just
29439 use the Thumb2 case and let combine do the right thing, but this
29440 can also be called from post-reload splitters. */
29441
29442 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29443
29444 if (!TARGET_THUMB2)
29445 {
29446 /* Emit code for ARM mode. */
29447 emit_insn (SET (out_down,
29448 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29449 if (code == ASHIFTRT)
29450 {
29451 rtx_code_label *done_label = gen_label_rtx ();
29452 emit_jump_insn (BRANCH (LT, done_label));
29453 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29454 out_down)));
29455 emit_label (done_label);
29456 }
29457 else
29458 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29459 out_down)));
29460 }
29461 else
29462 {
29463 /* Emit code for Thumb2 mode.
29464 Thumb2 can't do shift and or in one insn. */
29465 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29466 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29467
29468 if (code == ASHIFTRT)
29469 {
29470 rtx_code_label *done_label = gen_label_rtx ();
29471 emit_jump_insn (BRANCH (LT, done_label));
29472 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29473 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29474 emit_label (done_label);
29475 }
29476 else
29477 {
29478 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29479 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29480 }
29481 }
29482
29483 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29484 }
29485
29486 #undef SUB_32
29487 #undef RSB_32
29488 #undef SUB_S_32
29489 #undef SET
29490 #undef SHIFT
29491 #undef LSHIFT
29492 #undef REV_LSHIFT
29493 #undef ORR
29494 #undef BRANCH
29495 }
29496
29497 /* Returns true if the pattern is a valid symbolic address, which is either a
29498 symbol_ref or (symbol_ref + addend).
29499
29500 According to the ARM ELF ABI, the initial addend of REL-type relocations
29501 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29502 literal field of the instruction as a 16-bit signed value in the range
29503 -32768 <= A < 32768. */
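/* For example (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 16))) are both accepted,
   whereas (const (plus (symbol_ref "x") (const_int 65536))) is not,
   since 65536 lies outside the representable addend range.  */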
29504
29505 bool
29506 arm_valid_symbolic_address_p (rtx addr)
29507 {
29508 rtx xop0, xop1 = NULL_RTX;
29509 rtx tmp = addr;
29510
29511 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29512 return true;
29513
29514 /* (const (plus: symbol_ref const_int)) */
29515 if (GET_CODE (addr) == CONST)
29516 tmp = XEXP (addr, 0);
29517
29518 if (GET_CODE (tmp) == PLUS)
29519 {
29520 xop0 = XEXP (tmp, 0);
29521 xop1 = XEXP (tmp, 1);
29522
29523 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29524 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29525 }
29526
29527 return false;
29528 }
29529
29530 /* Returns true if *COMPARISON is a valid comparison operation and puts
29531 the operands into a form that is valid. */
29532 bool
29533 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29534 {
29535 enum rtx_code code = GET_CODE (*comparison);
29536 int code_int;
29537 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29538 ? GET_MODE (*op2) : GET_MODE (*op1);
29539
29540 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29541
29542 if (code == UNEQ || code == LTGT)
29543 return false;
29544
29545 code_int = (int)code;
29546 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29547 PUT_CODE (*comparison, (enum rtx_code)code_int);
29548
29549 switch (mode)
29550 {
29551 case SImode:
29552 if (!arm_add_operand (*op1, mode))
29553 *op1 = force_reg (mode, *op1);
29554 if (!arm_add_operand (*op2, mode))
29555 *op2 = force_reg (mode, *op2);
29556 return true;
29557
29558 case DImode:
29559 if (!cmpdi_operand (*op1, mode))
29560 *op1 = force_reg (mode, *op1);
29561 if (!cmpdi_operand (*op2, mode))
29562 *op2 = force_reg (mode, *op2);
29563 return true;
29564
29565 case HFmode:
29566 if (!TARGET_VFP_FP16INST)
29567 break;
29568 /* FP16 comparisons are done in SF mode. */
29569 mode = SFmode;
29570 *op1 = convert_to_mode (mode, *op1, 1);
29571 *op2 = convert_to_mode (mode, *op2, 1);
29572 /* Fall through. */
29573 case SFmode:
29574 case DFmode:
29575 if (!vfp_compare_operand (*op1, mode))
29576 *op1 = force_reg (mode, *op1);
29577 if (!vfp_compare_operand (*op2, mode))
29578 *op2 = force_reg (mode, *op2);
29579 return true;
29580 default:
29581 break;
29582 }
29583
29584 return false;
29585
29586 }
29587
29588 /* Maximum number of instructions to set a block of memory. */
29589 static int
29590 arm_block_set_max_insns (void)
29591 {
29592 if (optimize_function_for_size_p (cfun))
29593 return 4;
29594 else
29595 return current_tune->max_insns_inline_memset;
29596 }
29597
29598 /* Return TRUE if it's profitable to set a block of memory for the
29599 non-vectorized case. VAL is the value to set the memory
29600 with. LENGTH is the number of bytes to set. ALIGN is the
29601 alignment of the destination memory in bytes. UNALIGNED_P
29602 is TRUE if we can only set the memory with instructions
29603 meeting alignment requirements. USE_STRD_P is TRUE if we
29604 can use strd to set the memory. */
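/* As a worked example (assuming VAL can be built with a single
   instruction): setting 14 word-aligned bytes without strd costs
   1 + (14 >> 2) + leftover[14 & 3] = 1 + 3 + 1 = 5 instructions, which
   exceeds the limit of 4 used when optimizing for size and is otherwise
   compared against the tuning's max_insns_inline_memset.  */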
29605 static bool
29606 arm_block_set_non_vect_profit_p (rtx val,
29607 unsigned HOST_WIDE_INT length,
29608 unsigned HOST_WIDE_INT align,
29609 bool unaligned_p, bool use_strd_p)
29610 {
29611 int num = 0;
29612 /* For leftovers of 0-7 bytes, we can set the memory block using
29613 strb/strh/str with the minimum number of instructions. */
29614 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29615
29616 if (unaligned_p)
29617 {
29618 num = arm_const_inline_cost (SET, val);
29619 num += length / align + length % align;
29620 }
29621 else if (use_strd_p)
29622 {
29623 num = arm_const_double_inline_cost (val);
29624 num += (length >> 3) + leftover[length & 7];
29625 }
29626 else
29627 {
29628 num = arm_const_inline_cost (SET, val);
29629 num += (length >> 2) + leftover[length & 3];
29630 }
29631
29632 /* We may be able to combine last pair STRH/STRB into a single STR
29633 by shifting one byte back. */
29634 if (unaligned_access && length > 3 && (length & 3) == 3)
29635 num--;
29636
29637 return (num <= arm_block_set_max_insns ());
29638 }
29639
29640 /* Return TRUE if it's profitable to set a block of memory for the
29641 vectorized case. LENGTH is the number of bytes to set.
29642 ALIGN is the alignment of destination memory in bytes.
29643 MODE is the vector mode used to set the memory. */
29644 static bool
29645 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29646 unsigned HOST_WIDE_INT align,
29647 machine_mode mode)
29648 {
29649 int num;
29650 bool unaligned_p = ((align & 3) != 0);
29651 unsigned int nelt = GET_MODE_NUNITS (mode);
29652
29653 /* Instruction loading constant value. */
29654 num = 1;
29655 /* Instructions storing the memory. */
29656 num += (length + nelt - 1) / nelt;
29657 /* Instructions adjusting the address expression. We only need to adjust
29658 the address expression if it's 4-byte aligned and the leftover bytes
29659 can only be stored by a misaligned store instruction. */
29660 if (!unaligned_p && (length & 3) != 0)
29661 num++;
29662
29663 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29664 if (!unaligned_p && mode == V16QImode)
29665 num--;
29666
29667 return (num <= arm_block_set_max_insns ());
29668 }
29669
29670 /* Set a block of memory using vectorization instructions for the
29671 unaligned case. We fill the first LENGTH bytes of the memory
29672 area starting from DSTBASE with byte constant VALUE. ALIGN is
29673 the alignment requirement of memory. Return TRUE if succeeded. */
29674 static bool
29675 arm_block_set_unaligned_vect (rtx dstbase,
29676 unsigned HOST_WIDE_INT length,
29677 unsigned HOST_WIDE_INT value,
29678 unsigned HOST_WIDE_INT align)
29679 {
29680 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29681 rtx dst, mem;
29682 rtx val_elt, val_vec, reg;
29683 rtx rval[MAX_VECT_LEN];
29684 rtx (*gen_func) (rtx, rtx);
29685 machine_mode mode;
29686 unsigned HOST_WIDE_INT v = value;
29687 unsigned int offset = 0;
29688 gcc_assert ((align & 0x3) != 0);
29689 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29690 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29691 if (length >= nelt_v16)
29692 {
29693 mode = V16QImode;
29694 gen_func = gen_movmisalignv16qi;
29695 }
29696 else
29697 {
29698 mode = V8QImode;
29699 gen_func = gen_movmisalignv8qi;
29700 }
29701 nelt_mode = GET_MODE_NUNITS (mode);
29702 gcc_assert (length >= nelt_mode);
29703 /* Skip if it isn't profitable. */
29704 if (!arm_block_set_vect_profit_p (length, align, mode))
29705 return false;
29706
29707 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29708 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29709
29710 v = sext_hwi (v, BITS_PER_WORD);
29711 val_elt = GEN_INT (v);
29712 for (j = 0; j < nelt_mode; j++)
29713 rval[j] = val_elt;
29714
29715 reg = gen_reg_rtx (mode);
29716 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29717 /* Emit instruction loading the constant value. */
29718 emit_move_insn (reg, val_vec);
29719
29720 /* Handle nelt_mode bytes in a vector. */
29721 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29722 {
29723 emit_insn ((*gen_func) (mem, reg));
29724 if (i + 2 * nelt_mode <= length)
29725 {
29726 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29727 offset += nelt_mode;
29728 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29729 }
29730 }
29731
29732 /* If at least nelt_v8 bytes are left over, we must be in
29733 V16QI mode. */
29734 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29735
29736 /* Handle (8, 16) bytes leftover. */
29737 if (i + nelt_v8 < length)
29738 {
29739 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29740 offset += length - i;
29741 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29742
29743 /* We are shifting bytes back, set the alignment accordingly. */
29744 if ((length & 1) != 0 && align >= 2)
29745 set_mem_align (mem, BITS_PER_UNIT);
29746
29747 emit_insn (gen_movmisalignv16qi (mem, reg));
29748 }
29749 /* Handle (0, 8] bytes leftover. */
29750 else if (i < length && i + nelt_v8 >= length)
29751 {
29752 if (mode == V16QImode)
29753 reg = gen_lowpart (V8QImode, reg);
29754
29755 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29756 + (nelt_mode - nelt_v8))));
29757 offset += (length - i) + (nelt_mode - nelt_v8);
29758 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29759
29760 /* We are shifting bytes back, set the alignment accordingly. */
29761 if ((length & 1) != 0 && align >= 2)
29762 set_mem_align (mem, BITS_PER_UNIT);
29763
29764 emit_insn (gen_movmisalignv8qi (mem, reg));
29765 }
29766
29767 return true;
29768 }
29769
29770 /* Set a block of memory using vectorization instructions for the
29771 aligned case. We fill the first LENGTH bytes of the memory area
29772 starting from DSTBASE with byte constant VALUE. ALIGN is the
29773 alignment requirement of memory. Return TRUE if succeeded. */
29774 static bool
29775 arm_block_set_aligned_vect (rtx dstbase,
29776 unsigned HOST_WIDE_INT length,
29777 unsigned HOST_WIDE_INT value,
29778 unsigned HOST_WIDE_INT align)
29779 {
29780 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29781 rtx dst, addr, mem;
29782 rtx val_elt, val_vec, reg;
29783 rtx rval[MAX_VECT_LEN];
29784 machine_mode mode;
29785 unsigned HOST_WIDE_INT v = value;
29786 unsigned int offset = 0;
29787
29788 gcc_assert ((align & 0x3) == 0);
29789 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29790 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29791 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29792 mode = V16QImode;
29793 else
29794 mode = V8QImode;
29795
29796 nelt_mode = GET_MODE_NUNITS (mode);
29797 gcc_assert (length >= nelt_mode);
29798 /* Skip if it isn't profitable. */
29799 if (!arm_block_set_vect_profit_p (length, align, mode))
29800 return false;
29801
29802 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29803
29804 v = sext_hwi (v, BITS_PER_WORD);
29805 val_elt = GEN_INT (v);
29806 for (j = 0; j < nelt_mode; j++)
29807 rval[j] = val_elt;
29808
29809 reg = gen_reg_rtx (mode);
29810 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29811 /* Emit instruction loading the constant value. */
29812 emit_move_insn (reg, val_vec);
29813
29814 i = 0;
29815 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29816 if (mode == V16QImode)
29817 {
29818 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29819 emit_insn (gen_movmisalignv16qi (mem, reg));
29820 i += nelt_mode;
29821 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29822 if (i + nelt_v8 < length && i + nelt_v16 > length)
29823 {
29824 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29825 offset += length - nelt_mode;
29826 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29827 /* We are shifting bytes back, set the alignment accordingly. */
29828 if ((length & 0x3) == 0)
29829 set_mem_align (mem, BITS_PER_UNIT * 4);
29830 else if ((length & 0x1) == 0)
29831 set_mem_align (mem, BITS_PER_UNIT * 2);
29832 else
29833 set_mem_align (mem, BITS_PER_UNIT);
29834
29835 emit_insn (gen_movmisalignv16qi (mem, reg));
29836 return true;
29837 }
29838 /* Fall through for bytes leftover. */
29839 mode = V8QImode;
29840 nelt_mode = GET_MODE_NUNITS (mode);
29841 reg = gen_lowpart (V8QImode, reg);
29842 }
29843
29844 /* Handle 8 bytes in a vector. */
29845 for (; (i + nelt_mode <= length); i += nelt_mode)
29846 {
29847 addr = plus_constant (Pmode, dst, i);
29848 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29849 emit_move_insn (mem, reg);
29850 }
29851
29852 /* Handle single word leftover by shifting 4 bytes back. We can
29853 use aligned access for this case. */
29854 if (i + UNITS_PER_WORD == length)
29855 {
29856 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29857 offset += i - UNITS_PER_WORD;
29858 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29859 /* We are shifting 4 bytes back, set the alignment accordingly. */
29860 if (align > UNITS_PER_WORD)
29861 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29862
29863 emit_move_insn (mem, reg);
29864 }
29865 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29866 We have to use unaligned access for this case. */
29867 else if (i < length)
29868 {
29869 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29870 offset += length - nelt_mode;
29871 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29872 /* We are shifting bytes back, set the alignment accordingly. */
29873 if ((length & 1) == 0)
29874 set_mem_align (mem, BITS_PER_UNIT * 2);
29875 else
29876 set_mem_align (mem, BITS_PER_UNIT);
29877
29878 emit_insn (gen_movmisalignv8qi (mem, reg));
29879 }
29880
29881 return true;
29882 }
29883
29884 /* Set a block of memory using plain strh/strb instructions, only
29885 using instructions allowed by ALIGN on the processor. We fill the
29886 first LENGTH bytes of the memory area starting from DSTBASE
29887 with byte constant VALUE. ALIGN is the alignment requirement
29888 of memory. */
29889 static bool
29890 arm_block_set_unaligned_non_vect (rtx dstbase,
29891 unsigned HOST_WIDE_INT length,
29892 unsigned HOST_WIDE_INT value,
29893 unsigned HOST_WIDE_INT align)
29894 {
29895 unsigned int i;
29896 rtx dst, addr, mem;
29897 rtx val_exp, val_reg, reg;
29898 machine_mode mode;
29899 HOST_WIDE_INT v = value;
29900
29901 gcc_assert (align == 1 || align == 2);
29902
29903 if (align == 2)
29904 v |= (value << BITS_PER_UNIT);
29905
29906 v = sext_hwi (v, BITS_PER_WORD);
29907 val_exp = GEN_INT (v);
29908 /* Skip if it isn't profitable. */
29909 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29910 align, true, false))
29911 return false;
29912
29913 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29914 mode = (align == 2 ? HImode : QImode);
29915 val_reg = force_reg (SImode, val_exp);
29916 reg = gen_lowpart (mode, val_reg);
29917
29918 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29919 {
29920 addr = plus_constant (Pmode, dst, i);
29921 mem = adjust_automodify_address (dstbase, mode, addr, i);
29922 emit_move_insn (mem, reg);
29923 }
29924
29925 /* Handle single byte leftover. */
29926 if (i + 1 == length)
29927 {
29928 reg = gen_lowpart (QImode, val_reg);
29929 addr = plus_constant (Pmode, dst, i);
29930 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29931 emit_move_insn (mem, reg);
29932 i++;
29933 }
29934
29935 gcc_assert (i == length);
29936 return true;
29937 }
29938
29939 /* Set a block of memory using plain strd/str/strh/strb instructions,
29940 to permit unaligned copies on processors which support unaligned
29941 semantics for those instructions. We fill the first LENGTH bytes
29942 of the memory area starting from DSTBASE with byte constant VALUE.
29943 ALIGN is the alignment requirement of memory. */
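/* For example (illustrative): for VALUE == 0xab the constant is
   replicated to 0xabababab for word stores, and further to
   0xabababababababab when strd (double-word store) is used.  */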
29944 static bool
29945 arm_block_set_aligned_non_vect (rtx dstbase,
29946 unsigned HOST_WIDE_INT length,
29947 unsigned HOST_WIDE_INT value,
29948 unsigned HOST_WIDE_INT align)
29949 {
29950 unsigned int i;
29951 rtx dst, addr, mem;
29952 rtx val_exp, val_reg, reg;
29953 unsigned HOST_WIDE_INT v;
29954 bool use_strd_p;
29955
29956 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29957 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29958
29959 v = (value | (value << 8) | (value << 16) | (value << 24));
29960 if (length < UNITS_PER_WORD)
29961 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29962
29963 if (use_strd_p)
29964 v |= (v << BITS_PER_WORD);
29965 else
29966 v = sext_hwi (v, BITS_PER_WORD);
29967
29968 val_exp = GEN_INT (v);
29969 /* Skip if it isn't profitable. */
29970 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29971 align, false, use_strd_p))
29972 {
29973 if (!use_strd_p)
29974 return false;
29975
29976 /* Try without strd. */
29977 v = (v >> BITS_PER_WORD);
29978 v = sext_hwi (v, BITS_PER_WORD);
29979 val_exp = GEN_INT (v);
29980 use_strd_p = false;
29981 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29982 align, false, use_strd_p))
29983 return false;
29984 }
29985
29986 i = 0;
29987 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29988 /* Handle double words using strd if possible. */
29989 if (use_strd_p)
29990 {
29991 val_reg = force_reg (DImode, val_exp);
29992 reg = val_reg;
29993 for (; (i + 8 <= length); i += 8)
29994 {
29995 addr = plus_constant (Pmode, dst, i);
29996 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29997 emit_move_insn (mem, reg);
29998 }
29999 }
30000 else
30001 val_reg = force_reg (SImode, val_exp);
30002
30003 /* Handle words. */
30004 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30005 for (; (i + 4 <= length); i += 4)
30006 {
30007 addr = plus_constant (Pmode, dst, i);
30008 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30009 if ((align & 3) == 0)
30010 emit_move_insn (mem, reg);
30011 else
30012 emit_insn (gen_unaligned_storesi (mem, reg));
30013 }
30014
30015 /* Merge last pair of STRH and STRB into a STR if possible. */
30016 if (unaligned_access && i > 0 && (i + 3) == length)
30017 {
30018 addr = plus_constant (Pmode, dst, i - 1);
30019 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30020 /* We are shifting one byte back, set the alignment accordingly. */
30021 if ((align & 1) == 0)
30022 set_mem_align (mem, BITS_PER_UNIT);
30023
30024 /* Most likely this is an unaligned access, and we can't tell at
30025 compilation time. */
30026 emit_insn (gen_unaligned_storesi (mem, reg));
30027 return true;
30028 }
30029
30030 /* Handle half word leftover. */
30031 if (i + 2 <= length)
30032 {
30033 reg = gen_lowpart (HImode, val_reg);
30034 addr = plus_constant (Pmode, dst, i);
30035 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30036 if ((align & 1) == 0)
30037 emit_move_insn (mem, reg);
30038 else
30039 emit_insn (gen_unaligned_storehi (mem, reg));
30040
30041 i += 2;
30042 }
30043
30044 /* Handle single byte leftover. */
30045 if (i + 1 == length)
30046 {
30047 reg = gen_lowpart (QImode, val_reg);
30048 addr = plus_constant (Pmode, dst, i);
30049 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30050 emit_move_insn (mem, reg);
30051 }
30052
30053 return true;
30054 }
30055
30056 /* Set a block of memory using vectorization instructions for both
30057 aligned and unaligned cases. We fill the first LENGTH bytes of
30058 the memory area starting from DSTBASE with byte constant VALUE.
30059 ALIGN is the alignment requirement of memory. */
30060 static bool
30061 arm_block_set_vect (rtx dstbase,
30062 unsigned HOST_WIDE_INT length,
30063 unsigned HOST_WIDE_INT value,
30064 unsigned HOST_WIDE_INT align)
30065 {
30066 /* Check whether we need to use unaligned store instruction. */
30067 if (((align & 3) != 0 || (length & 3) != 0)
30068 /* Check whether unaligned store instruction is available. */
30069 && (!unaligned_access || BYTES_BIG_ENDIAN))
30070 return false;
30071
30072 if ((align & 3) == 0)
30073 return arm_block_set_aligned_vect (dstbase, length, value, align);
30074 else
30075 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30076 }
30077
30078 /* Expand a string store operation. First we try to do that by using
30079 vectorization instructions, then try with ARM unaligned access and
30080 double-word store if profitable. OPERANDS[0] is the destination,
30081 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30082 initialize the memory, OPERANDS[3] is the known alignment of the
30083 destination. */
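/* For example (illustrative): a 24-byte memset of value 0xab to a
   word-aligned destination on a little-endian NEON target is typically
   expanded inline as one instruction splatting the value into a vector
   register followed by a couple of vector stores, rather than a call
   to memset.  */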
30084 bool
30085 arm_gen_setmem (rtx *operands)
30086 {
30087 rtx dstbase = operands[0];
30088 unsigned HOST_WIDE_INT length;
30089 unsigned HOST_WIDE_INT value;
30090 unsigned HOST_WIDE_INT align;
30091
30092 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30093 return false;
30094
30095 length = UINTVAL (operands[1]);
30096 if (length > 64)
30097 return false;
30098
30099 value = (UINTVAL (operands[2]) & 0xFF);
30100 align = UINTVAL (operands[3]);
30101 if (TARGET_NEON && length >= 8
30102 && current_tune->string_ops_prefer_neon
30103 && arm_block_set_vect (dstbase, length, value, align))
30104 return true;
30105
30106 if (!unaligned_access && (align & 3) != 0)
30107 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30108
30109 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30110 }
30111
30112
30113 static bool
30114 arm_macro_fusion_p (void)
30115 {
30116 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30117 }
30118
30119 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30120 for MOVW / MOVT macro fusion. */
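/* For example (illustrative), the fused pair corresponds to assembly of
   the form
       movw  r0, #:lower16:sym   (or a 16-bit immediate)
       movt  r0, #:upper16:sym
   which together materialize a 32-bit value in a single register.  */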
30121
30122 static bool
30123 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30124 {
30125 /* We are trying to fuse
30126 movw imm / movt imm
30127 instructions as a group that gets scheduled together. */
30128
30129 rtx set_dest = SET_DEST (curr_set);
30130
30131 if (GET_MODE (set_dest) != SImode)
30132 return false;
30133
30134 /* We are trying to match:
30135 prev (movw) == (set (reg r0) (const_int imm16))
30136 curr (movt) == (set (zero_extract (reg r0)
30137 (const_int 16)
30138 (const_int 16))
30139 (const_int imm16_1))
30140 or
30141 prev (movw) == (set (reg r1)
30142 (high (symbol_ref ("SYM"))))
30143 curr (movt) == (set (reg r0)
30144 (lo_sum (reg r1)
30145 (symbol_ref ("SYM")))) */
30146
30147 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30148 {
30149 if (CONST_INT_P (SET_SRC (curr_set))
30150 && CONST_INT_P (SET_SRC (prev_set))
30151 && REG_P (XEXP (set_dest, 0))
30152 && REG_P (SET_DEST (prev_set))
30153 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30154 return true;
30155
30156 }
30157 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30158 && REG_P (SET_DEST (curr_set))
30159 && REG_P (SET_DEST (prev_set))
30160 && GET_CODE (SET_SRC (prev_set)) == HIGH
30161 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30162 return true;
30163
30164 return false;
30165 }
30166
30167 static bool
30168 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30169 {
30170 rtx prev_set = single_set (prev);
30171 rtx curr_set = single_set (curr);
30172
30173 if (!prev_set
30174 || !curr_set)
30175 return false;
30176
30177 if (any_condjump_p (curr))
30178 return false;
30179
30180 if (!arm_macro_fusion_p ())
30181 return false;
30182
30183 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30184 && aarch_crypto_can_dual_issue (prev, curr))
30185 return true;
30186
30187 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30188 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30189 return true;
30190
30191 return false;
30192 }
30193
30194 /* Return true iff the instruction fusion described by OP is enabled. */
30195 bool
30196 arm_fusion_enabled_p (tune_params::fuse_ops op)
30197 {
30198 return current_tune->fusible_ops & op;
30199 }
30200
30201 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30202 scheduled for speculative execution. Reject the long-running division
30203 and square-root instructions. */
30204
30205 static bool
30206 arm_sched_can_speculate_insn (rtx_insn *insn)
30207 {
30208 switch (get_attr_type (insn))
30209 {
30210 case TYPE_SDIV:
30211 case TYPE_UDIV:
30212 case TYPE_FDIVS:
30213 case TYPE_FDIVD:
30214 case TYPE_FSQRTS:
30215 case TYPE_FSQRTD:
30216 case TYPE_NEON_FP_SQRT_S:
30217 case TYPE_NEON_FP_SQRT_D:
30218 case TYPE_NEON_FP_SQRT_S_Q:
30219 case TYPE_NEON_FP_SQRT_D_Q:
30220 case TYPE_NEON_FP_DIV_S:
30221 case TYPE_NEON_FP_DIV_D:
30222 case TYPE_NEON_FP_DIV_S_Q:
30223 case TYPE_NEON_FP_DIV_D_Q:
30224 return false;
30225 default:
30226 return true;
30227 }
30228 }
30229
30230 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
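/* AddressSanitizer forms shadow addresses roughly as
   (address >> 3) + offset; on 32-bit ARM the offset returned below is
   1 << 29 (0x20000000).  */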
30231
30232 static unsigned HOST_WIDE_INT
30233 arm_asan_shadow_offset (void)
30234 {
30235 return HOST_WIDE_INT_1U << 29;
30236 }
30237
30238
30239 /* This is a temporary fix for PR60655. Ideally we need
30240 to handle most of these cases in the generic part but
30241 currently we reject minus (..) (sym_ref). We try to
30242 ameliorate the case with minus (sym_ref1) (sym_ref2)
30243 where they are in the same section. */
30244
30245 static bool
30246 arm_const_not_ok_for_debug_p (rtx p)
30247 {
30248 tree decl_op0 = NULL;
30249 tree decl_op1 = NULL;
30250
30251 if (GET_CODE (p) == MINUS)
30252 {
30253 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30254 {
30255 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30256 if (decl_op1
30257 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30258 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30259 {
30260 if ((VAR_P (decl_op1)
30261 || TREE_CODE (decl_op1) == CONST_DECL)
30262 && (VAR_P (decl_op0)
30263 || TREE_CODE (decl_op0) == CONST_DECL))
30264 return (get_variable_section (decl_op1, false)
30265 != get_variable_section (decl_op0, false));
30266
30267 if (TREE_CODE (decl_op1) == LABEL_DECL
30268 && TREE_CODE (decl_op0) == LABEL_DECL)
30269 return (DECL_CONTEXT (decl_op1)
30270 != DECL_CONTEXT (decl_op0));
30271 }
30272
30273 return true;
30274 }
30275 }
30276
30277 return false;
30278 }
30279
30280 /* Return TRUE if X is a reference to a value in a constant pool. */
30281 extern bool
30282 arm_is_constant_pool_ref (rtx x)
30283 {
30284 return (MEM_P (x)
30285 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30286 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30287 }
30288
30289 /* Remember the last target of arm_set_current_function. */
30290 static GTY(()) tree arm_previous_fndecl;
30291
30292 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30293
30294 void
30295 save_restore_target_globals (tree new_tree)
30296 {
30297 /* If we have a previous state, use it. */
30298 if (TREE_TARGET_GLOBALS (new_tree))
30299 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30300 else if (new_tree == target_option_default_node)
30301 restore_target_globals (&default_target_globals);
30302 else
30303 {
30304 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30305 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30306 }
30307
30308 arm_option_params_internal ();
30309 }
30310
30311 /* Invalidate arm_previous_fndecl. */
30312
30313 void
30314 arm_reset_previous_fndecl (void)
30315 {
30316 arm_previous_fndecl = NULL_TREE;
30317 }
30318
30319 /* Establish appropriate back-end context for processing the function
30320 FNDECL. The argument might be NULL to indicate processing at top
30321 level, outside of any function scope. */
30322
30323 static void
30324 arm_set_current_function (tree fndecl)
30325 {
30326 if (!fndecl || fndecl == arm_previous_fndecl)
30327 return;
30328
30329 tree old_tree = (arm_previous_fndecl
30330 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30331 : NULL_TREE);
30332
30333 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30334
30335 /* If current function has no attributes but previous one did,
30336 use the default node. */
30337 if (! new_tree && old_tree)
30338 new_tree = target_option_default_node;
30339
30340 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30341 the default have been handled by save_restore_target_globals from
30342 arm_pragma_target_parse. */
30343 if (old_tree == new_tree)
30344 return;
30345
30346 arm_previous_fndecl = fndecl;
30347
30348 /* First set the target options. */
30349 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30350
30351 save_restore_target_globals (new_tree);
30352 }
30353
30354 /* Implement TARGET_OPTION_PRINT. */
30355
30356 static void
30357 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30358 {
30359 int flags = ptr->x_target_flags;
30360 const char *fpu_name;
30361
30362 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30363 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30364
30365 fprintf (file, "%*sselected arch %s\n", indent, "",
30366 TARGET_THUMB2_P (flags) ? "thumb2" :
30367 TARGET_THUMB_P (flags) ? "thumb1" :
30368 "arm");
30369
30370 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30371 }
30372
30373 /* Hook to determine if one function can safely inline another. */
30374
30375 static bool
30376 arm_can_inline_p (tree caller, tree callee)
30377 {
30378 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30379 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30380 bool can_inline = true;
30381
30382 struct cl_target_option *caller_opts
30383 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30384 : target_option_default_node);
30385
30386 struct cl_target_option *callee_opts
30387 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30388 : target_option_default_node);
30389
30390 if (callee_opts == caller_opts)
30391 return true;
30392
30393 /* Callee's ISA features should be a subset of the caller's. */
30394 struct arm_build_target caller_target;
30395 struct arm_build_target callee_target;
30396 caller_target.isa = sbitmap_alloc (isa_num_bits);
30397 callee_target.isa = sbitmap_alloc (isa_num_bits);
30398
30399 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30400 false);
30401 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30402 false);
30403 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30404 can_inline = false;
30405
30406 sbitmap_free (caller_target.isa);
30407 sbitmap_free (callee_target.isa);
30408
30409 /* OK to inline between different modes.
30410 Functions with mode-specific instructions, e.g. using asm,
30411 must be explicitly protected with noinline. */
30412 return can_inline;
30413 }
30414
30415 /* Hook to fix a function's alignment when affected by the target attribute. */
30416
30417 static void
30418 arm_relayout_function (tree fndecl)
30419 {
30420 if (DECL_USER_ALIGN (fndecl))
30421 return;
30422
30423 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30424
30425 if (!callee_tree)
30426 callee_tree = target_option_default_node;
30427
30428 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30429 SET_DECL_ALIGN
30430 (fndecl,
30431 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30432 }
30433
30434 /* Inner function to process the attribute((target(...))): take an argument
30435 and set the current options from it. If we have a list, recursively
30436 go over the list. */
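/* For example (illustrative), a declaration such as
       __attribute__((target("thumb,fpu=neon"))) int f (void);
   selects Thumb code generation and the "neon" FPU for F alone, while
   target("arm") switches a single function back to ARM state.  */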
30437
30438 static bool
30439 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30440 {
30441 if (TREE_CODE (args) == TREE_LIST)
30442 {
30443 bool ret = true;
30444
30445 for (; args; args = TREE_CHAIN (args))
30446 if (TREE_VALUE (args)
30447 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30448 ret = false;
30449 return ret;
30450 }
30451
30452 else if (TREE_CODE (args) != STRING_CST)
30453 {
30454 error ("attribute %<target%> argument not a string");
30455 return false;
30456 }
30457
30458 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30459 char *q;
30460
30461 while ((q = strtok (argstr, ",")) != NULL)
30462 {
30463 while (ISSPACE (*q)) ++q;
30464
30465 argstr = NULL;
30466 if (!strncmp (q, "thumb", 5))
30467 opts->x_target_flags |= MASK_THUMB;
30468
30469 else if (!strncmp (q, "arm", 3))
30470 opts->x_target_flags &= ~MASK_THUMB;
30471
30472 else if (!strncmp (q, "fpu=", 4))
30473 {
30474 int fpu_index;
30475 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30476 &fpu_index, CL_TARGET))
30477 {
30478 error ("invalid fpu for attribute(target(\"%s\"))", q);
30479 return false;
30480 }
30481 if (fpu_index == TARGET_FPU_auto)
30482 {
30483 /* This doesn't really make sense until we support
30484 general dynamic selection of the architecture and all
30485 sub-features. */
30486 sorry ("auto fpu selection not currently permitted here");
30487 return false;
30488 }
30489 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30490 }
30491 else
30492 {
30493 error ("attribute(target(\"%s\")) is unknown", q);
30494 return false;
30495 }
30496 }
30497
30498 return true;
30499 }
30500
30501 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30502
30503 tree
30504 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30505 struct gcc_options *opts_set)
30506 {
30507 struct cl_target_option cl_opts;
30508
30509 if (!arm_valid_target_attribute_rec (args, opts))
30510 return NULL_TREE;
30511
30512 cl_target_option_save (&cl_opts, opts);
30513 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30514 arm_option_check_internal (opts);
30515 /* Do any overrides, such as global options arch=xxx. */
30516 arm_option_override_internal (opts, opts_set);
30517
30518 return build_target_option_node (opts);
30519 }
30520
30521 static void
30522 add_attribute (const char * mode, tree *attributes)
30523 {
30524 size_t len = strlen (mode);
30525 tree value = build_string (len, mode);
30526
30527 TREE_TYPE (value) = build_array_type (char_type_node,
30528 build_index_type (size_int (len)));
30529
30530 *attributes = tree_cons (get_identifier ("target"),
30531 build_tree_list (NULL_TREE, value),
30532 *attributes);
30533 }
30534
30535 /* For testing. Insert thumb or arm modes alternately on functions. */
30536
30537 static void
30538 arm_insert_attributes (tree fndecl, tree * attributes)
30539 {
30540 const char *mode;
30541
30542 if (! TARGET_FLIP_THUMB)
30543 return;
30544
30545 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30546 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30547 return;
30548
30549 /* Nested definitions must inherit mode. */
30550 if (current_function_decl)
30551 {
30552 mode = TARGET_THUMB ? "thumb" : "arm";
30553 add_attribute (mode, attributes);
30554 return;
30555 }
30556
30557 /* If there is already a setting don't change it. */
30558 if (lookup_attribute ("target", *attributes) != NULL)
30559 return;
30560
30561 mode = thumb_flipper ? "thumb" : "arm";
30562 add_attribute (mode, attributes);
30563
30564 thumb_flipper = !thumb_flipper;
30565 }
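
/* Editor's note -- illustrative sketch of the behaviour above: with the
   (testing-only) flip-thumb mode enabled, successive top-level function
   definitions are compiled alternately as if they carried
   __attribute__ ((target ("thumb"))) and __attribute__ ((target ("arm"))),
   while nested functions inherit the enclosing mode and functions that
   already carry a "target" attribute are left alone.  */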
30566
30567 /* Hook to validate attribute((target("string"))). */
30568
30569 static bool
30570 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30571 tree args, int ARG_UNUSED (flags))
30572 {
30573 bool ret = true;
30574 struct gcc_options func_options;
30575 tree cur_tree, new_optimize;
30576 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30577
30578 /* Get the optimization options of the current function. */
30579 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30580
30581 /* If the function changed the optimization levels as well as setting target
30582 options, start with the optimizations specified. */
30583 if (!func_optimize)
30584 func_optimize = optimization_default_node;
30585
30586 /* Init func_options. */
30587 memset (&func_options, 0, sizeof (func_options));
30588 init_options_struct (&func_options, NULL);
30589 lang_hooks.init_options_struct (&func_options);
30590
30591 /* Initialize func_options to the defaults. */
30592 cl_optimization_restore (&func_options,
30593 TREE_OPTIMIZATION (func_optimize));
30594
30595 cl_target_option_restore (&func_options,
30596 TREE_TARGET_OPTION (target_option_default_node));
30597
30598 /* Set func_options flags with new target mode. */
30599 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30600 &global_options_set);
30601
30602 if (cur_tree == NULL_TREE)
30603 ret = false;
30604
30605 new_optimize = build_optimization_node (&func_options);
30606
30607 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30608
30609 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30610
30611 finalize_options_struct (&func_options);
30612
30613 return ret;
30614 }
30615
30616 /* Match an ISA feature bitmap to a named FPU. We always use the
30617 first entry that exactly matches the feature set, so that we
30618 effectively canonicalize the FPU name for the assembler. */
30619 static const char*
30620 arm_identify_fpu_from_isa (sbitmap isa)
30621 {
30622 auto_sbitmap fpubits (isa_num_bits);
30623 auto_sbitmap cand_fpubits (isa_num_bits);
30624
30625 bitmap_and (fpubits, isa, isa_all_fpubits);
30626
30627 /* If there are no ISA feature bits relating to the FPU, we must be
30628 doing soft-float. */
30629 if (bitmap_empty_p (fpubits))
30630 return "softvfp";
30631
30632 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30633 {
30634 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30635 if (bitmap_equal_p (fpubits, cand_fpubits))
30636 return all_fpus[i].name;
30637 }
30638 /* We must find an entry, or things have gone wrong. */
30639 gcc_unreachable ();
30640 }
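
/* Editor's note -- illustrative: if the FPU-related bits in ISA match,
   say, the entry that -mfpu=vfpv3-d16 would select, the loop above
   returns that entry's canonical name ("vfpv3-d16"), or the name of an
   earlier all_fpus entry with an identical feature set.  An empty FPU
   feature set always yields "softvfp".  The FPU named here is just an
   example of a table entry.  */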
30641
30642 void
30643 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30644 {
30645
30646 fprintf (stream, "\t.syntax unified\n");
30647
30648 if (TARGET_THUMB)
30649 {
30650 if (is_called_in_ARM_mode (decl)
30651 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30652 && cfun->is_thunk))
30653 fprintf (stream, "\t.code 32\n");
30654 else if (TARGET_THUMB1)
30655 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30656 else
30657 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30658 }
30659 else
30660 fprintf (stream, "\t.arm\n");
30661
30662 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30663 (TARGET_SOFT_FLOAT
30664 ? "softvfp"
30665 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30666
30667 if (TARGET_POKE_FUNCTION_NAME)
30668 arm_poke_function_name (stream, (const char *) name);
30669 }
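
/* Editor's note -- illustrative output only: for a Thumb-2 function
   compiled for a VFPv4 unit the code above emits something like

       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv4

   whereas a soft-float build emits ".fpu softvfp"; the exact .fpu
   operand comes from arm_identify_fpu_from_isa.  */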
30670
30671 /* If MEM is in the form [base+offset], extract the two parts of the
30672 address and store them in BASE and OFFSET; otherwise return false
30673 after clearing BASE and OFFSET. */
30674
30675 static bool
30676 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30677 {
30678 rtx addr;
30679
30680 gcc_assert (MEM_P (mem));
30681
30682 addr = XEXP (mem, 0);
30683
30684 /* Strip off const from addresses like (const (addr)). */
30685 if (GET_CODE (addr) == CONST)
30686 addr = XEXP (addr, 0);
30687
30688 if (GET_CODE (addr) == REG)
30689 {
30690 *base = addr;
30691 *offset = const0_rtx;
30692 return true;
30693 }
30694
30695 if (GET_CODE (addr) == PLUS
30696 && GET_CODE (XEXP (addr, 0)) == REG
30697 && CONST_INT_P (XEXP (addr, 1)))
30698 {
30699 *base = XEXP (addr, 0);
30700 *offset = XEXP (addr, 1);
30701 return true;
30702 }
30703
30704 *base = NULL_RTX;
30705 *offset = NULL_RTX;
30706
30707 return false;
30708 }
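
/* Editor's note -- illustrative RTL: the helper above accepts addresses
   such as

     (reg:SI 3)                           base = r3, offset = 0
     (plus:SI (reg:SI 3) (const_int 8))   base = r3, offset = 8

   and rejects anything else, e.g. register+register or auto-modify
   addresses.  */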
30709
30710 /* If INSN is a load or store of an address in the form [base+offset],
30711 extract the two parts and store them in BASE and OFFSET. IS_LOAD is
30712 set to TRUE if it is a load. Return TRUE if INSN is such an
30713 instruction, otherwise return FALSE. */
30714
30715 static bool
30716 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30717 {
30718 rtx x, dest, src;
30719
30720 gcc_assert (INSN_P (insn));
30721 x = PATTERN (insn);
30722 if (GET_CODE (x) != SET)
30723 return false;
30724
30725 src = SET_SRC (x);
30726 dest = SET_DEST (x);
30727 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30728 {
30729 *is_load = false;
30730 extract_base_offset_in_addr (dest, base, offset);
30731 }
30732 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30733 {
30734 *is_load = true;
30735 extract_base_offset_in_addr (src, base, offset);
30736 }
30737 else
30738 return false;
30739
30740 return (*base != NULL_RTX && *offset != NULL_RTX);
30741 }
30742
30743 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30744
30745 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30746 and PRI are only calculated for these instructions. For other instructions,
30747 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
30748 of instruction fusion can be supported by returning different priorities.
30749
30750 It's important that irrelevant instructions get the largest FUSION_PRI. */
30751
30752 static void
30753 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30754 int *fusion_pri, int *pri)
30755 {
30756 int tmp, off_val;
30757 bool is_load;
30758 rtx base, offset;
30759
30760 gcc_assert (INSN_P (insn));
30761
30762 tmp = max_pri - 1;
30763 if (!fusion_load_store (insn, &base, &offset, &is_load))
30764 {
30765 *pri = tmp;
30766 *fusion_pri = tmp;
30767 return;
30768 }
30769
30770 /* Load goes first. */
30771 if (is_load)
30772 *fusion_pri = tmp - 1;
30773 else
30774 *fusion_pri = tmp - 2;
30775
30776 tmp /= 2;
30777
30778 /* INSN with smaller base register goes first. */
30779 tmp -= ((REGNO (base) & 0xff) << 20);
30780
30781 /* INSN with smaller offset goes first. */
30782 off_val = (int)(INTVAL (offset));
30783 if (off_val >= 0)
30784 tmp -= (off_val & 0xfffff);
30785 else
30786 tmp += ((- off_val) & 0xfffff);
30787
30788 *pri = tmp;
30789 return;
30790 }
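
/* Editor's note -- worked example with hypothetical values: for a load
   from [r1, #4] the code above computes

     fusion_pri = (max_pri - 1) - 1
     pri        = (max_pri - 1) / 2 - (1 << 20) - 4

   (REGNO (r1) == 1, offset == 4), so loads and stores that share a base
   register receive nearby priorities and are further ordered by their
   offsets, which is what the fusion pass relies on.  */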
30791
30792
30793 /* Construct and return a PARALLEL RTX vector with elements numbering the
30794 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30795 the vector - from the perspective of the architecture. This does not
30796 line up with GCC's perspective on lane numbers, so we end up with
30797 different masks depending on our target endianness. The diagram
30798 below may help. We must draw the distinction when building masks
30799 which select one half of the vector. An instruction selecting
30800 architectural low-lanes for a big-endian target must be described using
30801 a mask selecting GCC high-lanes.
30802
30803 Big-Endian Little-Endian
30804
30805 GCC 0 1 2 3 3 2 1 0
30806 | x | x | x | x | | x | x | x | x |
30807 Architecture 3 2 1 0 3 2 1 0
30808
30809 Low Mask: { 2, 3 } { 0, 1 }
30810 High Mask: { 0, 1 } { 2, 3 }
30811 */
30812
30813 rtx
30814 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30815 {
30816 int nunits = GET_MODE_NUNITS (mode);
30817 rtvec v = rtvec_alloc (nunits / 2);
30818 int high_base = nunits / 2;
30819 int low_base = 0;
30820 int base;
30821 rtx t1;
30822 int i;
30823
30824 if (BYTES_BIG_ENDIAN)
30825 base = high ? low_base : high_base;
30826 else
30827 base = high ? high_base : low_base;
30828
30829 for (i = 0; i < nunits / 2; i++)
30830 RTVEC_ELT (v, i) = GEN_INT (base + i);
30831
30832 t1 = gen_rtx_PARALLEL (mode, v);
30833 return t1;
30834 }
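
/* Editor's note -- illustrative: for V4SImode with HIGH == true the
   function above returns

     (parallel [(const_int 2) (const_int 3)])   on little-endian
     (parallel [(const_int 0) (const_int 1)])   on big-endian

   matching the Low/High mask table in the comment above.  */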
30835
30836 /* Check OP for validity as a PARALLEL RTX vector with elements
30837 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30838 from the perspective of the architecture. See the diagram above
30839 arm_simd_vect_par_cnst_half_p for more details. */
30840
30841 bool
30842 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30843 bool high)
30844 {
30845 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30846 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30847 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30848 int i = 0;
30849
30850 if (!VECTOR_MODE_P (mode))
30851 return false;
30852
30853 if (count_op != count_ideal)
30854 return false;
30855
30856 for (i = 0; i < count_ideal; i++)
30857 {
30858 rtx elt_op = XVECEXP (op, 0, i);
30859 rtx elt_ideal = XVECEXP (ideal, 0, i);
30860
30861 if (!CONST_INT_P (elt_op)
30862 || INTVAL (elt_ideal) != INTVAL (elt_op))
30863 return false;
30864 }
30865 return true;
30866 }
30867
30868 /* We can output an mi_thunk for all cases except for a non-zero
30869 vcall_offset in Thumb1. */
30870 static bool
30871 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30872 const_tree)
30873 {
30874 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30875 if (vcall_offset && TARGET_THUMB1)
30876 return false;
30877
30878 /* Otherwise ok. */
30879 return true;
30880 }
30881
30882 /* Generate RTL for a conditional branch with rtx comparison CODE in
30883 mode CC_MODE. The destination of the unlikely conditional branch
30884 is LABEL_REF. */
30885
30886 void
30887 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30888 rtx label_ref)
30889 {
30890 rtx x;
30891 x = gen_rtx_fmt_ee (code, VOIDmode,
30892 gen_rtx_REG (cc_mode, CC_REGNUM),
30893 const0_rtx);
30894
30895 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30896 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30897 pc_rtx);
30898 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30899 }
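
/* Editor's note -- illustrative RTL: for CODE == NE the sequence above
   emits roughly

     (set (pc) (if_then_else (ne (reg CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF) (pc)))

   and emit_unlikely_jump marks the jump as very unlikely to be taken.  */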
30900
30901 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30902
30903 For pure-code sections there is no letter code for this attribute, so
30904 output all the section flags numerically when this is needed. */
30905
30906 static bool
30907 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30908 {
30909
30910 if (flags & SECTION_ARM_PURECODE)
30911 {
30912 *num = 0x20000000;
30913
30914 if (!(flags & SECTION_DEBUG))
30915 *num |= 0x2;
30916 if (flags & SECTION_EXCLUDE)
30917 *num |= 0x80000000;
30918 if (flags & SECTION_WRITE)
30919 *num |= 0x1;
30920 if (flags & SECTION_CODE)
30921 *num |= 0x4;
30922 if (flags & SECTION_MERGE)
30923 *num |= 0x10;
30924 if (flags & SECTION_STRINGS)
30925 *num |= 0x20;
30926 if (flags & SECTION_TLS)
30927 *num |= 0x400;
30928 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30929 *num |= 0x200;
30930
30931 return true;
30932 }
30933
30934 return false;
30935 }
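
/* Editor's note -- worked example: a pure-code section holding code and
   not marked as a debug section yields

     0x20000000 | 0x2 | 0x4 == 0x20000006

   i.e. SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR, emitted as a
   numeric flags operand of the .section directive.  */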
30936
30937 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30938
30939 If pure-code is passed as an option, make sure all functions are in
30940 sections that have the SHF_ARM_PURECODE attribute. */
30941
30942 static section *
30943 arm_function_section (tree decl, enum node_frequency freq,
30944 bool startup, bool exit)
30945 {
30946 const char * section_name;
30947 section * sec;
30948
30949 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30950 return default_function_section (decl, freq, startup, exit);
30951
30952 if (!target_pure_code)
30953 return default_function_section (decl, freq, startup, exit);
30954
30955
30956 section_name = DECL_SECTION_NAME (decl);
30957
30958 /* If a function is not in a named section then it falls under the 'default'
30959 text section, also known as '.text'. We can preserve previous behavior as
30960 the default text section already has the SHF_ARM_PURECODE section
30961 attribute. */
30962 if (!section_name)
30963 {
30964 section *default_sec = default_function_section (decl, freq, startup,
30965 exit);
30966
30967 /* If default_sec is not null, then it must be a special section like for
30968 example .text.startup. We set the pure-code attribute and return the
30969 same section to preserve existing behavior. */
30970 if (default_sec)
30971 default_sec->common.flags |= SECTION_ARM_PURECODE;
30972 return default_sec;
30973 }
30974
30975 /* Otherwise look whether a section has already been created with
30976 'section_name'. */
30977 sec = get_named_section (decl, section_name, 0);
30978 if (!sec)
30979 /* If that is not the case passing NULL as the section's name to
30980 'get_named_section' will create a section with the declaration's
30981 section name. */
30982 sec = get_named_section (decl, NULL, 0);
30983
30984 /* Set the SHF_ARM_PURECODE attribute. */
30985 sec->common.flags |= SECTION_ARM_PURECODE;
30986
30987 return sec;
30988 }
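
/* Editor's note -- illustrative, section name is made up: with
   -mpure-code a function placed in a named section, e.g.

     void f (void) __attribute__ ((section (".ramfuncs")));

   still has SECTION_ARM_PURECODE set on that section by the code above,
   so it is emitted with the SHF_ARM_PURECODE flag just like the default
   text section.  */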
30989
30990 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
30991
30992 If DECL is a function declaration and pure-code is passed as an option
30993 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
30994 section's name and RELOC indicates whether the declaration's initializer may
30995 contain runtime relocations. */
30996
30997 static unsigned int
30998 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30999 {
31000 unsigned int flags = default_section_type_flags (decl, name, reloc);
31001
31002 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31003 flags |= SECTION_ARM_PURECODE;
31004
31005 return flags;
31006 }
31007
31008 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31009
31010 static void
31011 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31012 rtx op0, rtx op1,
31013 rtx *quot_p, rtx *rem_p)
31014 {
31015 if (mode == SImode)
31016 gcc_assert (!TARGET_IDIV);
31017
31018 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
31019 MODE_INT);
31020
31021 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31022 libval_mode, 2,
31023 op0, GET_MODE (op0),
31024 op1, GET_MODE (op1));
31025
31026 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31027 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31028 GET_MODE_SIZE (mode));
31029
31030 gcc_assert (quotient);
31031 gcc_assert (remainder);
31032
31033 *quot_p = quotient;
31034 *rem_p = remainder;
31035 }
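
/* Editor's note -- illustrative: for SImode operands LIBVAL_MODE is
   DImode and LIBFUNC is an AEABI helper such as __aeabi_idivmod or
   __aeabi_uidivmod, which return the quotient and remainder pair in
   {r0, r1}; the two simplify_gen_subreg calls above pick those halves
   out of the combined return value.  */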
31036
31037 /* This function checks for the availability of the coprocessor builtin passed
31038 in BUILTIN for the current target. Returns true if it is available and
31039 false otherwise. If a BUILTIN is passed for which this function has not
31040 been implemented it will trigger an internal compiler error (gcc_unreachable). */
31041
31042 bool
31043 arm_coproc_builtin_available (enum unspecv builtin)
31044 {
31045 /* None of these builtins are available in Thumb mode if the target only
31046 supports Thumb-1. */
31047 if (TARGET_THUMB1)
31048 return false;
31049
31050 switch (builtin)
31051 {
31052 case VUNSPEC_CDP:
31053 case VUNSPEC_LDC:
31054 case VUNSPEC_LDCL:
31055 case VUNSPEC_STC:
31056 case VUNSPEC_STCL:
31057 case VUNSPEC_MCR:
31058 case VUNSPEC_MRC:
31059 if (arm_arch4)
31060 return true;
31061 break;
31062 case VUNSPEC_CDP2:
31063 case VUNSPEC_LDC2:
31064 case VUNSPEC_LDC2L:
31065 case VUNSPEC_STC2:
31066 case VUNSPEC_STC2L:
31067 case VUNSPEC_MCR2:
31068 case VUNSPEC_MRC2:
31069 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31070 ARMv8-{A,M}. */
31071 if (arm_arch5)
31072 return true;
31073 break;
31074 case VUNSPEC_MCRR:
31075 case VUNSPEC_MRRC:
31076 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31077 ARMv8-{A,M}. */
31078 if (arm_arch6 || arm_arch5te)
31079 return true;
31080 break;
31081 case VUNSPEC_MCRR2:
31082 case VUNSPEC_MRRC2:
31083 if (arm_arch6)
31084 return true;
31085 break;
31086 default:
31087 gcc_unreachable ();
31088 }
31089 return false;
31090 }
31091
31092 /* This function returns true if OP is a valid memory operand for the ldc and
31093 stc coprocessor instructions and false otherwise. */
31094
31095 bool
31096 arm_coproc_ldc_stc_legitimate_address (rtx op)
31097 {
31098 HOST_WIDE_INT range;
31099 /* Has to be a memory operand. */
31100 if (!MEM_P (op))
31101 return false;
31102
31103 op = XEXP (op, 0);
31104
31105 /* We accept registers. */
31106 if (REG_P (op))
31107 return true;
31108
31109 switch (GET_CODE (op))
31110 {
31111 case PLUS:
31112 {
31113 /* Or registers with an offset. */
31114 if (!REG_P (XEXP (op, 0)))
31115 return false;
31116
31117 op = XEXP (op, 1);
31118
31119 /* The offset must be an immediate though. */
31120 if (!CONST_INT_P (op))
31121 return false;
31122
31123 range = INTVAL (op);
31124
31125 /* Within the range of [-1020,1020]. */
31126 if (!IN_RANGE (range, -1020, 1020))
31127 return false;
31128
31129 /* And a multiple of 4. */
31130 return (range % 4) == 0;
31131 }
31132 case PRE_INC:
31133 case POST_INC:
31134 case PRE_DEC:
31135 case POST_DEC:
31136 return REG_P (XEXP (op, 0));
31137 default:
31138 gcc_unreachable ();
31139 }
31140 return false;
31141 }
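
/* Editor's note -- illustrative: the predicate above accepts memory
   operands such as

     (mem (reg r4))
     (mem (plus (reg r4) (const_int -1020)))
     (mem (post_inc (reg r4)))

   but rejects (mem (plus (reg r4) (const_int 2))) because the offset is
   not a multiple of 4.  */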
31142 #include "gt-arm.h"