/* Output routines for GCC for ARM.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					  const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
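
/* Example usage (hypothetical user code, not part of this file): the
   "long_call" attribute applies to function types, e.g.

     void far_handler (void) __attribute__ ((long_call));

   while "cmse_nonsecure_entry" applies to declarations and, assuming an
   ARMv8-M target built with -mcmse, marks a secure-state entry point:

     void __attribute__ ((cmse_nonsecure_entry)) entry_fn (void);  */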
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
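
/* Worked out: an anchor then covers offsets [-4088, 4095], a block of
   4095 - (-4088) + 1 = 8184 bytes, and 8184 == 1023 * 8.  */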
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
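
/* ARMv8-A deprecates IT blocks containing more than one instruction, so
   with -mrestrict-it an IT block covers a single insn; otherwise up to
   the architectural maximum of four conditional insns may be packed.  */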
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
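
/* Each #undef/#define pair above overrides the default value of one hook
   slot declared in target.def; the pairs take effect when the
   TARGET_INITIALIZER macro from "target-def.h" is expanded to build the
   targetm vector near the end of this file.  */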
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {						\
    num_slots,					\
    l1_size,					\
    l1_line_size				\
  }
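
/* The three fields are the number of simultaneous prefetch slots, the
   L1 cache size in kilobytes and the L1 line size in bytes; the
   "not beneficial" triple { 0, -1, -1 } encodes no slots and unknown
   cache geometry, disabling prefetch-related tuning.  */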
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
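
/* The table entries below are scaled by COSTS_N_INSNS (rtl.h defines it
   as (N) * 4), so each value is the approximate extra cost, in whole
   instructions, on top of the baseline cost of the insn itself.  */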
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table v7m_extra_costs =
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};
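
/* The rows are indexed by addressing-mode kind, roughly: the default
   form, an offset form without write-back and a form with write-back.
   All-zero rows make the addressing-mode choice cost-neutral for the
   generic tuning.  */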
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
=
1820 &generic_extra_costs
, /* Insn extra costs. */
1821 &generic_addr_mode_costs
, /* Addressing mode costs. */
1822 NULL
, /* Sched adj cost. */
1823 arm_default_branch_cost
,
1824 &arm_default_vec_cost
,
1825 1, /* Constant limit. */
1826 5, /* Max cond insns. */
1827 8, /* Memset max inline. */
1828 1, /* Issue rate. */
1829 ARM_PREFETCH_NOT_BENEFICIAL
,
1830 tune_params::PREF_CONST_POOL_TRUE
,
1831 tune_params::PREF_LDRD_FALSE
,
1832 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1833 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1834 tune_params::DISPARAGE_FLAGS_NEITHER
,
1835 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1836 tune_params::FUSE_NOTHING
,
1837 tune_params::SCHED_AUTOPREF_OFF

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
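
/* ARM_PREFETCH_BENEFICIAL bundles the tune_params prefetch settings as
   (num_slots, l1_cache_size, l1_cache_line_size) -- so (4,32,32) above
   models four simultaneous prefetch slots and a 32K L1 cache with
   32-byte lines, values that arm_option_override later feeds into
   PARAM_SIMULTANEOUS_PREFETCHES and the L1 cache params.  (Argument
   order assumed from the macro's definition alongside tune_params in
   arm-protos.h.)  */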

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
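
/* Making the trade-off above concrete: materialising a 32-bit constant
   as MOVW+MOVT is two 4-byte insns at one cycle each, while an
   "ldr rN, =const" is a single (often 16-bit) insn plus a 4-byte
   literal-pool entry, also totalling two cycles, with back-to-back
   literal loads able to pipeline and save a cycle.  Code size is
   roughly a wash; the pipelining and icache effects are what tip
   arm_v7m_tune to PREF_CONST_POOL_TRUE.  (Cycle counts as stated in
   the comment above; encoding sizes are illustrative.)  */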

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (e.g. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
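
/* Worked example of the loop above (Kernighan's method): for
   VALUE = 0b101100 the loop runs three times, clearing bit 2, then
   bit 3, then bit 5, and returns 3.  Each "value &= value - 1" step
   removes exactly one set bit, so the iteration count equals the
   population count.  */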

/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
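
/* For example (names derived from the sprintf formats above),
   arm_set_fixed_optab_libfunc (add_optab, mode, "add", "sa", 3)
   registers "__gnu_addsa3" as the addition libcall for the signed
   accumulator mode, while a NUM_SUFFIX of 0 drops the trailing
   digit entirely.  */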

static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
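
/* Thus a conversion between two signed fractional modes, say "hq" to
   "sq", yields "__gnu_fracthqsq2" (the "2" suffix applies because both
   modes are fixed-point of the same signedness and class), whereas a
   fixed-point-to-float conversion such as "sa" to "df" gets no suffix:
   "__gnu_fractsadf".  */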

static GTY(()) rtx speculation_barrier_libfunc;

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
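
  /* The net effect of the tables above: on a soft-float AEABI target a
     source-level "a / b" on 32-bit ints expands to a call such as

	 bl  __aeabi_idiv	@ quotient returned in r0

     and "a % b" reuses __aeabi_idivmod, whose remainder comes back in
     r1.  (Register mapping per the Run-Time ABI; the asm line is only
     illustrative.)  */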

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
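
  /* With the NULL entries registered above, an expression such as
     "__fp16 c = a + b" is therefore lowered to h2f conversions of both
     operands, an SFmode addition, and an f2h conversion of the result,
     rather than a direct HFmode libcall.  */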

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_QImode, "qi" },
	{ E_HImode, "hi" },
	{ E_SImode, "si" },
	{ E_DImode, "di" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S 4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
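
/* A concrete consequence of the ABI note above: since the AAPCS places
   __va_list in namespace std for C++, a function "void f (va_list)"
   mangles as "_Z1fSt9__va_list", so renaming the tag would silently
   break link compatibility.  (Illustrative mangled name.)  */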

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when "
	     "compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful "
	     "when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");

      /* We only support -mpure-code and -mslow-flash-data on M-profile
	 targets with MOVT.  */
      if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}

/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
}
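
/* For reference on the Thumb-1 ranges above: the word-load form encodes
   a 5-bit immediate scaled by 4, so byte offsets 0..124 are reachable
   in one insn; halfword loads reach 0..62 and byte loads 0..31.  An
   anchor window of [0, 127] therefore keeps the whole window reachable
   for byte accesses while covering nearly all of it for word accesses,
   which is the empirical sweet spot the comment refers to.  */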

/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}

/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 e.g., -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
       i.e. Thumb2 and ARM state only;
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors;
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors;
     - ARMv6-M architecture-based processors;
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6
				  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}

static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with %<-march=%s%> switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}
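
/* Behaviour sketch of the resolution rules above (not an additional
   code path): with "-march=armv7-a -mcpu=cortex-m4" the two ISA sets
   differ beyond the quirk/FPU bits, so the conflict warning fires,
   code is generated for armv7-a, and the tuning defaults to the -mcpu
   choice; with "-mcpu=cortex-a8 -march=armv7-a" the two agree apart
   from quirk/FPU bits and no diagnostic is emitted.  */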

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  target_word_relocations = 1;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
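
/* Illustrative example: with strict volatile bitfields in effect, a
   volatile bit-field is accessed with the width of its declared container
   type.  Given the hypothetical declaration

       struct regs { volatile unsigned int mode : 8; };

   a read of the MODE field uses a full 32-bit load, as memory-mapped
   device registers typically require, rather than a narrower access.  */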
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa,
					  isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
}
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
	    error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
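
/* Usage sketch (illustrative): the table above serves declarations such as

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   where the string argument is matched against isr_attribute_args,
   yielding ARM_FT_ISR in this example.  */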
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
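
/* Illustrative example: a naked function supplies its own prologue,
   epilogue and return sequence in inline assembly, so the missing-return
   warning would be spurious:

       void __attribute__ ((naked)) reset_handler (void)
       {
	 __asm__ volatile ("b start_main");
       }

   (reset_handler and start_main are hypothetical names.)  */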
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
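
/* Worked layout (illustrative) for the plain ARM case above: the template
   is four words, and because a pc read observes the current insn's address
   plus 8, each load with offset #0 reaches the literal two words below it:

       0:  ldr  ip, [pc, #0]    @ loads the word at 0 + 8: static chain
       4:  ldr  pc, [pc, #0]    @ loads the word at 4 + 8: function address
       8:  .word 0              @ patched with the static chain value
      12:  .word 0              @ patched with the function's address

   (ip, i.e. r12, stands in for STATIC_CHAIN_REGNUM here.)  */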
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
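
/* Illustrative example: for a Thumb trampoline placed at 0x20000, the
   adjusted entry address is 0x20001.  The set bottom bit makes a BX/BLX
   transfer enter in Thumb state, while the byte address actually fetched
   from is still 0x20000.  */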
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
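
/* Worked examples (illustrative): an ARM-mode immediate is an 8-bit value
   rotated right by an even amount, so:

       0x000000ff   valid (no rotation)
       0x00ff0000   valid (0xff rotated right by 16)
       0xf000000f   valid (0xff rotated right by 4)
       0x000001fe   invalid in ARM mode (would need an odd rotation),
		    but valid in Thumb-2, which allows arbitrary shifts
       0x00ff00ff   invalid in ARM mode, but valid in Thumb-2 as the
		    replicated pattern 0x00XY00XY  */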
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
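
/* Worked example (illustrative): without MOVW/MOVT, a SET of 0x12345678
   is synthesized from four valid rotated immediates:

       mov  r0, #0x12000000
       orr  r0, r0, #0x00340000
       orr  r0, r0, #0x00005600
       orr  r0, r0, #0x00000078

   arm_gen_constant below searches for shorter sequences before falling
   back to this worst case.  */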
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (i > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (i == 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good, way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /* Convert.
	 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			    and the remainder 0s for e.g. 0xfff00000)
	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	 This can be done in 2 instructions by using shifts with mov or mvn.
	 e.g. for
	 x = x | 0xfff00000;
	 we generate.
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	 x = y | constant (which has set_zero_bit_copies number of trailing ones).
	  to
	 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	 For eg. r0 = r0 | 0xfff
	      mvn	r0, r0, lsr #12
	      mvn	r0, r0, asl #12
      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
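
/* Worked example (illustrative): the comparison  x > 0xfff  cannot encode
   0xfff directly (twelve set bits do not fit an 8-bit rotated immediate),
   but rewriting it as  x >= 0x1000  needs only the valid immediate 0x1000;
   i.e. GT is adjusted to GE with op1 incremented, as done above.  */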
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */

static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type)
      && (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
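
/* Worked example of the AAPCS branch above (illustrative, not from the
   original source): a 4-byte struct { char c[4]; } has size <=
   UNITS_PER_WORD and is returned in r0; a 12-byte struct of three ints is
   larger than a word and is not accepted by any co-processor, so it goes
   in memory; but struct { double x, y; } can be accepted as a homogeneous
   aggregate by the VFP co-processor under the hard-float variant and then
   stays out of memory.  */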
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */

static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  if (type)
    {
      attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
	  user_convention = true;
	}
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if PCS_VARIANT should use VFP registers.  */

static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */

static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with -mgeneral-regs-only",
	   type);

  return true;
}
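
/* For example (illustrative, not from the original source): a
   _Complex double argument has MODE_COMPLEX_FLOAT class, so *count
   becomes 2 and *base_mode DFmode; it is then accepted only if
   use_vfp_abi allows double precision (TARGET_VFP_DOUBLE) for the
   selected variant.  */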
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}
static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */

static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
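
/* Worked example of rules C3-C5 (illustrative, not from the original
   source): for a call f (int, long long), the int takes r0 and leaves
   ncrn == 1; the long long needs doubleword alignment, so C3 rounds ncrn
   up to 2 and C4 assigns r2/r3.  For f (int, int, int, long long)
   instead, ncrn is 3 after the ints, C3 rounds it to 4, C4 and C5 both
   fail because no core registers remain, and the long long goes on the
   stack.  */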
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */

void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */

static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 2;

  if (ret2)
    return 2;

  return ret;
}
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);

      pcum->can_split = false;
      return NULL_RTX;
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */

static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */

static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */

static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  if (TARGET_FDPIC)
    {
      /* In FDPIC, never tailcall something for which we have no decl:
	 the target function could be in a different module, requiring
	 a different FDPIC register value.  */
      if (decl == NULL)
	return false;
    }

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already
   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
   If true, the PIC register is reloaded in the current position of the
   instruction stream regardless of whether it was loaded before.  Otherwise,
   it is only loaded if not already done so (crtl->uses_pic_offset_table is
   null).  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM
		  && !compute_now)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */

static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, regardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.  */

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
7802 static GTY(()) int pic_labelno
;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
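/* Roughly, the non-VxWorks path above expands to a sequence of this shape
   in ARM state (register and label names illustrative only):

	ldr	rP, .LCx
   .LPIC0:
	add	rP, pc, rP	@ rP := GOT base
	...
   .LCx:	.word	_GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   The '+ 8' is the ARM-state PC read-ahead accounted for by the
   plus_constant above; Thumb uses '+ 4'.  */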
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (GET_CODE (orig) == LABEL_REF)
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = (TREE_CODE (decl) == VAR_DECL)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = TREE_CODE (decl) == VAR_DECL
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
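/* For instance, (minus (symbol_ref "sym") (label_ref .L5)) satisfies this
   test: sym - .L5 resolves to a PC-relative constant at assembly time, so
   it need not be forced through the GOT even when generating PIC.  */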
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;

  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
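/* E.g. (mem:DI (post_modify (reg r1) (plus (reg r1) (const_int 8)))) models
   the Thumb-2 write-back form  ldrd r2, r3, [r1], #8 ; per the POST_MODIFY
   case above only constant increments in the tested ranges are accepted.  */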
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
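/* Worked example of the quad-mode limit above (it recurs in the Thumb-2
   variant below): with the bound of 1016 the largest accepted offset is
   1012, so the second double-mode half of a decomposed quad access uses
   1012 + 8 == 1020, still encodable.  A bound of 1024 would admit 1020,
   whose second half at 1028 does not encode.  */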
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
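/* E.g. the Thumb-2 form  ldr r0, [r1, r2, lsl #2]  corresponds to the
   ASHIFT case above: shift counts of 1 to 3 (scale factors 2, 4, 8) are
   accepted, matching thumb2_index_mul_operand for the MULT form.  */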
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
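/* The ranges above mirror the Thumb-1 5-bit scaled immediate forms, e.g.:
     ldrb rD, [rN, #imm5]        @ byte:     0..31
     ldrh rD, [rN, #imm5*2]      @ halfword: 0..62, even
     ldr  rD, [rN, #imm5*4]      @ word:     0..124, multiple of 4
   (the default case also requires the last word of a multi-word access to
   stay below 128, hence "val + GET_MODE_SIZE (mode) <= 128").  */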
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore the FDPIC register after the call.  */
	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
				  UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
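/* Worked example of the constant splitting above: for an SImode access at
   the absolute address 0x12345, bits == 12 gives mask == 0xfff, so
   base == 0x12000 and index == 0x345.  The base is loaded once into a
   register and the access becomes [base_reg, #0x345], letting adjacent
   absolute addresses share base_reg.  */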
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
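/* Worked example of the -Os rebasing above, for SImode
   (GET_MODE_SIZE == 4): an offset of 300 is within 256 + 31*4 == 380, and
   since 300 >= 256, delta = 300 - 252 = 48.  The base is biased by 252
   (one "add rT, rN, #252", a legal 8-bit immediate) and the load then
   uses the in-range offset #48.  */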
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}
static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}
static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4 bytes long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case E_QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
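/* For example, given (mult (reg A) (const_int 4)), shifter_op_p returns
   (reg A): multiplication by a power of two is costed as the shift in
   "add rD, rB, rA, lsl #2".  */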
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);
  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
9709 /* RTX costs. Make an estimate of the cost of executing the operation
9710 X, which is contained within an operation with code OUTER_CODE.
9711 SPEED_P indicates whether the cost desired is the performance cost,
9712 or the size cost. The estimate is stored in COST and the return
9713 value is TRUE if the cost calculation is final, or FALSE if the
9714 caller should recurse through the operands of X to add additional
9717 We currently make no attempt to model the size savings of Thumb-2
9718 16-bit instructions. At the normal points in compilation where
9719 this code is called we have no measure of whether the condition
9720 flags are live or not, and thus no realistic way to determine what
9721 the size will eventually be. */
9723 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9724 const struct cpu_cost_table
*extra_cost
,
9725 int *cost
, bool speed_p
)
9727 machine_mode mode
= GET_MODE (x
);
9729 *cost
= COSTS_N_INSNS (1);
9734 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9736 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9744 /* SET RTXs don't have a mode so we get it from the destination. */
9745 mode
= GET_MODE (SET_DEST (x
));
9747 if (REG_P (SET_SRC (x
))
9748 && REG_P (SET_DEST (x
)))
9750 /* Assume that most copies can be done with a single insn,
9751 unless we don't have HW FP, in which case everything
9752 larger than word mode will require two insns. */
9753 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9754 && GET_MODE_SIZE (mode
) > 4)
9757 /* Conditional register moves can be encoded
9758 in 16 bits in Thumb mode. */
9759 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9765 if (CONST_INT_P (SET_SRC (x
)))
9767 /* Handle CONST_INT here, since the value doesn't have a mode
9768 and we would otherwise be unable to work out the true cost. */
9769 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9772 /* Slightly lower the cost of setting a core reg to a constant.
9773 This helps break up chains and allows for better scheduling. */
9774 if (REG_P (SET_DEST (x
))
9775 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9778 /* Immediate moves with an immediate in the range [0, 255] can be
9779 encoded in 16 bits in Thumb mode. */
9780 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9781 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9783 goto const_int_cost
;
9789 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
9793 /* Calculations of LDM costs are complex. We assume an initial cost
9794 (ldm_1st) which will load the number of registers mentioned in
9795 ldm_regs_per_insn_1st registers; then each additional
9796 ldm_regs_per_insn_subsequent registers cost one more insn. The
9797 formula for N regs is thus:
9799 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9800 + ldm_regs_per_insn_subsequent - 1)
9801 / ldm_regs_per_insn_subsequent).
9803 Additional costs may also be added for addressing. A similar
9804 formula is used for STM. */
9806 bool is_ldm
= load_multiple_operation (x
, SImode
);
9807 bool is_stm
= store_multiple_operation (x
, SImode
);
9809 if (is_ldm
|| is_stm
)
9813 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9814 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9815 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9816 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9817 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9818 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9819 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9821 *cost
+= regs_per_insn_1st
9822 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9823 + regs_per_insn_sub
- 1)
9824 / regs_per_insn_sub
);
9833 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9834 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9835 *cost
+= COSTS_N_INSNS (speed_p
9836 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9837 else if (mode
== SImode
&& TARGET_IDIV
)
9838 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9840 *cost
= LIBCALL_COST (2);
9842 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9843 possible udiv is prefered. */
9844 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9845 return false; /* All arguments must be in registers. */
9848 /* MOD by a power of 2 can be expanded as:
9850 and r0, r0, #(n - 1)
9851 and r1, r1, #(n - 1)
9852 rsbpl r0, r1, #0. */
9853 if (CONST_INT_P (XEXP (x
, 1))
9854 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9857 *cost
+= COSTS_N_INSNS (3);
9860 *cost
+= 2 * extra_cost
->alu
.logical
9861 + extra_cost
->alu
.arith
;
9867 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9868 possible udiv is prefered. */
9869 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9870 return false; /* All arguments must be in registers. */
9873 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9875 *cost
+= (COSTS_N_INSNS (1)
9876 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9878 *cost
+= extra_cost
->alu
.shift_reg
;
9886 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9888 *cost
+= (COSTS_N_INSNS (2)
9889 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9891 *cost
+= 2 * extra_cost
->alu
.shift
;
9892 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
9893 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
9897 else if (mode
== SImode
)
9899 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9900 /* Slightly disparage register shifts at -Os, but not by much. */
9901 if (!CONST_INT_P (XEXP (x
, 1)))
9902 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9903 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9906 else if (GET_MODE_CLASS (mode
) == MODE_INT
9907 && GET_MODE_SIZE (mode
) < 4)
9911 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9912 /* Slightly disparage register shifts at -Os, but not by
9914 if (!CONST_INT_P (XEXP (x
, 1)))
9915 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9916 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9918 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9920 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9922 /* Can use SBFX/UBFX. */
9924 *cost
+= extra_cost
->alu
.bfx
;
9925 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9929 *cost
+= COSTS_N_INSNS (1);
9930 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9933 if (CONST_INT_P (XEXP (x
, 1)))
9934 *cost
+= 2 * extra_cost
->alu
.shift
;
9936 *cost
+= (extra_cost
->alu
.shift
9937 + extra_cost
->alu
.shift_reg
);
9940 /* Slightly disparage register shifts. */
9941 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9946 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9947 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9950 if (CONST_INT_P (XEXP (x
, 1)))
9951 *cost
+= (2 * extra_cost
->alu
.shift
9952 + extra_cost
->alu
.log_shift
);
9954 *cost
+= (extra_cost
->alu
.shift
9955 + extra_cost
->alu
.shift_reg
9956 + extra_cost
->alu
.log_shift_reg
);
9962 *cost
= LIBCALL_COST (2);
9971 *cost
+= extra_cost
->alu
.rev
;
9978 /* No rev instruction available. Look at arm_legacy_rev
9979 and thumb_legacy_rev for the form of RTL used then. */
9982 *cost
+= COSTS_N_INSNS (9);
9986 *cost
+= 6 * extra_cost
->alu
.shift
;
9987 *cost
+= 3 * extra_cost
->alu
.logical
;
9992 *cost
+= COSTS_N_INSNS (4);
9996 *cost
+= 2 * extra_cost
->alu
.shift
;
9997 *cost
+= extra_cost
->alu
.arith_shift
;
9998 *cost
+= 2 * extra_cost
->alu
.logical
;
10006 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10007 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10009 if (GET_CODE (XEXP (x
, 0)) == MULT
10010 || GET_CODE (XEXP (x
, 1)) == MULT
)
10012 rtx mul_op0
, mul_op1
, sub_op
;
10015 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10017 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10019 mul_op0
= XEXP (XEXP (x
, 0), 0);
10020 mul_op1
= XEXP (XEXP (x
, 0), 1);
10021 sub_op
= XEXP (x
, 1);
10025 mul_op0
= XEXP (XEXP (x
, 1), 0);
10026 mul_op1
= XEXP (XEXP (x
, 1), 1);
10027 sub_op
= XEXP (x
, 0);
10030 /* The first operand of the multiply may be optionally
10032 if (GET_CODE (mul_op0
) == NEG
)
10033 mul_op0
= XEXP (mul_op0
, 0);
10035 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10036 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10037 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10043 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10047 if (mode
== SImode
)
10049 rtx shift_by_reg
= NULL
;
10053 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
10054 if (shift_op
== NULL
)
10056 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
10057 non_shift_op
= XEXP (x
, 0);
10060 non_shift_op
= XEXP (x
, 1);
10062 if (shift_op
!= NULL
)
10064 if (shift_by_reg
!= NULL
)
10067 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10068 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10071 *cost
+= extra_cost
->alu
.arith_shift
;
10073 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10074 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10078 if (arm_arch_thumb2
10079 && GET_CODE (XEXP (x
, 1)) == MULT
)
10083 *cost
+= extra_cost
->mult
[0].add
;
10084 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10085 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10086 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10090 if (CONST_INT_P (XEXP (x
, 0)))
10092 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10093 INTVAL (XEXP (x
, 0)), NULL_RTX
,
10095 *cost
= COSTS_N_INSNS (insns
);
10097 *cost
+= insns
* extra_cost
->alu
.arith
;
10098 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10102 *cost
+= extra_cost
->alu
.arith
;
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}
      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}
      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Handle a side effect of adding in the carry to an addition.  */
	  if (GET_CODE (op0) == PLUS
	      && arm_carry_operation (op1, mode))
	    {
	      op1 = XEXP (op0, 1);
	      op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_carry_operation (op0, mode))
	    {
	      op0 = XEXP (op1, 0);
	      op1 = XEXP (op1, 1);
	    }
	  else if (GET_CODE (op0) == PLUS)
	    {
	      op0 = strip_carry_operation (op0);
	      if (swap_commutative_operands_p (op0, op1))
		std::swap (op0, op1);
	    }

	  if (arm_carry_operation (op0, mode))
	    {
	      /* Adding the carry to a register is a canonicalization of
		 adding 0 to the register plus the carry.  */
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (op0) == MULT)
	    {
	      rtx mul_op = op0;

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (CONST_INT_P (op1))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (op1), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse here because we want to test the operands
	     without any carry operation.  */
	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	  return true;
	}
      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;
	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}
      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}
      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}
      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */
      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }
	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op, shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	return true;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	}
      return true;
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}
      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode ==DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0),
						   1)) > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }
    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}
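
/* Worked example (illustrative, using the weights above): candidate
   addresses for a load would be ordered

     (post_inc r1)            -> 0   auto-increment, most preferred
     (plus r1 (mult r2 4))    -> 3   reg + scaled index
     (plus r1 (const_int 4))  -> 2   reg + immediate offset
     (plus r1 r2)             -> 4   plain register sum
     (reg r1)                 -> 6   bare register
     (symbol_ref "x")         -> 10  needs a constant load

   Only the relative order matters to the RTL optimizers.  */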
static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int * cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 3;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
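
/* Worked example (illustrative): with the values above, an SImode move
   between a VFP register and a core register costs 15, deliberately less
   than a spill and reload (2 * 10 by arm_memory_move_cost below), so a
   float-to-int conversion is not pushed through memory.  */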
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
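
/* Worked example (illustrative): on Thumb-1, an SImode move involving
   LO_REGS costs (2 * 4) * 1 = 8, while the same move involving any other
   class costs (2 * 4) * 2 = 16.  */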
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
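
/* Worked example (illustrative): constructing a V4SI vector from scalars
   has TYPE_VECTOR_SUBPARTS = 4, so vec_construct costs 4 / 2 + 1 = 3
   units, approximating the element-insert sequence needed to build it.  */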
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
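
/* Worked example (illustrative): a statement with stmt_cost 1 located in
   an inner loop relative to the loop being vectorized is recorded as
   1 * 50 = 50 units in cost[vect_body] because of the weighting above.  */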
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready [first_older_only]),
	     INSN_UID(ready [first_younger]));
  rtx_insn *first_older_only_insn = ready [first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i+1];
    }

  ready[i] = first_older_only_insn;
}
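
/* Worked example (illustrative): for a ready list indexed 0..2 with the
   head (next to issue) at index 2,

     { A, O, Y }   O = older-only, Y = younger, A = neither,

   the scan sets first_younger = 2 and first_older_only = 1, and the
   rotation yields { A, Y, O }, so the older-only insn O issues first.  */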
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).
*/
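
/* Illustrative sketch, not part of GCC: decode an ABCDEFGH index back to
   its value, assuming the formula above.  For example +1.0 = 16 * 2^-4
   (s = 0, n = 16, r = 4) encodes as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16)
   = 0x70, the inverse of what vfp3_const_double_index below computes.  */
static double
vfp3_decode_index_sketch (unsigned char abcdefgh)
{
  int s = (abcdefgh >> 7) & 1;		/* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;	/* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 15) + 16;		/* EFGH: mantissa, stored as n - 16.  */
  return (s ? -1.0 : 1.0) * n / (double) (1 << r);
}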
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2). (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
		      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.
*/
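
/* Illustrative sketch, not part of GCC: the per-variant test that the
   CHECK macro below expands for variant 4 above (vmov.i16, constant
   00000000 abcdefgh) amounts to this, run over the splatted little-endian
   byte image of the constant with a stride of 2.  */
static int
neon_vmov_i16_matches_sketch (const unsigned char *bytes, unsigned int idx)
{
  for (unsigned int i = 0; i < idx; i += 2)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0))
      return 0;
  return 1;
}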
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    {						\
      if (!(TEST))				\
	{					\
	  matches = 0;				\
	  break;				\
	}					\
    }						\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);
  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }
  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));
  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);
  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */
int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT is for determining left or right shift,
   because they have different limitations.  */
int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
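
/* Worked example (illustrative): for a V8QI constant with all elements
   equal to 3, maxshift is 8; 3 is accepted both as a left shift (valid
   range 0..7) and as a right shift (valid range 1..8), so the function
   returns nonzero with *elementwidth set to 8.  */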
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */
char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width,
					     isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
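
/* For instance, with MODE V2SI, PARTS is 2 and the loop runs once; if
   REDUC generates VPADD the emitted reduction is simply

	vpadd.i32	d0, d1, d1	@ both lanes become d1[0] + d1[1]

   and, as noted above, only lane 0 of the result is actually used.  */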
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
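
/* A sketch of the effect: for a V4SI constant such as
   {0x12345678, 0x12345678, 0x12345678, 0x12345678}, which is not a
   valid VMOV immediate, the RTL produced here assembles to roughly

	movw	r0, #22136	@ low half of 0x12345678
	movt	r0, #4660	@ high half
	vdup.32	q0, r0

   (register choice is arbitrary), which is cheaper than a PC-relative
   load from the literal pool.  */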
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
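
/* Summarizing the three strategies with examples: an all-zero vector is
   a valid VMOV immediate and is returned as a CONST_VECTOR ("vmov.i32
   q0, #0"); a duplicated arbitrary constant {C, C, C, C} falls back to
   VDUP from a core register; and a genuinely mixed constant such as
   {1, 2, 3, 4} comes back as a CONST_VECTOR that the move patterns
   place in the literal pool and load PC-relative.  */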
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
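
/* Worked example for the one-variable case above: initializing V4SI
   {x, 1, 2, 3} first recurses on the constant vector {1, 1, 2, 3} (the
   neighbouring element 1 substituted at index 0) and then inserts x
   into lane 0 through the vec_set pattern, avoiding the stack
   fallback entirely.  */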
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   an error mentioning DESC if it doesn't.  EXP indicates the source location,
   which includes the inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
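
/* As an illustration of the bounds-check helpers above: for an
   intrinsic such as vgetq_lane_s32, whose lane operand must lie in
   [0, 4), a call like neon_lane_bounds (operand, 0, 4, exp) rejects a
   lane index of 4 with the diagnostic "lane 4 out of range 0 - 3",
   attributed to the intrinsic's use site via EXP.  */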
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }

  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
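
/* Example of the match: the unsigned clamp

	x = MIN (MAX (x, 0), 255)

   has a low bound of 0 and a high bound of 255 = 2^8 - 1, so *MASK
   becomes 8 and the operation maps onto "usat r0, #8, r1"; the signed
   clamp to [-128, 127] instead sets *SIGNED_SAT and *MASK = 8, giving
   "ssat r0, #8, r1".  */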
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
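
/* For reference, the Thumb-1 instruction "ldmia r0!, {r1, r2}" is
   described by a PARALLEL of roughly this shape:

     [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
      (set (reg:SI r1) (mem:SI (reg:SI r0)))
      (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]

   where the leading SET is the write-back element checked above and the
   offset adjustment 8 equals the two loaded registers times four.  */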
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
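
/* For example, with UNSORTED_OFFSETS {4, 12, 8} and ORDER[0] = 0 (offset
   4 being the lowest), the loop fills ORDER in as {0, 2, 1}: 8 follows
   4 and 12 follows 8.  Offsets {0, 4, 12} fail instead, because no
   element is exactly 4 above 4.  */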
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
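
/* Putting it together: the peephole input

	ldr	r1, [r0]
	ldr	r2, [r0, #4]

   yields ldm_case 1 with *LOAD_OFFSET 0 and can be rewritten as
   "ldmia r0, {r1, r2}"; a lowest offset of 4 on ARM would give
   ldm_case 2 and an "ldmib" instead.  */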
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
					    gen_lowpart (HImode,
							 halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
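
/* To illustrate the structure above: copying 11 bytes with an
   interleave factor of 1 and both operands unaligned emits two
   unaligned word copies, one halfword copy, then one byte copy,
   schematically (register allocation is arbitrary):

	ldr	r3, [r1]		@ unaligned
	str	r3, [r0]
	ldr	r3, [r1, #4]
	str	r3, [r0, #4]
	ldrh	r3, [r1, #8]
	strh	r3, [r0, #8]
	ldrb	r3, [r1, #10]
	strb	r3, [r0, #10]  */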
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
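
/* Worked example (illustrative only): for length == 41 and
   bytes_per_iter == 16, leftover = 41 % 16 = 9, so the loop body runs
   twice (copying 32 bytes) and the final 9 bytes are handled by the
   straight-line copy after the loop.  */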
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 harder.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   by its mode size.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
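
/* Usage sketch (illustrative): if MEM is an SImode reference at
   address (reg:SI r0), next_consecutive_mem returns an SImode
   reference at (plus:SI (reg:SI r0) (const_int 4)), i.e. the
   adjoining word.  */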
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success. */
bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  gcc_assert (len < 4);

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  gcc_assert (len < 2);
  if (len)
    {
      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
    }

  return true;
}
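
/* Worked example (illustrative only): a 14-byte copy with both
   buffers sufficiently aligned decomposes as 14 = 8 + 4 + 2, i.e.
   one LDRD/STRD pair, one LDR/STR, and one LDRH/STRH, with no byte
   tail left over.  */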
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */
void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
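
/* Illustrative example (assumes a little-endian target): decomposing
   op2 == (const_int 0x100000003) yields *lo_op2 == (const_int 3) and
   *hi_op2 == (const_int 1), ready for a 32-bit add/adc style
   expansion.  */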
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
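
/* Illustrative example: for (ior (eq x y) (le x y)) tested against
   zero, cond1 == EQ and cond2 == LE; comparison_dominates_p (EQ, LE)
   holds, so the EQ/LE pair above yields CC_DLEmode and a single
   dominated comparison sequence can be emitted.  */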
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  /* An unsigned comparison of ~reg with a const is really a special
     canonicalization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y)
      && (op == EQ || op == NE
	  || op == LTU || op == LEU || op == GEU || op == GTU))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
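
/* Illustrative example: for the overflow idiom "a + b < a", combine
   presents (ltu (plus a b) a); the PLUS case above selects CC_Cmode,
   so only the carry flag of the addition needs to be tested.  */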
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
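
/* Worked example of the hi/lo split above (illustrative only): for
   offset == 0x1234, lo = 0x234 and hi = 0x1000, so hi + lo == offset.
   For the corner case offset == 4095, lo is clipped to 0x7ff (2047)
   and hi becomes 0x800 (2048), keeping "offset + 1" addressable in
   the second byte load.  */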
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
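
/* Illustrative values (assuming the ranges filled in above): in ARM
   state any offset in -255..255 is accepted, so 248 is valid; in
   Thumb-2 state 1020 is valid but 1022 is rejected because it is not
   a multiple of 4.  */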
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the offsets is 4.
   If preload complete then check that registers are legal.  WBACK indicates whether
   address is updated.  LOAD indicates whether memory access is load or store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
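
/* Illustrative example: with unaligned_access enabled, a word-aligned
   access at offset 4 is OK (4 & 3 == 0); without it, the access must
   be doubleword aligned and offset 4 is rejected (4 & 7 != 0).  */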
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)
	return false;

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
				     base, offset,
				     false, load));
    }

  return false;
}
/* Return true if parallel execution of the two word-size accesses provided
   could be satisfied with a single LDRD/STRD instruction.  Two word-size
   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
   register operands and OPERANDS[2,3] are the corresponding memory operands.
   LOAD indicates whether the access is load or store.  */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset;
  int i, gap;

  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	return false;
    }

  if (offsets[0] > offsets[1])
    return false;

  gap = offsets[1] - offsets[0];
  offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				false, load);
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      {
	char fpstr[20];
	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			 sizeof (fpstr), 0, 1);
	fprintf (f, "%s", fpstr);
      }
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
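
/* Illustrative example: MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte
   value is padded out to a word), while MINIPOOL_FIX_SIZE (DFmode)
   is 8 (already a word multiple, so no padding is needed).  */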
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */

rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
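
/* Worked example (illustrative): for a label aligned to 8 bytes on
   Thumb, align == 8 and min_insn_size == 2, so up to 6 bytes of
   padding may be inserted before the label.  */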
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT  min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
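
/* An illustrative, self-contained model of the walk above (not part of
   the build): offsets are a running sum over the live entries only.
   With three entries of fix_size 4, 8 and 4 the assigned offsets are
   0, 4 and 12; an entry whose refcount has dropped to zero inherits
   the current offset but does not advance it.

     struct node { int offset, refcount, fix_size; struct node *next; };

     static void
     assign_offsets (struct node *head)
     {
       int offset = 0;
       for (struct node *p = head; p != NULL; p = p->next)
	 {
	   p->offset = offset;
	   if (p->refcount > 0)
	     offset += p->fix_size;
	 }
     }
*/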
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";; Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
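
/* For illustration only (label names hypothetical), the stream emitted
   above for a pool holding two live 4-byte entries looks roughly like:

	.align	2		@ or .align 3 when align64 is set
   .Lpool:			@ minipool_vector_label
	.word	<constant 0>	@ consttable_4 entry, offset 0
	.word	<constant 4>	@ consttable_4 entry, offset 4
				@ consttable_end, then a barrier

   Entries whose refcount has dropped to zero are freed without
   emitting anything.  */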
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
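
/* Worked example of the heuristic above: an unconditional jump that is
   immediately followed by a label scores 50 - 20 - 10 = 20, the lowest
   cost this function returns, which is why forced barriers gravitate to
   spots just after branches where the pool needs no jump around it.  */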
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
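
/* Worked example: the DImode constant 0x0000000100000001 splits into two
   SImode halves that each cost one insn to build, for a total of 2, so
   it stays inline under either threshold (3 when optimizing for size or
   on cores with load delay slots, 4 otherwise).  */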
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
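
/* Worked example (illustrative): for 0x00000001FFFFFF00 the low part
   0xFFFFFF00 is one MVN (cost 1) and the high part 0x00000001 is one
   MOV (cost 1), so the function returns 2 and the constant is cheaper
   to synthesize than to load from the pool.  */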
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
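
/* The const_ok_for_arm tests used by the helpers above accept exactly
   the ARM data-processing immediates: an 8-bit value rotated right by
   an even amount.  A self-contained model of that check, for reference
   only (the real predicate lives elsewhere in this file); rotating left
   by ROT undoes a rotate-right-by-ROT encoding:

     static int
     is_arm_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
	 {
	   unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
	   if (v <= 0xff)
	     return 1;
	 }
       return 0;
     }

   So 0xFF000000 (0xFF ror 8) is encodable while 0x00000101 is not,
   which is why both helpers also try the bitwise complement before
   giving up.  */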
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Lets just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;

	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
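
/* Worked example of the mask arithmetic above (values illustrative):
   with *last_used_bit == 4 and a new field starting at offset == 12 in
   the same register,

     mask  = ((uint32_t)-1) >> (32 - 12);	   mask == 0x00000FFF
     mask -= ((uint32_t) 1 << 4) - 1;		   mask == 0x00000FF0

   marks bits 4..11 as padding to clear while leaving bits 0..3 (the
   previous field) and bits 12..31 (the new field) untouched.  */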
/* In the context of ARMv8-M Security Extensions, this function is used for both
   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
   registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)

{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
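
/* Worked example (illustrative): a DFmode argument in d0 is seen here
   as a register pair starting at FIRST_VFP_REGNUM with ARM_NUM_REGS
   returning 2, so

     mask  = HOST_WIDE_INT_1U << (regno + 2);
     mask -= HOST_WIDE_INT_1U << regno;

   sets exactly the two bits covering s0 and s1, keeping both halves of
   the double out of the set of registers to clear.  */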
/* Clear registers secret before doing a cmse_nonsecure_call or returning from
   a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates which registers
   are to be fully cleared, using the value in register CLEARING_REG if more
   efficient.  The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
   the bits that need to be cleared in caller-saved core registers, with
   SCRATCH_REG used as a scratch register for that clearing.

   NOTE: one of three following assertions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
     in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */

static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
	 CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      if (mask)
	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);

  /* Clear full registers.  */

  /* If not marked for clearing, clearing_reg already does not contain
     any secret.  */
  if (clearing_regno <= maxregno
      && bitmap_bit_p (to_clear_bitmap, clearing_regno))
    {
      emit_move_insn (clearing_reg, const0_rtx);
      emit_use (clearing_reg);
      bitmap_clear_bit (to_clear_bitmap, clearing_regno);
    }

  for (regno = minregno; regno <= maxregno; regno++)
    {
      if (!bitmap_bit_p (to_clear_bitmap, regno))
	continue;

      if (IS_VFP_REGNUM (regno))
	{
	  /* If regno is an even vfp register and its successor is also to
	     be cleared, use vmov.  */
	  if (TARGET_VFP_DOUBLE
	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
	      && bitmap_bit_p (to_clear_bitmap, regno + 1))
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, regno),
			      CONST1_RTX (DFmode));
	      emit_use (gen_rtx_REG (DFmode, regno));
	      regno++;
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SFmode, regno),
			      CONST1_RTX (SFmode));
	      emit_use (gen_rtx_REG (SFmode, regno));
	    }
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	  emit_use (gen_rtx_REG (SImode, regno));
	}
    }
}
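
/* For illustration, a plausible sequence emitted by the code above when
   clearing r1-r3 and d1-d3 before a non-secure call through r4 (register
   choice hypothetical; CLEARING_REG here holds the sanitized call
   address, which carries no secret):

	mov	r1, r4
	mov	r2, r4
	mov	r3, r4
	vmov.f64	d1, #1.0	@ paired VFP regs cleared with one vmov
	vmov.f64	d2, #1.0
	vmov.f64	d3, #1.0

   Padding bits are handled separately by ANDing the argument register
   with an inverted mask built in SCRATCH_REG.  */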
/* Clears caller saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
   registers is done in __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  unsigned address_regnum, regno, maxregno =
	    TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.
	  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;

	      if (VOID_TYPE_P (arg_type))
		continue;

	      function_arg_info arg (arg_type, /*named=*/true);
	      if (!first_param)
		/* ??? We should advance after processing the argument and pass
		   the argument we're advancing past.  */
		arm_function_arg_advance (args_so_far, arg);

	      arg_rtx = arm_function_arg (args_so_far, arg);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    {
		      if (to_clear_args_mask & (1ULL << regno))
			bitmap_clear_bit (to_clear_bitmap, regno);
		    }
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));

	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}
    }
}
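
/* The two shifts emitted above compute, in effect,

     addr = (addr >> 1) << 1;	-- lsrs rN, rN, #1 ; lsls rN, rN, #1

   clearing bit 0 of the branch target without the BIC-immediate form,
   and therefore without needing an extra register on Thumb-1.  */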
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
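
/* Illustrative before/after for the rewrite above (registers
   hypothetical):

     before:	movs	r2, r1		@ move; flags unused
		cmp	r1, #0
		beq	.L3

     after:	subs	r2, r1, #0	@ sets Z exactly as the cmp would
		beq	.L3

   Once the move is re-expressed as SUBS, the separate compare becomes
   redundant and the cbranch pattern can emit the branch alone.  */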
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
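
/* Illustrative effect of the conversion above: with the CC register
   dead at this point,

	add	r0, r1, r2	@ 32-bit T32 encoding, flags preserved

   can be re-emitted as

	adds	r0, r1, r2	@ 16-bit T1 encoding, flags clobbered

   The CLOBBER of CC_REGNUM added to the PARALLEL is what licenses the
   flag-setting form; SWAP_CONV additionally commutes the operands of a
   commutative source so the destination matches the first input, as
   the 16-bit encodings require.  */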
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();

  /* We cannot run the Thumb passes for thunks because there is no CFG.  */
  if (cfun->is_thunk)
    ;
  else if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return ;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p =  REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
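
/* Usage note (illustrative): a prologue pushing four register pairs
   calls vfp_emit_fstmd (base, 4) and gets 32 back as the stack
   adjustment.  On pre-v6 cores the ARM10 VFPr1 workaround above turns a
   two-pair push into a three-pair one, so callers must use the returned
   byte count (24, not 16) rather than assuming the original count * 8.  */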
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
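
/* Illustrative expansion of arm_emit_movpair for an immediate: loading
   0x12345678 becomes

	movw	r0, #0x5678	@ val & 0xffff, upper half zeroed
	movt	r0, #0x1234	@ the ZERO_EXTRACT of bits 16..31

   and the movt is omitted entirely when the top sixteen bits are zero,
   as for 0x00001234.  */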
18671 /* Output a move between double words. It must be REG<-MEM
18674 output_move_double (rtx
*operands
, bool emit
, int *count
)
18676 enum rtx_code code0
= GET_CODE (operands
[0]);
18677 enum rtx_code code1
= GET_CODE (operands
[1]);
18682 /* The only case when this might happen is when
18683 you are looking at the length of a DImode instruction
18684 that has an invalid constant in it. */
18685 if (code0
== REG
&& code1
!= MEM
)
18687 gcc_assert (!emit
);
18694 unsigned int reg0
= REGNO (operands
[0]);
18696 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18698 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18700 switch (GET_CODE (XEXP (operands
[1], 0)))
18707 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18708 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18710 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18715 gcc_assert (TARGET_LDRD
);
18717 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18724 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18726 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18734 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18736 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18741 gcc_assert (TARGET_LDRD
);
18743 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18748 /* Autoicrement addressing modes should never have overlapping
18749 base and destination registers, and overlapping index registers
18750 are already prohibited, so this doesn't need to worry about
18752 otherops
[0] = operands
[0];
18753 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18754 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18756 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18758 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18760 /* Registers overlap so split out the increment. */
18763 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18764 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18771 /* Use a single insn if we can.
18772 FIXME: IWMMXT allows offsets larger than ldrd can
18773 handle, fix these up with a pair of ldr. */
18775 || !CONST_INT_P (otherops
[2])
18776 || (INTVAL (otherops
[2]) > -256
18777 && INTVAL (otherops
[2]) < 256))
18780 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18786 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18787 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18797 /* Use a single insn if we can.
18798 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18799 fix these up with a pair of ldr. */
18801 || !CONST_INT_P (otherops
[2])
18802 || (INTVAL (otherops
[2]) > -256
18803 && INTVAL (otherops
[2]) < 256))
18806 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18812 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18813 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditionals.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldrd%?\t%0, [%1]", operands);
              else
                output_asm_insn ("ldmia%?\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;

          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmda%?\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmib%?\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0], otherops[2]))
                        {
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          std::swap (otherops[1], otherops[2]);
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldrd%?\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else
                    {
                      if (emit)
                        output_asm_insn ("add%?\t%0, %1, %2", otherops);
                    }
                }
              else
                {
                  if (emit)
                    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
                }

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldrd%?\t%0, [%1]";

              return "ldmia%?\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
         must be even.  This is usually the case for double-word
         values but user assembly constraints can force an odd
         starting register.  */
      bool allow_strd = TARGET_LDRD
                        && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%?\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (allow_strd);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%?\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (allow_strd);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
          break;
        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than strd can handle,
             fix these up with a pair of str.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmda%?\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmib%?\t%m0, %M1", operands);
                  return "";
                }
            }
          if (allow_strd
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
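
/* Illustrative note (added commentary, not from the original sources):
   in the templates above, %? expands to the conditional suffix, %m1
   prints the base register of memory operand 1, %M0 prints operand 0
   as an LDM/STM register list and %H0 prints the second register of
   the pair.  So a DImode load into r0/r1 from [r4] in the REG case
   might emit either of the following, depending on TARGET_LDRD:

       ldrd    r0, [r4]
       ldmia   r4, {r0, r1}

   The exact registers depend on register allocation.  */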

/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */
      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldmia%?\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldmia%?\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%?\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
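
/* Illustrative example (added commentary): a reg->reg move of a
   quad-word value from r4-r7 into r2-r5 overlaps in r4/r5.  Because
   dest < src, the loop above copies in ascending order, reading each
   source register before it is overwritten:

       mov r2, r4
       mov r3, r5
       mov r4, r6
       mov r5, r7

   A move in the opposite direction must copy in descending order for
   the same reason.  */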

/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
            || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
              || mode == SFmode
              || mode == DFmode
              || mode == HImode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : sp ? "32" : "16",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
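
/* Illustrative expansions (assumed, not from the original sources):
   for a DFmode load of d1 from [r0], the default case yields the
   template "vldr%?.64\t%P0, %1", i.e. "vldr.64 d1, [r0]"; a POST_INC
   store of an SFmode value in s2 yields "vstmia.32 r0!, {s2}".  */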

/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
        {
          if (nregs > 4)
            templ = "v%smia%%?\t%%m0, %%h1";
          else
            templ = "v%s1.64\t%%h1, %%A0";

          ops[0] = mem;
          ops[1] = reg;
          break;
        }
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
        int i;
        int overlap = -1;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
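
/* Illustrative example (assumed): a V4SImode load (one quad register,
   nregs == 2) from a plain REG address selects the vld1 template,
   giving e.g. "vld1.64 {d16-d17}, [r0:64]", while an XImode load
   (8 double-word registers, nregs > 4) must fall back to
   "vldmia r0, {d16-d23}" since vld1.64 handles at most 4 double-word
   registers.  */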

/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case E_EImode:
        case E_OImode:
          return 8;
        case E_CImode:
          return 12;
        case E_XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}

/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}

/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
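
/* Worked example (added commentary): output_add_immediate with
   n = 0x12345 cannot use a single add, so output_multi_immediate
   splits it into 8-bit chunks starting at even bit positions, each a
   valid ARM immediate:

       add r0, r1, #69          @ 0x45
       add r0, r0, #8960        @ 0x2300
       add r0, r0, #65536       @ 0x10000

   The register numbers here are arbitrary.  */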

/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
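
/* Illustrative note (added commentary): the MULT case means that an
   rtx such as (mult:SI (reg) (const_int 8)) is printed as an "lsl"
   with *AMOUNTP set to exact_log2 (8) == 3, so a caller can fold a
   multiply-by-8 into a shifter operand, e.g. "add r0, r1, r2, lsl #3".  */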

/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
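
/* Illustrative output (assumed): for the bytes of "hi\n\"x" the routine
   above emits

       .ascii "hi\012\"x"

   printable characters are copied (escaping backslash and double
   quote) and everything else becomes a three-digit octal escape; a
   fresh .ascii directive is started every MAX_ASCII_LEN characters.  */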

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
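
/* Illustrative note (added commentary): bit N of the returned mask
   corresponds to core register rN, so a normal function that uses r4
   and r6 and needs a frame pointer would yield
   (1 << 4) | (1 << 6) | (1 << HARD_FRAME_POINTER_REGNUM).  */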

/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}

/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask =
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}

/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}

/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno)
               || call_used_or_fixed_reg_p (regno))
              && (!df_regs_ever_live_p (regno + 1)
                  || call_used_or_fixed_reg_p (regno + 1)))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}

/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted in RTL.

   Note: do not forget to update the length attribute of the corresponding
   insn pattern when changing assembly output (e.g. the length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |= (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5t && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5t || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* Check if we have to clear the 'GE bits', which are only
                 used if parallel add and subtraction instructions are
                 available.  */
              if (TARGET_INT_SIMD)
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
              else
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvq, %%|lr", conditional);

              output_asm_insn (instr, & operand);
              if (TARGET_HARD_FLOAT)
                {
                  /* Clear the cumulative exception-status bits (0-4,7) and the
                     condition code bits (28-31) of the FPSCR.  We need to
                     remember to clear the first scratch register used (IP) and
                     save and restore the second (r4).  */
                  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
                  output_asm_insn (instr, & operand);
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5t || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
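
/* Illustrative results (assumed; the exact output depends on the
   target options): a normal ARM-state function that saved {r4, r5, lr}
   and has no special exit requirements returns with a single

       pop     {r4, r5, pc}

   whereas an interworked return restores into lr and finishes with
   "bx lr", and an ISR/FIQ handler uses "subs pc, lr, #4" so that the
   CPSR is restored as well.  */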

/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print
     an extra function label for each function with the
     'cmse_nonsecure_entry' attribute.  This extra function label should be
     prepended with '__acle_se_', telling the linker that it needs to create
     secure gateway veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}

/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, with length ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */

void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
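
/* Worked example (added commentary): for NAME "main", LENGTH is 5
   (including the terminating NUL) and ALIGNLENGTH rounds up to 8, so
   the marker word emitted is 0xff000000 + 8 == 0xff000008; a
   backtracer recognizes the 0xff marker in the top byte and uses the
   low bits to locate the start of the string.  */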

/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
               (HOST_WIDE_INT) crtl->args.size,
               crtl->args.pretend_args_size,
               (HOST_WIDE_INT) get_frame_size ());
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}

/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    continue;

  i = 0;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          continue;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* For the first register pair we have to use an STRD with
               pre-decrement.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;
}
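
/* Illustrative sequence (assumed): pushing {r4, r5, r6} (an odd count)
   first emits a single store that also allocates the whole area,

       str     r4, [sp, #-12]!
       strd    r5, r6, [sp, #4]

   so that the following strd stays doubleword aligned.  */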

/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* The current register and the next register form a register
               pair for which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
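
/* Illustrative sequence (assumed): for {r4, r5, r7} the loop above
   emits

       strd    r4, r5, [sp, #-12]!
       str     r7, [sp, #8]

   r4/r5 form an even-based pair and use STRD with writeback; r7 has
   no partner and falls back to a single-word, offset-addressed store.  */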

/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
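
/* Usage note (added commentary): a prologue that must save {r4, r5, lr}
   calls emit_multi_reg_push (0x4030, 0x4030); the emitted insn matches
   the push_multi pattern (one PRE_MODIFY store plus USEs), while the
   attached REG_FRAME_RELATED_EXPR note describes the same effect as a
   single SP decrement and three individual stores for the unwinder.  */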

/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}

/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
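
/* Usage note (added commentary): popping {r4, r5, pc} builds a PARALLEL
   holding a return, an SP update of +12 and three register loads, which
   the pop_multi pattern prints as "pop {r4, r5, pc}"; the
   REG_CFA_RESTORE notes cover only r4 and r5, since the PC is never
   recorded in the DWARF frame information.  */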
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
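
/* For illustration (a sketch; the exact operand conventions follow the
   callers above): a pop of three D registers starting at d8 with
   BASE_REG == sp would be matched as something like

       vldm    sp!, {d8-d10}

   one SET updating the base register by 8 * num_regs == 24 bytes, plus
   one DFmode SET per restored register, each with a REG_CFA_RESTORE
   note for the unwinder.  */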
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the registers
               to be loaded are generated in above given LDRD pattern, and the
               pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped, and
               we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);
            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
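
/* For illustration (a sketch): ARM-mode LDRD requires an even/odd
   consecutive pair, so a mask of {r4, r5, r7} is emitted as

       ldrd    r4, r5, [sp]
       ldr     r7, [sp, #8]
       add     sp, sp, #12

   with the single deferred stack update giving the scheduler freedom to
   reorder the two loads, as the comment above explains.  */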
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */

/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno)
                && !call_used_or_fixed_reg_p (regno))
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
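
/* Worked example (a sketch; assumes CALLER_INTERWORKING_SLOT_SIZE == 0,
   no static chain and no outgoing arguments): an ARM-mode function with
   8 bytes of locals that saves {r4, r5, lr} gets

     saved_args    = 0
     saved_regs    = 12   (three 4-byte core registers)
     soft_frame    = 12, bumped to 16 by the doubleword-alignment fixup
     locals_base   = 16 + 8 = 24
     outgoing_args = 24   (already 8-byte aligned)

   so SP drops by 24 bytes overall and stays doubleword aligned.  */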
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
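
/* Worked example (a sketch, continuing the frame-layout example above
   with saved_args == 0, soft_frame == 16, outgoing_args == 24):
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   24 - (0 + 4) == 20, while FRAME_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 24 - 16 == 8.  */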
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && !call_used_or_fixed_reg_p (reg))
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_or_fixed_reg_p (reg))
              && (!df_regs_ever_live_p (reg + 1)
                  || call_used_or_fixed_reg_p (reg + 1)))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
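
/* For illustration (a sketch): if d8, d9 and d11 are live and call-saved,
   the scan above finds the contiguous run {d8, d9}, flushes it with one
   FSTMD when the dead d10 pair is reached, and later flushes {d11} with
   a second FSTMD; a gap in the live set always terminates a run.  */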
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
                               unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
        if (regno1 != i && (live_regs & (1 << i)) != 0)
          {
            regno = i;
            break;
          }

      if (regno < 0)
        {
          /* If IP is used as the 1st scratch register for a nested function,
             then either r3 wasn't available or is used to preserve IP.  */
          if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
            regno1 = 3;
          regno = (regno1 == 3 ? 2 : 3);
          sr->saved
            = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                               regno);
        }
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
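
/* Worked example (a sketch): with PROBE_INTERVAL == 4096, FIRST == 4096
   and SIZE == 10000, the middle branch above probes at SP - 8192 and
   SP - 12288, leaving a residue of 10000 - 5904... more precisely
   rem == 10000 - 8192 == 1808, which is probed at SP - 14096, i.e. at
   FIRST + SIZE, via the extra reg1 adjustment on Thumb-2 (1808 > 255)
   or directly on ARM (1808 <= 4095).  */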
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
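
/* The loop emitted above looks like this (a sketch, with %0 and %1
   resolved to, say, r4 and r5 and PROBE_INTERVAL == 4096):

   .LPSRL0:
       sub     r4, r4, #4096
       str     r0, [r4, #0]
       cmp     r4, r5
       bne     .LPSRL0
*/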
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
               && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
                   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
                        || flag_stack_clash_protection)
                       && !df_regs_ever_live_p (LR_REGNUM)
                       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
          arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
          onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
          push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
        insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
        {
          rtx addr, dwarf;

          gcc_assert(arm_compute_static_chain_stack_bytes() == 4);

          saved_regs += 4;

          addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
          fp_offset = 4;

          /* Just tell the dwarf backend that we adjusted SP.  */
          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -fp_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
        }
      else
        {
          /* Store the args on the stack.  */
          if (cfun->machine->uses_anonymous_args)
            {
              insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                          (0xf0 >> (args_to_push / 4)) & 0xf);
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
              saved_pretend_args = 1;
            }
          else
            {
              rtx addr, dwarf;

              if (args_to_push == 4)
                addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              else
                addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          -args_to_push));

              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }

          RTX_FRAME_RELATED_P (insn) = 1;
          fp_offset = args_to_push;
          args_to_push = 0;
        }
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          insn = GEN_INT (saved_regs - (4 + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
        regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
        regno = LR_REGNUM;
      else
        regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            arm_emit_probe_stack_range (get_stack_check_protect (),
                                        size - get_stack_check_protect (),
                                        regno, live_regs_mask);
        }
      else if (size > 0)
        arm_emit_probe_stack_range (get_stack_check_protect (), size,
                                    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
        insn = gen_rtx_REG (SImode, 3);
      else
        {
          insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
          insn = gen_frame_mem (SImode, insn);
        }
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        emit_insn (gen_stack_tie (stack_pointer_rtx,
                                  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        int result;

        gcc_assert (CONST_DOUBLE_P (x));
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
        return;
      }

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
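
/* For illustration (a sketch): with op0 = (reg:SI 0) and
   op1 = (const_int 10), the template "add%?\t%0, %0, %1" prints as
   "addeq r0, r0, #10" when the current condition is EQ inside a
   conditionalized sequence, and as "add r0, r0, #10" otherwise,
   since %? defers to arm_print_condition above.  */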
23244 /* Target hook for printing a memory address. */
23246 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
23250 int is_minus
= GET_CODE (x
) == MINUS
;
23253 asm_fprintf (stream
, "[%r]", REGNO (x
));
23254 else if (GET_CODE (x
) == PLUS
|| is_minus
)
23256 rtx base
= XEXP (x
, 0);
23257 rtx index
= XEXP (x
, 1);
23258 HOST_WIDE_INT offset
= 0;
23260 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
23262 /* Ensure that BASE is a register. */
23263 /* (one of them must be). */
23264 /* Also ensure the SP is not used as in index register. */
23265 std::swap (base
, index
);
23267 switch (GET_CODE (index
))
23270 offset
= INTVAL (index
);
23273 asm_fprintf (stream
, "[%r, #%wd]",
23274 REGNO (base
), offset
);
23278 asm_fprintf (stream
, "[%r, %s%r]",
23279 REGNO (base
), is_minus
? "-" : "",
23289 asm_fprintf (stream
, "[%r, %s%r",
23290 REGNO (base
), is_minus
? "-" : "",
23291 REGNO (XEXP (index
, 0)));
23292 arm_print_operand (stream
, index
, 'S');
23293 fputs ("]", stream
);
23298 gcc_unreachable ();
23301 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
23302 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
23304 gcc_assert (REG_P (XEXP (x, 0)));
23306 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
23307 asm_fprintf (stream, "[%r, #%s%d]!",
23308 REGNO (XEXP (x, 0)),
23309 GET_CODE (x) == PRE_DEC ? "-" : "",
23310 GET_MODE_SIZE (mode));
23312 asm_fprintf (stream, "[%r], #%s%d",
23313 REGNO (XEXP (x, 0)),
23314 GET_CODE (x) == POST_DEC ? "-" : "",
23315 GET_MODE_SIZE (mode));
23317 else if (GET_CODE (x) == PRE_MODIFY)
23319 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
23320 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23321 asm_fprintf (stream, "#%wd]!",
23322 INTVAL (XEXP (XEXP (x, 1), 1)));
23324 asm_fprintf (stream, "%r]!",
23325 REGNO (XEXP (XEXP (x, 1), 1)));
23327 else if (GET_CODE (x) == POST_MODIFY)
23329 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
23330 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23331 asm_fprintf (stream, "#%wd",
23332 INTVAL (XEXP (XEXP (x, 1), 1)));
23334 asm_fprintf (stream, "%r",
23335 REGNO (XEXP (XEXP (x, 1), 1)));
23337 else output_addr_const (stream, x);
23342 asm_fprintf (stream, "[%r]", REGNO (x));
23343 else if (GET_CODE (x) == POST_INC)
23344 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
23345 else if (GET_CODE (x) == PLUS)
23347 gcc_assert (REG_P (XEXP (x, 0)));
23348 if (CONST_INT_P (XEXP (x, 1)))
23349 asm_fprintf (stream, "[%r, #%wd]",
23350 REGNO (XEXP (x, 0)),
23351 INTVAL (XEXP (x, 1)));
23353 asm_fprintf (stream, "[%r, %r]",
23354 REGNO (XEXP (x, 0)),
23355 REGNO (XEXP (x, 1)));
23358 output_addr_const (stream, x);
23362 /* Target hook for indicating whether a punctuation character for
23363 TARGET_PRINT_OPERAND is valid.  */
23365 arm_print_operand_punct_valid_p (unsigned char code)
23367 return (code == '@' || code == '|' || code == '.'
23368 || code == '(' || code == ')' || code == '#'
23369 || (TARGET_32BIT && (code == '?'))
23370 || (TARGET_THUMB2 && (code == '!'))
23371 || (TARGET_THUMB && (code == '_')));
23374 /* Target hook for assembling integer objects.  The ARM version needs to
23375 handle word-sized values specially.  */
23377 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
23381 if (size == UNITS_PER_WORD && aligned_p)
23383 fputs ("\t.word\t", asm_out_file);
23384 output_addr_const (asm_out_file, x);
23386 /* Mark symbols as position independent.  We only do this in the
23387 .text segment, not in the .data segment.  */
23388 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23389 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23391 /* See legitimize_pic_address for an explanation of the
23392 TARGET_VXWORKS_RTP check.  */
23393 /* References to weak symbols cannot be resolved locally:
23394 they may be overridden by a non-weak definition at link
23396 if (!arm_pic_data_is_text_relative
23397 || (GET_CODE (x) == SYMBOL_REF
23398 && (!SYMBOL_REF_LOCAL_P (x)
23399 || (SYMBOL_REF_DECL (x)
23400 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
23401 || (SYMBOL_REF_FUNCTION_P (x)
23402 && !arm_fdpic_local_funcdesc_p (x)))))
23404 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23405 fputs ("(GOTFUNCDESC)", asm_out_file);
23407 fputs ("(GOT)", asm_out_file);
23411 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23412 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
23418 || arm_is_segment_info_known (x, &is_readonly))
23419 fputs ("(GOTOFF)", asm_out_file);
23421 fputs ("(GOT)", asm_out_file);
23426 /* For FDPIC we also have to mark the symbol for the .data section.  */
23428 && !making_const_table
23429 && SYMBOL_REF_P (x)
23430 && SYMBOL_REF_FUNCTION_P (x))
23431 fputs ("(FUNCDESC)", asm_out_file);
23433 fputc ('\n', asm_out_file);
23437 mode = GET_MODE (x);
23439 if (arm_vector_mode_supported_p (mode))
23443 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23445 units = CONST_VECTOR_NUNITS (x);
23446 size = GET_MODE_UNIT_SIZE (mode);
23448 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23449 for (i = 0; i < units; i++)
23451 rtx elt = CONST_VECTOR_ELT (x, i);
23453 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23456 for (i = 0; i < units; i++)
23458 rtx elt = CONST_VECTOR_ELT (x, i);
23460 (*CONST_DOUBLE_REAL_VALUE (elt),
23461 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23462 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23468 return default_assemble_integer (x, size, aligned_p);
23472 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23476 if (!TARGET_AAPCS_BASED)
23479 default_named_section_asm_out_constructor
23480 : default_named_section_asm_out_destructor) (symbol, priority);
23484 /* Put these in the .init_array section, using a special relocation.  */
23485 if (priority != DEFAULT_INIT_PRIORITY)
23488 sprintf (buf, "%s.%.5u",
23489 is_ctor ? ".init_array" : ".fini_array",
23491 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23498 switch_to_section (s);
23499 assemble_align (POINTER_SIZE);
23500 fputs ("\t.word\t", asm_out_file);
23501 output_addr_const (asm_out_file, symbol);
23502 fputs ("(target1)\n", asm_out_file);
23505 /* Add a function to the list of static constructors.  */
23508 arm_elf_asm_constructor (rtx symbol, int priority)
23510 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23513 /* Add a function to the list of static destructors.  */
23516 arm_elf_asm_destructor (rtx symbol, int priority)
23518 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23521 /* A finite state machine takes care of noticing whether or not instructions
23522 can be conditionally executed, and thus decrease execution time and code
23523 size by deleting branch instructions. The fsm is controlled by
23524 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23526 /* The states of the fsm controlling condition codes are:
23527 0: normal, do nothing special
23528 1: make ASM_OUTPUT_OPCODE not output this instruction
23529 2: make ASM_OUTPUT_OPCODE not output this instruction
23530 3: make instructions conditional
23531 4: make instructions conditional
23533 State transitions (state->state by whom under condition):
23534 0 -> 1 final_prescan_insn if the `target' is a label
23535 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23536 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23537 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23538 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23539 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23540 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23541 (the target insn is arm_target_insn).
23543 If the jump clobbers the conditions then we use states 2 and 4.
23545 A similar thing can be done with conditional return insns.
23547 XXX In case the `target' is an unconditional branch, this conditionalising
23548 of the instructions always reduces code size, but not always execution
23549 time. But then, I want to reduce the code size to somewhere near what
23550 /bin/cc produces. */
23552 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23553 instructions. When a COND_EXEC instruction is seen the subsequent
23554 instructions are scanned so that multiple conditional instructions can be
23555 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23556 specify the length and true/false mask for the IT block. These will be
23557 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
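/* An illustrative sketch of the transformation (assumed example, not
   taken from the source).  The fsm lets a branch over a single insn

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
     .L1:

   be rewritten as a conditionally executed insn with the branch deleted:

       cmp   r0, #0
       addne r1, r1, #1  */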
23559 /* Returns the index of the ARM condition code string in
23560 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23561 COMPARISON should be an rtx like `(eq (...) (...))'. */
23564 maybe_get_arm_condition_code (rtx comparison)
23566 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23567 enum arm_cond_code code;
23568 enum rtx_code comp_code = GET_CODE (comparison);
23570 if (GET_MODE_CLASS (mode) != MODE_CC)
23571 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23572 XEXP (comparison, 1));
23576 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23577 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23578 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23579 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23580 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23581 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23582 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23583 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23584 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23585 case E_CC_DLTUmode: code = ARM_CC;
23588 if (comp_code == EQ)
23589 return ARM_INVERSE_CONDITION_CODE (code);
23590 if (comp_code == NE)
23594 case E_CC_NOOVmode:
23597 case NE: return ARM_NE;
23598 case EQ: return ARM_EQ;
23599 case GE: return ARM_PL;
23600 case LT: return ARM_MI;
23601 default: return ARM_NV;
23607 case NE: return ARM_NE;
23608 case EQ: return ARM_EQ;
23609 default: return ARM_NV;
23615 case NE: return ARM_MI;
23616 case EQ: return ARM_PL;
23617 default: return ARM_NV;
23622 /* We can handle all cases except UNEQ and LTGT.  */
23625 case GE: return ARM_GE;
23626 case GT: return ARM_GT;
23627 case LE: return ARM_LS;
23628 case LT: return ARM_MI;
23629 case NE: return ARM_NE;
23630 case EQ: return ARM_EQ;
23631 case ORDERED: return ARM_VC;
23632 case UNORDERED: return ARM_VS;
23633 case UNLT: return ARM_LT;
23634 case UNLE: return ARM_LE;
23635 case UNGT: return ARM_HI;
23636 case UNGE: return ARM_PL;
23637 /* UNEQ and LTGT do not have a representation.  */
23638 case UNEQ: /* Fall through.  */
23639 case LTGT: /* Fall through.  */
23640 default: return ARM_NV;
23646 case NE: return ARM_NE;
23647 case EQ: return ARM_EQ;
23648 case GE: return ARM_LE;
23649 case GT: return ARM_LT;
23650 case LE: return ARM_GE;
23651 case LT: return ARM_GT;
23652 case GEU: return ARM_LS;
23653 case GTU: return ARM_CC;
23654 case LEU: return ARM_CS;
23655 case LTU: return ARM_HI;
23656 default: return ARM_NV;
23662 case LTU: return ARM_CS;
23663 case GEU: return ARM_CC;
23664 default: return ARM_NV;
23670 case NE: return ARM_NE;
23671 case EQ: return ARM_EQ;
23672 case GEU: return ARM_CS;
23673 case GTU: return ARM_HI;
23674 case LEU: return ARM_LS;
23675 case LTU: return ARM_CC;
23676 default: return ARM_NV;
23682 case GE: return ARM_GE;
23683 case LT: return ARM_LT;
23684 case GEU: return ARM_CS;
23685 case LTU: return ARM_CC;
23686 default: return ARM_NV;
23692 case NE: return ARM_VS;
23693 case EQ: return ARM_VC;
23694 default: return ARM_NV;
23700 case NE: return ARM_NE;
23701 case EQ: return ARM_EQ;
23702 case GEU: return ARM_CS;
23703 case GTU: return ARM_HI;
23704 case LEU: return ARM_LS;
23705 case LTU: return ARM_CC;
23706 default: return ARM_NV;
23712 case NE: return ARM_NE;
23713 case EQ: return ARM_EQ;
23714 case GE: return ARM_GE;
23715 case GT: return ARM_GT;
23716 case LE: return ARM_LE;
23717 case LT: return ARM_LT;
23718 case GEU: return ARM_CS;
23719 case GTU: return ARM_HI;
23720 case LEU: return ARM_LS;
23721 case LTU: return ARM_CC;
23722 default: return ARM_NV;
23725 default: gcc_unreachable ();
23729 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
23730 static enum arm_cond_code
23731 get_arm_condition_code (rtx comparison)
23733 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23734 gcc_assert (code != ARM_NV);
23738 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
23739 code registers when not targeting Thumb1.  The VFP condition register
23740 only exists when generating hard-float code.  */
23742 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23748 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23752 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23755 thumb2_final_prescan_insn (rtx_insn *insn)
23757 rtx_insn *first_insn = insn;
23758 rtx body = PATTERN (insn);
23760 enum arm_cond_code code;
23765 /* max_insns_skipped in the tune was already taken into account in the
23766 cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
23767 just emit the IT blocks as we can.  It does not make sense to split
23769 max = MAX_INSN_PER_IT_BLOCK;
23771 /* Remove the previous insn from the count of insns to be output.  */
23772 if (arm_condexec_count)
23773 arm_condexec_count--;
23775 /* Nothing to do if we are already inside a conditional block.  */
23776 if (arm_condexec_count)
23779 if (GET_CODE (body) != COND_EXEC)
23782 /* Conditional jumps are implemented directly.  */
23786 predicate = COND_EXEC_TEST (body);
23787 arm_current_cc = get_arm_condition_code (predicate);
23789 n = get_attr_ce_count (insn);
23790 arm_condexec_count = 1;
23791 arm_condexec_mask = (1 << n) - 1;
23792 arm_condexec_masklen = n;
23793 /* See if subsequent instructions can be combined into the same block.  */
23796 insn = next_nonnote_insn (insn);
23798 /* Jumping into the middle of an IT block is illegal, so a label or
23799 barrier terminates the block.  */
23800 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23803 body = PATTERN (insn);
23804 /* USE and CLOBBER aren't really insns, so just skip them.  */
23805 if (GET_CODE (body) == USE
23806 || GET_CODE (body) == CLOBBER)
23809 /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
23810 if (GET_CODE (body) != COND_EXEC)
23812 /* Maximum number of conditionally executed instructions in a block.  */
23813 n = get_attr_ce_count (insn);
23814 if (arm_condexec_masklen + n > max)
23817 predicate = COND_EXEC_TEST (body);
23818 code = get_arm_condition_code (predicate);
23819 mask = (1 << n) - 1;
23820 if (arm_current_cc == code)
23821 arm_condexec_mask |= (mask << arm_condexec_masklen);
23822 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23825 arm_condexec_count++;
23826 arm_condexec_masklen += n;
23828 /* A jump must be the last instruction in a conditional block.  */
23832 /* Restore recog_data (getting the attributes of other insns can
23833 destroy this array, but final.c assumes that it remains intact
23834 across this call).  */
23835 extract_constrain_insn_cached (first_insn);
23839 arm_final_prescan_insn (rtx_insn *insn)
23841 /* BODY will hold the body of INSN.  */
23842 rtx body = PATTERN (insn);
23844 /* This will be 1 if trying to repeat the trick, and things need to be
23845 reversed if it appears to fail.  */
23848 /* If we start with a return insn, we only succeed if we find another one.  */
23849 int seeking_return = 0;
23850 enum rtx_code return_code = UNKNOWN;
23852 /* START_INSN will hold the insn from where we start looking.  This is the
23853 first insn after the following code_label if REVERSE is true.  */
23854 rtx_insn *start_insn = insn;
23856 /* If in state 4, check if the target branch is reached, in order to
23857 change back to state 0.  */
23858 if (arm_ccfsm_state == 4)
23860 if (insn == arm_target_insn)
23862 arm_target_insn = NULL;
23863 arm_ccfsm_state = 0;
23868 /* If in state 3, it is possible to repeat the trick, if this insn is an
23869 unconditional branch to a label, and immediately following this branch
23870 is the previous target label which is only used once, and the label this
23871 branch jumps to is not too far off.  */
23872 if (arm_ccfsm_state == 3)
23874 if (simplejump_p (insn))
23876 start_insn = next_nonnote_insn (start_insn);
23877 if (BARRIER_P (start_insn))
23879 /* XXX Isn't this always a barrier?  */
23880 start_insn = next_nonnote_insn (start_insn);
23882 if (LABEL_P (start_insn)
23883 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23884 && LABEL_NUSES (start_insn) == 1)
23889 else if (ANY_RETURN_P (body))
23891 start_insn = next_nonnote_insn (start_insn);
23892 if (BARRIER_P (start_insn))
23893 start_insn = next_nonnote_insn (start_insn);
23894 if (LABEL_P (start_insn)
23895 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23896 && LABEL_NUSES (start_insn) == 1)
23899 seeking_return = 1;
23900 return_code = GET_CODE (body);
23909 gcc_assert (!arm_ccfsm_state || reverse);
23910 if (!JUMP_P (insn))
23913 /* This jump might be paralleled with a clobber of the condition codes;
23914 the jump should always come first.  */
23915 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23916 body = XVECEXP (body, 0, 0);
23919 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23920 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23923 int fail = FALSE, succeed = FALSE;
23924 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
23925 int then_not_else = TRUE;
23926 rtx_insn *this_insn = start_insn;
23929 /* Register the insn jumped to.  */
23932 if (!seeking_return)
23933 label = XEXP (SET_SRC (body), 0);
23935 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23936 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23937 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23939 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23940 then_not_else = FALSE;
23942 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23944 seeking_return = 1;
23945 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23947 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23949 seeking_return = 1;
23950 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23951 then_not_else = FALSE;
23954 gcc_unreachable ();
23956 /* See how many insns this branch skips, and what kind of insns.  If all
23957 insns are okay, and the label or unconditional branch to the same
23958 label is not too far away, succeed.  */
23959 for (insns_skipped = 0;
23960 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23964 this_insn = next_nonnote_insn (this_insn);
23968 switch (GET_CODE (this_insn))
23971 /* Succeed if it is the target label, otherwise fail since
23972 control falls in from somewhere else.  */
23973 if (this_insn == label)
23975 arm_ccfsm_state = 1;
23983 /* Succeed if the following insn is the target label.
23985 If return insns are used then the last insn in a function
23986 will be a barrier.  */
23987 this_insn = next_nonnote_insn (this_insn);
23988 if (this_insn && this_insn == label)
23990 arm_ccfsm_state = 1;
23998 /* The AAPCS says that conditional calls should not be
23999 used since they make interworking inefficient (the
24000 linker can't transform BL<cond> into BLX).  That's
24001 only a problem if the machine has BLX.  */
24008 /* Succeed if the following insn is the target label, or
24009 if the following two insns are a barrier and the
24011 this_insn = next_nonnote_insn (this_insn);
24012 if (this_insn && BARRIER_P (this_insn))
24013 this_insn = next_nonnote_insn (this_insn);
24015 if (this_insn && this_insn == label
24016 && insns_skipped < max_insns_skipped)
24018 arm_ccfsm_state = 1;
24026 /* If this is an unconditional branch to the same label, succeed.
24027 If it is to another label, do nothing.  If it is conditional,
24029 /* XXX Probably, the tests for SET and the PC are
24032 scanbody = PATTERN (this_insn);
24033 if (GET_CODE (scanbody) == SET
24034 && GET_CODE (SET_DEST (scanbody)) == PC)
24036 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
24037 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
24039 arm_ccfsm_state = 2;
24042 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
24045 /* Fail if a conditional return is undesirable (e.g. on a
24046 StrongARM), but still allow this if optimizing for size.  */
24047 else if (GET_CODE (scanbody) == return_code
24048 && !use_return_insn (TRUE, NULL)
24051 else if (GET_CODE (scanbody) == return_code)
24053 arm_ccfsm_state = 2;
24056 else if (GET_CODE (scanbody) == PARALLEL)
24058 switch (get_attr_conds (this_insn))
24068 fail = TRUE; /* Unrecognized jump (e.g. epilogue).  */
24073 /* Instructions using or affecting the condition codes make it
24075 scanbody = PATTERN (this_insn);
24076 if (!(GET_CODE (scanbody) == SET
24077 || GET_CODE (scanbody) == PARALLEL)
24078 || get_attr_conds (this_insn) != CONDS_NOCOND)
24088 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
24089 arm_target_label = CODE_LABEL_NUMBER (label);
24092 gcc_assert (seeking_return || arm_ccfsm_state == 2);
24094 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
24096 this_insn = next_nonnote_insn (this_insn);
24097 gcc_assert (!this_insn
24098 || (!BARRIER_P (this_insn)
24099 && !LABEL_P (this_insn)));
24103 /* Oh, dear!  We ran off the end; give up.  */
24104 extract_constrain_insn_cached (insn);
24105 arm_ccfsm_state = 0;
24106 arm_target_insn = NULL;
24109 arm_target_insn = this_insn;
24112 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
24115 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
24117 if (reverse || then_not_else)
24118 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
24121 /* Restore recog_data (getting the attributes of other insns can
24122 destroy this array, but final.c assumes that it remains intact
24123 across this call).  */
24124 extract_constrain_insn_cached (insn);
24128 /* Output IT instructions.  */
24130 thumb2_asm_output_opcode (FILE * stream)
24135 if (arm_condexec_mask)
24137 for (n = 0; n < arm_condexec_masklen; n++)
24138 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
24140 asm_fprintf (stream, "i%s\t%s\n\t", buff,
24141 arm_condition_codes[arm_current_cc]);
24142 arm_condexec_mask = 0;
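/* Worked example (illustrative): with arm_condexec_masklen == 3,
   arm_condexec_mask == 0b011 and arm_current_cc == ARM_EQ, the loop
   builds buff = "tte" and the function emits "itte\teq" -- the first
   two insns of the block execute if EQ holds, the third if it does
   not.  */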
24146 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
24147 UNITS_PER_WORD bytes wide.  */
24148 static unsigned int
24149 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
24152 && regno > PC_REGNUM
24153 && regno != FRAME_POINTER_REGNUM
24154 && regno != ARG_POINTER_REGNUM
24155 && !IS_VFP_REGNUM (regno))
24158 return ARM_NUM_REGS (mode);
24161 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
24163 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
24165 if (GET_MODE_CLASS (mode) == MODE_CC)
24166 return (regno == CC_REGNUM
24167 || (TARGET_HARD_FLOAT
24168 && regno == VFPCC_REGNUM));
24170 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
24174 /* For the Thumb we only allow values bigger than SImode in
24175 registers 0 - 6, so that there is always a second low
24176 register available to hold the upper part of the value.
24177 We probably ought to ensure that the register is the
24178 start of an even numbered register pair.  */
24179 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
24181 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
24183 if (mode == SFmode || mode == SImode)
24184 return VFP_REGNO_OK_FOR_SINGLE (regno);
24186 if (mode == DFmode)
24187 return VFP_REGNO_OK_FOR_DOUBLE (regno);
24189 if (mode == HFmode)
24190 return VFP_REGNO_OK_FOR_SINGLE (regno);
24192 /* VFP registers can hold HImode values.  */
24193 if (mode == HImode)
24194 return VFP_REGNO_OK_FOR_SINGLE (regno);
24197 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
24198 || (VALID_NEON_QREG_MODE (mode)
24199 && NEON_REGNO_OK_FOR_QUAD (regno))
24200 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
24201 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
24202 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
24203 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
24204 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
24209 if (TARGET_REALLY_IWMMXT)
24211 if (IS_IWMMXT_GR_REGNUM (regno))
24212 return mode == SImode;
24214 if (IS_IWMMXT_REGNUM (regno))
24215 return VALID_IWMMXT_REG_MODE (mode);
24218 /* We allow almost any value to be stored in the general registers.
24219 Restrict doubleword quantities to even register pairs in ARM state
24220 so that we can use ldrd.  Do not allow very large Neon structure
24221 opaque modes in general registers; they would use too many.  */
24222 if (regno <= LAST_ARM_REGNUM)
24224 if (ARM_NUM_REGS (mode) > 4)
24230 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
24233 if (regno == FRAME_POINTER_REGNUM
24234 || regno == ARG_POINTER_REGNUM)
24235 /* We only allow integers in the fake hard registers.  */
24236 return GET_MODE_CLASS (mode) == MODE_INT;
24241 /* Implement TARGET_MODES_TIEABLE_P.  */
24244 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
24246 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
24249 /* We specifically want to allow elements of "structure" modes to
24250 be tieable to the structure.  This more general condition allows
24251 other rarer situations too.  */
24253 && (VALID_NEON_DREG_MODE (mode1)
24254 || VALID_NEON_QREG_MODE (mode1)
24255 || VALID_NEON_STRUCT_MODE (mode1))
24256 && (VALID_NEON_DREG_MODE (mode2)
24257 || VALID_NEON_QREG_MODE (mode2)
24258 || VALID_NEON_STRUCT_MODE (mode2)))
24264 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
24265 not used in arm mode.  */
24268 arm_regno_class (int regno)
24270 if (regno == PC_REGNUM)
24275 if (regno == STACK_POINTER_REGNUM)
24277 if (regno == CC_REGNUM)
24284 if (TARGET_THUMB2 && regno < 8)
24287 if (regno <= LAST_ARM_REGNUM
24288 || regno == FRAME_POINTER_REGNUM
24289 || regno == ARG_POINTER_REGNUM)
24290 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
24292 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
24293 return TARGET_THUMB2 ? CC_REG : NO_REGS;
24295 if (IS_VFP_REGNUM (regno))
24297 if (regno <= D7_VFP_REGNUM)
24298 return VFP_D0_D7_REGS;
24299 else if (regno <= LAST_LO_VFP_REGNUM)
24300 return VFP_LO_REGS;
24302 return VFP_HI_REGS;
24305 if (IS_IWMMXT_REGNUM (regno))
24306 return IWMMXT_REGS;
24308 if (IS_IWMMXT_GR_REGNUM (regno))
24309 return IWMMXT_GR_REGS;
24314 /* Handle a special case when computing the offset
24315 of an argument from the frame pointer.  */
24317 arm_debugger_arg_offset (int value, rtx addr)
24321 /* We are only interested if dbxout_parms() failed to compute the offset.  */
24325 /* We can only cope with the case where the address is held in a register.  */
24329 /* If we are using the frame pointer to point at the argument, then
24330 an offset of 0 is correct.  */
24331 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
24334 /* If we are using the stack pointer to point at the
24335 argument, then an offset of 0 is correct.  */
24336 /* ??? Check this is consistent with thumb2 frame layout.  */
24337 if ((TARGET_THUMB || !frame_pointer_needed)
24338 && REGNO (addr) == SP_REGNUM)
24341 /* Oh dear.  The argument is pointed to by a register rather
24342 than being held in a register, or being stored at a known
24343 offset from the frame pointer.  Since GDB only understands
24344 those two kinds of argument we must translate the address
24345 held in the register into an offset from the frame pointer.
24346 We do this by searching through the insns for the function
24347 looking to see where this register gets its value.  If the
24348 register is initialized from the frame pointer plus an offset
24349 then we are in luck and we can continue, otherwise we give up.
24351 This code is exercised by producing debugging information
24352 for a function with arguments like this:
24354 double func (double a, double b, int c, double d) {return d;}
24356 Without this code the stab for parameter 'd' will be set to
24357 an offset of 0 from the frame pointer, rather than 8.  */
24359 /* The if() statement says:
24361 If the insn is a normal instruction
24362 and if the insn is setting the value in a register
24363 and if the register being set is the register holding the address of the argument
24364 and if the address is computed by an addition
24365 that involves adding to a register
24366 which is the frame pointer
24371 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24373 if (NONJUMP_INSN_P (insn)
24374 && GET_CODE (PATTERN (insn)) == SET
24375 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
24376 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
24377 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
24378 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
24379 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
24382 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
24391 warning (0, "unable to compute real location of stacked parameter");
24392 value = 8; /* XXX magic hack */
24398 /* Implement TARGET_PROMOTED_TYPE.  */
24401 arm_promoted_type (const_tree t)
24403 if (SCALAR_FLOAT_TYPE_P (t)
24404 && TYPE_PRECISION (t) == 16
24405 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24406 return float_type_node;
24410 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24411 This simply adds HFmode as a supported mode; even though we don't
24412 implement arithmetic on this type directly, it's supported by
24413 optabs conversions, much the way the double-word arithmetic is
24414 special-cased in the default hook.  */
24417 arm_scalar_mode_supported_p (scalar_mode mode)
24419 if (mode == HFmode)
24420 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24421 else if (ALL_FIXED_POINT_MODE_P (mode))
24424 return default_scalar_mode_supported_p (mode);
24427 /* Set the value of FLT_EVAL_METHOD.
24428 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24430 0: evaluate all operations and constants, whose semantic type has at
24431 most the range and precision of type float, to the range and
24432 precision of float; evaluate all other operations and constants to
24433 the range and precision of the semantic type;
24435 N, where _FloatN is a supported interchange floating type
24436 evaluate all operations and constants, whose semantic type has at
24437 most the range and precision of _FloatN type, to the range and
24438 precision of the _FloatN type; evaluate all other operations and
24439 constants to the range and precision of the semantic type;
24441 If we have the ARMv8.2-A extensions then we support _Float16 in native
24442 precision, so we should set this to 16.  Otherwise, we support the type,
24443 but want to evaluate expressions in float precision, so set this to 0.  */
24446 static enum flt_eval_method
24447 arm_excess_precision (enum excess_precision_type type)
24451 case EXCESS_PRECISION_TYPE_FAST:
24452 case EXCESS_PRECISION_TYPE_STANDARD:
24453 /* We can calculate either in 16-bit range and precision or
24454 32-bit range and precision.  Make that decision based on whether
24455 we have native support for the ARMv8.2-A 16-bit floating-point
24456 instructions or not.  */
24457 return (TARGET_VFP_FP16INST
24458 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24459 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24460 case EXCESS_PRECISION_TYPE_IMPLICIT:
24461 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24463 gcc_unreachable ();
24465 return FLT_EVAL_METHOD_UNPREDICTABLE;
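/* A minimal C-level illustration (assumed example, not from the source):

     _Float16 a, b, c;
     _Float16 d = a + b + c;

   Under FLT_EVAL_METHOD_PROMOTE_TO_FLOAT both additions are performed in
   32-bit float and the result is rounded to _Float16 once; under
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 (TARGET_VFP_FP16INST) each addition
   is carried out directly in 16-bit precision.  */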
24469 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
24470 _Float16 if we are using anything other than ieee format for 16-bit
24471 floating point.  Otherwise, punt to the default implementation.  */
24472 static opt_scalar_float_mode
24473 arm_floatn_mode (int n, bool extended)
24475 if (!extended && n == 16)
24477 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24479 return opt_scalar_float_mode ();
24482 return default_floatn_mode (n, extended);
24486 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24487 not to early-clobber SRC registers in the process.
24489 We assume that the operands described by SRC and DEST represent a
24490 decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
24491 number of components into which the copy has been decomposed.  */
24493 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24497 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24498 || REGNO (operands[0]) < REGNO (operands[1]))
24500 for (i = 0; i < count; i++)
24502 operands[2 * i] = dest[i];
24503 operands[2 * i + 1] = src[i];
24508 for (i = 0; i < count; i++)
24510 operands[2 * i] = dest[count - i - 1];
24511 operands[2 * i + 1] = src[count - i - 1];
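/* Worked example (illustrative): for a copy of {d0, d1} into {d1, d2}
   the destination overlaps the source and REGNO (operands[0]) >
   REGNO (operands[1]), so the reversed loop above orders the moves as
   d2 = d1 followed by d1 = d0, avoiding the clobber of d1 before it is
   read.  */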
24516 /* Split operands into moves from op[1] + op[2] into op[0].  */
24519 neon_split_vcombine (rtx operands[3])
24521 unsigned int dest = REGNO (operands[0]);
24522 unsigned int src1 = REGNO (operands[1]);
24523 unsigned int src2 = REGNO (operands[2]);
24524 machine_mode halfmode = GET_MODE (operands[1]);
24525 unsigned int halfregs = REG_NREGS (operands[1]);
24526 rtx destlo, desthi;
24528 if (src1 == dest && src2 == dest + halfregs)
24530 /* No-op move.  Can't split to nothing; emit something.  */
24531 emit_note (NOTE_INSN_DELETED);
24535 /* Preserve register attributes for variable tracking.  */
24536 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24537 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24538 GET_MODE_SIZE (halfmode));
24540 /* Special case of reversed high/low parts.  Use VSWP.  */
24541 if (src2 == dest && src1 == dest + halfregs)
24543 rtx x = gen_rtx_SET (destlo, operands[1]);
24544 rtx y = gen_rtx_SET (desthi, operands[2]);
24545 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24549 if (!reg_overlap_mentioned_p (operands[2], destlo))
24551 /* Try to avoid unnecessary moves if part of the result
24552 is in the right place already.  */
24554 emit_move_insn (destlo, operands[1]);
24555 if (src2 != dest + halfregs)
24556 emit_move_insn (desthi, operands[2]);
24560 if (src2 != dest + halfregs)
24561 emit_move_insn (desthi, operands[2]);
24563 emit_move_insn (destlo, operands[1]);
24567 /* Return the number (counting from 0) of
24568 the least significant set bit in MASK.  */
24571 number_of_first_bit_set (unsigned mask)
24573 return ctz_hwi (mask);
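/* Usage example (illustrative): number_of_first_bit_set (0x28) is 3,
   since 0x28 is 0b101000 and bit 3 is the lowest bit set.  */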
24576 /* Like emit_multi_reg_push, but allowing for a different set of
24577 registers to be described as saved.  MASK is the set of registers
24578 to be saved; REAL_REGS is the set of registers to be described as
24579 saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
24582 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24584 unsigned long regno;
24585 rtx par[10], tmp, reg;
24589 /* Build the parallel of the registers actually being stored.  */
24590 for (i = 0; mask; ++i, mask &= mask - 1)
24592 regno = ctz_hwi (mask);
24593 reg = gen_rtx_REG (SImode, regno);
24596 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24598 tmp = gen_rtx_USE (VOIDmode, reg);
24603 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24604 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24605 tmp = gen_frame_mem (BLKmode, tmp);
24606 tmp = gen_rtx_SET (tmp, par[0]);
24609 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24610 insn = emit_insn (tmp);
24612 /* Always build the stack adjustment note for unwind info.  */
24613 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24614 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24617 /* Build the parallel of the registers recorded as saved for unwind.  */
24618 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24620 regno = ctz_hwi (real_regs);
24621 reg = gen_rtx_REG (SImode, regno);
24623 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24624 tmp = gen_frame_mem (SImode, tmp);
24625 tmp = gen_rtx_SET (tmp, reg);
24626 RTX_FRAME_RELATED_P (tmp) = 1;
24634 RTX_FRAME_RELATED_P (par[0]) = 1;
24635 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24638 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
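/* Illustrative sketch (assumed example): for mask == 0x4030 (r4, r5 and
   lr) this emits a single push insn equivalent to "push {r4, r5, lr}",
   with a REG_FRAME_RELATED_EXPR note describing the 12-byte stack
   adjustment and the three stores for the unwinder.  */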
24643 /* Emit code to push or pop registers to or from the stack.  F is the
24644 assembly file.  MASK is the registers to pop.  */
24646 thumb_pop (FILE *f, unsigned long mask)
24649 int lo_mask = mask & 0xFF;
24653 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24655 /* Special case.  Do not generate a POP PC statement here, do it in
24657 thumb_exit (f, -1);
24661 fprintf (f, "\tpop\t{");
24663 /* Look at the low registers first.  */
24664 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24668 asm_fprintf (f, "%r", regno);
24670 if ((lo_mask & ~1) != 0)
24675 if (mask & (1 << PC_REGNUM))
24677 /* Catch popping the PC.  */
24678 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24679 || IS_CMSE_ENTRY (arm_current_func_type ()))
24681 /* The PC is never popped directly, instead
24682 it is popped into r3 and then BX is used.  */
24683 fprintf (f, "}\n");
24685 thumb_exit (f, -1);
24694 asm_fprintf (f, "%r", PC_REGNUM);
24698 fprintf (f, "}\n");
24701 /* Generate code to return from a thumb function.
24702 If 'reg_containing_return_addr' is -1, then the return address is
24703 actually on the stack, at the stack pointer.
24705 Note: do not forget to update the length attribute of the corresponding insn pattern
24706 when changing assembly output (eg. length attribute of epilogue_insns when
24707 updating Armv8-M Baseline Security Extensions register clearing
24710 thumb_exit (FILE *f, int reg_containing_return_addr)
24712 unsigned regs_available_for_popping;
24713 unsigned regs_to_pop;
24715 unsigned available;
24719 int restore_a4 = FALSE;
24721 /* Compute the registers we need to pop.  */
24725 if (reg_containing_return_addr == -1)
24727 regs_to_pop |= 1 << LR_REGNUM;
24731 if (TARGET_BACKTRACE)
24733 /* Restore the (ARM) frame pointer and stack pointer.  */
24734 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24738 /* If there is nothing to pop then just emit the BX instruction and
24740 if (pops_needed == 0)
24742 if (crtl->calls_eh_return)
24743 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24745 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24747 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24748 reg_containing_return_addr);
24749 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24752 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24755 /* Otherwise if we are not supporting interworking and we have not created
24756 a backtrace structure and the function was not entered in ARM mode then
24757 just pop the return address straight into the PC.  */
24758 else if (!TARGET_INTERWORK
24759 && !TARGET_BACKTRACE
24760 && !is_called_in_ARM_mode (current_function_decl)
24761 && !crtl->calls_eh_return
24762 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24764 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24768 /* Find out how many of the (return) argument registers we can corrupt.  */
24769 regs_available_for_popping = 0;
24771 /* If returning via __builtin_eh_return, the bottom three registers
24772 all contain information needed for the return.  */
24773 if (crtl->calls_eh_return)
24777 /* If possible, deduce the registers used from the function's
24778 return value.  This is more reliable than examining
24779 df_regs_ever_live_p () because that will be set if the register is
24780 ever used in the function, not just if the register is used
24781 to hold a return value.  */
24783 if (crtl->return_rtx != 0)
24784 mode = GET_MODE (crtl->return_rtx);
24786 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24788 size = GET_MODE_SIZE (mode);
24792 /* In a void function we can use any argument register.
24793 In a function that returns a structure on the stack
24794 we can use the second and third argument registers.  */
24795 if (mode == VOIDmode)
24796 regs_available_for_popping =
24797 (1 << ARG_REGISTER (1))
24798 | (1 << ARG_REGISTER (2))
24799 | (1 << ARG_REGISTER (3));
24801 regs_available_for_popping =
24802 (1 << ARG_REGISTER (2))
24803 | (1 << ARG_REGISTER (3));
24805 else if (size <= 4)
24806 regs_available_for_popping =
24807 (1 << ARG_REGISTER (2))
24808 | (1 << ARG_REGISTER (3));
24809 else if (size <= 8)
24810 regs_available_for_popping =
24811 (1 << ARG_REGISTER (3));
24814 /* Match registers to be popped with registers into which we pop them.  */
24815 for (available = regs_available_for_popping,
24816 required = regs_to_pop;
24817 required != 0 && available != 0;
24818 available &= ~(available & - available),
24819 required &= ~(required & - required))
24822 /* If we have any popping registers left over, remove them.  */
24824 regs_available_for_popping &= ~available;
24826 /* Otherwise if we need another popping register we can use
24827 the fourth argument register.  */
24828 else if (pops_needed)
24830 /* If we have not found any free argument registers and
24831 reg a4 contains the return address, we must move it.  */
24832 if (regs_available_for_popping == 0
24833 && reg_containing_return_addr == LAST_ARG_REGNUM)
24835 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24836 reg_containing_return_addr = LR_REGNUM;
24838 else if (size > 12)
24840 /* Register a4 is being used to hold part of the return value,
24841 but we have dire need of a free, low register.  */
24844 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24847 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24849 /* The fourth argument register is available.  */
24850 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24856 /* Pop as many registers as we can.  */
24857 thumb_pop (f, regs_available_for_popping);
24859 /* Process the registers we popped.  */
24860 if (reg_containing_return_addr == -1)
24862 /* The return address was popped into the lowest numbered register.  */
24863 regs_to_pop &= ~(1 << LR_REGNUM);
24865 reg_containing_return_addr =
24866 number_of_first_bit_set (regs_available_for_popping);
24868 /* Remove this register from the mask of available registers, so that
24869 the return address will not be corrupted by further pops.  */
24870 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24873 /* If we popped other registers then handle them here.  */
24874 if (regs_available_for_popping)
24878 /* Work out which register currently contains the frame pointer.  */
24879 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24881 /* Move it into the correct place.  */
24882 asm_fprintf (f, "\tmov\t%r, %r\n",
24883 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24885 /* (Temporarily) remove it from the mask of popped registers.  */
24886 regs_available_for_popping &= ~(1 << frame_pointer);
24887 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24889 if (regs_available_for_popping)
24893 /* We popped the stack pointer as well,
24894 find the register that contains it.  */
24895 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24897 /* Move it into the stack register.  */
24898 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24900 /* At this point we have popped all necessary registers, so
24901 do not worry about restoring regs_available_for_popping
24902 to its correct value:
24904 assert (pops_needed == 0)
24905 assert (regs_available_for_popping == (1 << frame_pointer))
24906 assert (regs_to_pop == (1 << STACK_POINTER))  */
24910 /* Since we have just moved the popped value into the frame
24911 pointer, the popping register is available for reuse, and
24912 we know that we still have the stack pointer left to pop.  */
24913 regs_available_for_popping |= (1 << frame_pointer);
24917 /* If we still have registers left on the stack, but we no longer have
24918 any registers into which we can pop them, then we must move the return
24919 address into the link register and make available the register that
24921 if (regs_available_for_popping == 0 && pops_needed > 0)
24923 regs_available_for_popping |= 1 << reg_containing_return_addr;
24925 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24926 reg_containing_return_addr);
24928 reg_containing_return_addr = LR_REGNUM;
24931 /* If we have registers left on the stack then pop some more.
24932 We know that at most we will want to pop FP and SP.  */
24933 if (pops_needed > 0)
24938 thumb_pop (f, regs_available_for_popping);
24940 /* We have popped either FP or SP.
24941 Move whichever one it is into the correct register.  */
24942 popped_into = number_of_first_bit_set (regs_available_for_popping);
24943 move_to = number_of_first_bit_set (regs_to_pop);
24945 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24949 /* If we still have not popped everything then we must have only
24950 had one register available to us and we are now popping the SP.  */
24951 if (pops_needed > 0)
24955 thumb_pop (f, regs_available_for_popping);
24957 popped_into = number_of_first_bit_set (regs_available_for_popping);
24959 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24961 assert (regs_to_pop == (1 << STACK_POINTER))
24962 assert (pops_needed == 1)
24966 /* If necessary restore the a4 register.  */
24969 if (reg_containing_return_addr != LR_REGNUM)
24971 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24972 reg_containing_return_addr = LR_REGNUM;
24975 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24978 if (crtl->calls_eh_return)
24979 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24981 /* Return to caller.  */
24982 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24984 /* This is for the cases where LR is not being used to contain the return
24985 address.  It may therefore contain information that we might not want
24986 to leak, hence it must be cleared.  The value in R0 will never be a
24987 secret at this point, so it is safe to use it, see the clearing code
24988 in 'cmse_nonsecure_entry_clear_before_return'.  */
24989 if (reg_containing_return_addr != LR_REGNUM)
24990 asm_fprintf (f, "\tmov\tlr, r0\n");
24992 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24993 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24996 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24999 /* Scan INSN just before assembler is output for it.
25000 For Thumb-1, we track the status of the condition codes; this
25001 information is used in the cbranchsi4_insn pattern.  */
25003 thumb1_final_prescan_insn (rtx_insn *insn)
25005 if (flag_print_asm_name)
25006 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25007 INSN_ADDRESSES (INSN_UID (insn)));
25008 /* Don't overwrite the previous setter when we get to a cbranch.  */
25009 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25011 enum attr_conds conds;
25013 if (cfun->machine->thumb1_cc_insn)
25015 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25016 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25019 conds = get_attr_conds (insn);
25020 if (conds == CONDS_SET)
25022 rtx set = single_set (insn);
25023 cfun->machine->thumb1_cc_insn = insn;
25024 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25025 cfun->machine->thumb1_cc_op1 = const0_rtx;
25026 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
25027 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25029 rtx src1 = XEXP (SET_SRC (set), 1);
25030 if (src1 == const0_rtx)
25031 cfun->machine->thumb1_cc_mode = CCmode;
25033 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25035 /* Record the src register operand instead of dest because
25036 cprop_hardreg pass propagates src.  */
25037 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25040 else if (conds != CONDS_NOCOND)
25041 cfun->machine->thumb1_cc_insn = NULL_RTX;
25044 /* Check if unexpected far jump is used.  */
25045 if (cfun->machine->lr_save_eliminated
25046 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25047 internal_error ("Unexpected thumb1 far jump");
25051 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25053 unsigned HOST_WIDE_INT mask = 0xff;
25056 val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
25057 if (val == 0) /* XXX */
25060 for (i = 0; i < 25; i++)
25061 if ((val & (mask << i)) == val)
25067 /* Returns nonzero if the current function contains,
25068 or might contain, a far jump.  */
25070 thumb_far_jump_used_p (void)
25073 bool far_jump = false;
25074 unsigned int func_size = 0;
25076 /* If we have already decided that far jumps may be used,
25077 do not bother checking again, and always return true even if
25078 it turns out that they are not being used.  Once we have made
25079 the decision that far jumps are present (and that hence the link
25080 register will be pushed onto the stack) we cannot go back on it.  */
25081 if (cfun->machine->far_jump_used)
25084 /* If this function is not being called from the prologue/epilogue
25085 generation code then it must be being called from the
25086 INITIAL_ELIMINATION_OFFSET macro.  */
25087 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25089 /* In this case we know that we are being asked about the elimination
25090 of the arg pointer register.  If that register is not being used,
25091 then there are no arguments on the stack, and we do not have to
25092 worry that a far jump might force the prologue to push the link
25093 register, changing the stack offsets.  In this case we can just
25094 return false, since the presence of far jumps in the function will
25095 not affect stack offsets.
25097 If the arg pointer is live (or if it was live, but has now been
25098 eliminated and so set to dead) then we do have to test to see if
25099 the function might contain a far jump.  This test can lead to some
25100 false negatives, since before reload is completed, the length of
25101 branch instructions is not known, so gcc defaults to returning their
25102 longest length, which in turn sets the far jump attribute to true.
25104 A false negative will not result in bad code being generated, but it
25105 will result in a needless push and pop of the link register.  We
25106 hope that this does not occur too often.
25108 If we need doubleword stack alignment this could affect the other
25109 elimination offsets so we can't risk getting it wrong.  */
25110 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25111 cfun->machine->arg_pointer_live = 1;
25112 else if (!cfun->machine->arg_pointer_live)
25116 /* We should not change far_jump_used during or after reload, as there is
25117 no chance to change the stack frame layout.  */
25118 if (reload_in_progress || reload_completed)
25121 /* Check to see if the function contains a branch
25122 insn with the far jump attribute set.  */
25123 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25125 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25129 func_size += get_attr_length (insn);
25132 /* The far_jump attribute will always be true for thumb1 before the
25133 shorten_branch pass, so checking it before shorten_branch isn't
25134 very useful.
25136 The following heuristic tries to estimate more accurately whether a far
25137 jump may finally be used.  The heuristic is very conservative as there is
25138 no chance to roll back the decision not to use a far jump.
25140 Thumb1 long branch offset is -2048 to 2046.  The worst case is that each
25141 2-byte insn is associated with a 4-byte constant pool.  Using
25142 function size 2048/3 as the threshold is conservative enough.  */
25145 if ((func_size * 3) >= 2048)
25147 /* Record the fact that we have decided that
25148 the function does use far jumps.  */
25149 cfun->machine->far_jump_used = 1;
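/* Worked arithmetic behind the threshold (illustrative): a function of
   682 bytes is at most 341 2-byte insns; with a worst-case 4-byte
   constant pool entry per insn that spans 682 + 4 * 341 = 2046 bytes,
   exactly the largest offset a Thumb1 long branch can reach, so
   func_size * 3 >= 2048 is the point at which a far jump must be
   assumed.  */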
25157 /* Return nonzero if FUNC must be entered in ARM mode.  */
25159 is_called_in_ARM_mode (tree func)
25161 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25163 /* Ignore the problem about functions whose address is taken.  */
25164 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25168 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25174 /* Given the stack offsets and register mask in OFFSETS, decide how
25175 many additional registers to push instead of subtracting a constant
25176 from SP.  For epilogues the principle is the same except we use pop.
25177 FOR_PROLOGUE indicates which we're generating.  */
25179 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25181 HOST_WIDE_INT amount;
25182 unsigned long live_regs_mask = offsets->saved_regs_mask;
25183 /* Extract a mask of the ones we can give to the Thumb's push/pop
25185 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25186 /* Then count how many other high registers will need to be pushed.  */
25187 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25188 int n_free, reg_base, size;
25190 if (!for_prologue && frame_pointer_needed)
25191 amount = offsets->locals_base - offsets->saved_regs;
25193 amount = offsets->outgoing_args - offsets->saved_regs;
25195 /* If the stack frame size is 512 exactly, we can save one load
25196 instruction, which should make this a win even when optimizing
25198 if (!optimize_size && amount != 512)
25201 /* Can't do this if there are high registers to push.  */
25202 if (high_regs_pushed != 0)
25205 /* Shouldn't do it in the prologue if no registers would normally
25206 be pushed at all.  In the epilogue, also allow it if we'll have
25207 a pop insn for the PC.  */
25210 || TARGET_BACKTRACE
25211 || (live_regs_mask & 1 << LR_REGNUM) == 0
25212 || TARGET_INTERWORK
25213 || crtl->args.pretend_args_size != 0))
25216 /* Don't do this if thumb_expand_prologue wants to emit instructions
25217 between the push and the stack frame allocation.  */
25219 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25220 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25227 size = arm_size_return_regs ();
25228 reg_base = ARM_NUM_INTS (size);
25229 live_regs_mask >>= reg_base;
25232 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25233 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
25235 live_regs_mask >>= 1;
25241 gcc_assert (amount / 4 * 4 == amount);
25243 if (amount >= 512 && (amount - n_free * 4) < 512)
25244 return (amount - 508) / 4;
25245 if (amount <= n_free * 4)
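/* Worked example (illustrative): with amount == 516 and n_free == 2,
   516 - 2 * 4 == 508 < 512, so the function returns (516 - 508) / 4 == 2;
   pushing two extra registers shrinks the remaining adjustment to 508,
   which fits a single Thumb1 "sub sp" immediate.  */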
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    if (mask & (1 << regno))
	      {
		asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			     regno);

		for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		     next_hi_reg--)
		  if (live_regs_mask & (1 << next_hi_reg))
		    break;
	      }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
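/* Illustrative note (not from the original source): for a function that
   had pushed r8 and r9 through low work registers, the restore sequence
   emitted above looks roughly like:
	pop	{r2, r3}
	mov	r9, r3
	mov	r8, r2
   i.e. the high registers are refilled via whatever low registers are
   free, because the Thumb-1 pop encoding cannot target r8-r12.  */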
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;

  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
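/* Illustrative note (not from the original source): each elimination is
   just a difference of the cached frame offsets.  For example, with
   saved_args == 0, saved_regs == 24, soft_frame == 24, locals_base == 24
   and outgoing_args == 40, eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields 40 - 0 == 40 bytes.  */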
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	  0   sub   SP, #16		Reserve space for 4 registers.
	  2   push  {R7}		Push low registers.
	  4   add   R7, SP, #20		Get the stack pointer before the push.
	  6   str   R7, [SP, #8]	Store the stack pointer
					  (before reserving the space).
	  8   mov   R7, PC		Get hold of the start of this code + 12.
	 10   str   R7, [SP, #16]	Store it.
	 12   mov   R7, FP		Get hold of the current frame pointer.
	 14   str   R7, [SP, #4]	Store it.
	 16   mov   R7, LR		Get hold of the current return address.
	 18   str   R7, [SP, #12]	Store it.
	 20   add   R7, SP, #16		Point at the start of the
					  backtrace structure.
	 22   mov   FP, R7		Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such kind of stash may clobber the
	 use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
	 saved; but if the function examines its own return address then
	 the value is still live and we need to avoid using it.  */
      bool return_addr_live
	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			   LR_REGNUM);
      if (lr_needs_saving || return_addr_live)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	      /* If the return address is not live at this point, we
		 can add LR to the list of registers that we can use
		 for pushes.  */
	      if (!return_addr_live)
		pushable_regs |= 1 << LR_REGNUM;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");
  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
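/* Illustrative note (not from the original source): for a 4 KiB frame
   with r4 live, the constant-pool fallback above emits roughly:
	ldr	r4, .Lnnn	@ .Lnnn: .word -4096
	add	sp, sp, r4
   instead of a long chain of small "sub sp" adjustments.  */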
  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (TARGET_HARD_FLOAT)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      bitmap_clear_bit (to_clear_bitmap, 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_or_fixed_reg_p (regno))
	bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    if (to_clear_return_mask & (1ULL << regno))
	      bitmap_clear_bit (to_clear_bitmap, regno);
	}
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}
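/* Illustrative note (not from the original source): for a
   cmse_nonsecure_entry function returning an int in r0, the bitmap built
   above leaves r0 alone and requests clearing of r1-r3 and ip (plus, on
   a hard-float ABI, the caller-saved VFP registers), so no secure-state
   values leak to the non-secure caller.  */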
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
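/* Illustrative note (not from the original source): a function that saved
   {r4, r5, lr} returns through the multi-register path above as
	pop	{r4, r5, pc}
   i.e. LR's saved slot is reloaded straight into the PC.  */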
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_or_fixed_reg_p (i + 1)))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_or_fixed_reg_p (i + 1)))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
	emit_insn (gen_addsi3 (stack_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char * name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
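/* Illustrative note (not from the original source): loading a DImode
   value at [r0 + r1] into r0/r1 overlaps the address registers, so the
   PLUS path above first forms the address in the high half of the
   destination:
	add	r1, r0, r1
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]
   which is why the high destination register doubles as the address
   temporary.  */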
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      /* Sort the three scratch registers into ascending order with a
	 three-element exchange network, since ldmia/stmia register
	 lists must be ascending.  */
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
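/* Illustrative note (not from the original source): a symbol encoded as
   "*foo" is printed verbatim as "foo" by arm_asm_output_labelref, while
   a plain "foo" goes through the %U escape and may gain the target's
   user-label prefix (e.g. "_foo").  */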
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
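/* Illustrative note (not from the original source): a call such as
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) in arm_file_start
   below produces
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args
   when -fverbose-asm is in effect, and just the numeric form otherwise.  */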
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  arm_last_printed_arch_string = arm_active_target.arch_name;
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatability for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	      arm_last_printed_arch_string = "armv7ve";
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	{
	  asm_fprintf (asm_out_file, "\t.arch %s\n",
		       arm_active_target.core_name + 8);
	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
	}
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    {
      split_all_insns_noflow ();
      arm_reorg ();
    }

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
27349 /* Output code to add DELTA to the first argument, and then jump
27350 to FUNCTION. Used for C++ multiple inheritance. */
27353 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
27354 HOST_WIDE_INT vcall_offset
, tree function
)
27356 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
27358 assemble_start_function (thunk
, fnname
);
27360 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
27362 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
27363 assemble_end_function (thunk
, fnname
);
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}

const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* Implement TARGET_DEFAULT_SHORT_ENUMS.  */

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
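/* Example of the effect of arm_promote_function_mode: a QImode or HImode
   value (e.g. a 'short' argument or return value) is widened to SImode,
   so a function such as

     short add1 (short x) { return x + 1; }

   passes and returns its value in a full 32-bit register.  Whether the
   *caller* may rely on this promotion having been done is what
   arm_promote_prototypes controls: only for APCS/ATPCS, not for AAPCS.  */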
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
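/* Worked example: with the 255 mask above, the middle-end may fold an
   SImode shift count of 257 down to 1 (257 & 255 == 1), matching what
   the hardware does with a shift amount taken from a register, whereas
   the 0 mask returned for DImode promises nothing, so no such folding
   is done for out-of-range DImode shift counts.  */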
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
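/* A few sample mappings produced by the function above (assuming the
   usual FIRST_VFP_REGNUM/FIRST_IWMMXT_* register layout):

     r0..r15	->  0..15	(identity)
     s0..s31	->  64..95	(legacy VFP single encoding)
     d16..d31	->  272..287	(256 + D-register number)
     wcgr0..3	->  104..107
     wr0..wr15	->  112..127  */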
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
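/* Example (illustrative): a prologue push such as

     push	{r4, r5, lr}

   arrives here as a PARALLEL of one sp adjustment plus three stores and
   produces the unwind directive

     .save	{r4, r5, lr}

   If -Os pushed a dummy register first to fold the stack adjustment in,
   the dummy slot is described separately with a trailing ".pad #4".  */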
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
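/* Taken together with arm_unwind_emit, a typical unwound function is
   bracketed like this in the assembly output (sketch):

     func:
	.fnstart
	.save	{r4, lr}
	push	{r4, lr}
	...
	.fnend

   while a function known never to unwind carries ".cantunwind" between
   .fnstart and .fnend and no frame annotations at all.  */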
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}

/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
	|| ((opmode == V2SImode) && (shift > 31))
	|| ((opmode == DImode) && (shift > 63)))
  {
    if (wror_or_wsra)
      {
	sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	output_asm_insn (templ, operands);
	if (opmode == DImode)
	  {
	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	    output_asm_insn (templ, operands);
	  }
      }
    else
      {
	/* The destination register will contain all zeros.  */
	sprintf (templ, "wzero\t%%0");
	output_asm_insn (templ, operands);
      }
    return "";
  }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	{
	  break;
	}
      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case E_V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case E_V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case E_V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}

/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
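/* Illustrative QImode dispatch emitted by the function above:

	cmp	r0, #N		@ N = number of cases - 1
	bhi	.Ldefault
	tbb	[pc, r0]

   with the ADDR_DIFF_VEC that follows supplying the byte offsets; the
   HImode and SImode variants differ only in how the table is accessed.  */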
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}

const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}

/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static void
arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      sizes->safe_push (16);
      sizes->safe_push (8);
    }
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use
     32-bit encodings.  There is one exception for PUSH: LR in HI_REGS can be
     used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
   rtx, RETURN_PC is true if OPERANDS contains return insn.  WRITE_BACK_P is
   true if OPERANDS contains an insn which explicitly updates the base
   register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
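/* Encoding-size examples for the two length computations above (Thumb-2):

     push  {r0, r7}		-> 2 bytes (all LO_REGS)
     push  {r4, lr}		-> 2 bytes (LR is the allowed HI_REGS case)
     push  {r4, r8}		-> 4 bytes (r8 forces the 32-bit encoding)
     pop   {r4, pc}		-> 2 bytes (PC is allowed for POP)
     ldmia r8!, {r0, r1}	-> 4 bytes (high base register for LDM)  */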
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}

/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
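/* Worked examples: vfp3_const_double_for_fract_bits (0.25) yields 2,
   since 1/0.25 == 4 == 2^2 and 2 is within [0, 31]; 0.3 yields 0
   because its inverse is not an exact power of two.  Similarly,
   vfp3_const_double_for_bits on the constant 256.0 yields 8, while a
   negative, NaN, infinite or non-integer value yields -1.  */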
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}

/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
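/* For orientation, the SImode loop ultimately produced from the helpers
   above for a strong compare-and-swap looks roughly like (sketch):

     1:	ldrex	r0, [r2]
	cmp	r0, r3		@ expected value
	bne	2f
	strex	r1, r4, [r2]	@ attempt the store; r1 = failure flag
	cmp	r1, #0
	bne	1b		@ monitor lost, retry
     2:

   with barriers or acquire/release variants substituted according to
   the memory model.  */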
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    cmp_mode = E_SImode;
  else
    cmp_mode = CC_Zmode;

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
					    oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32-bit targets and by neg_bval being zero
   for Thumb-1 targets (i.e. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
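/* Example: for '__atomic_fetch_add (&x, 1, __ATOMIC_SEQ_CST)' on an
   SImode object, the expander calls the splitter above with CODE == PLUS,
   OLD_OUT bound to the result register and NEW_OUT to a scratch,
   producing a loop along these lines (sketch; exact barriers depend on
   the architecture and memory model):

	dmb	ish
     1:	ldrex	r0, [r2]
	add	r1, r0, #1
	strex	r3, r1, [r2]
	cmp	r3, #0
	bne	1b
	dmb	ish  */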
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}

void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
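/* Example of the modulo handling above: for a V8QI two-operand permute,
   NELT is 8, so the mask is 15 and a selector element of 17 is reduced
   to 1 (17 & 15), i.e. it picks the second byte of OP0; elements 8..15
   select from OP1, matching what VTBL does once the operands have been
   combined into a register pair.  */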
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
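/* Example (illustrative): a little-endian two-operand V4SI unzip of even
   lanes has selector { 0, 2, 4, 6 }, and of odd lanes { 1, 3, 5, 7 };
   both are matched here and expand to a single VUZP.32, with the unused
   half of the result going to a scratch register.  */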
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
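/* Example (illustrative): a little-endian two-operand V4SI zip of the low
   halves has selector { 0, 4, 1, 5 } and of the high halves { 2, 6, 3, 7 };
   both expand to a single VZIP.32.  */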
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen) (machine_mode, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev16;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V4SImode:
	case E_V2SImode:
	case E_V4SFmode:
	case E_V2SFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->vmode, d->target, d->op0));
  return true;
}
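/* Example (illustrative): for V8QI with diff == 3 the matched selector is
   { 3, 2, 1, 0, 7, 6, 5, 4 }, i.e. a byte reversal within each 32-bit
   chunk, which is exactly VREV32.8.  */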
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);

  if (d->vmode == E_DImode)
    return false;

  emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
  return true;
}
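/* Example (illustrative): the two-operand V8QI selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } is a strictly increasing run starting at 3,
   so it becomes VEXT.8 with #3; the one-operand selector
   { 3, 4, 5, 6, 7, 0, 1, 2 } is the rotation handled by the
   next == nelt wrap-around above.  */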
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      d->perm.rotate_inputs (1);
      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
			      const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
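/* Worked example (illustrative): for V4SI and sel = { 0, 4, 1, 5 } the
   loop above computes which == 3 (lanes drawn from both inputs), so
   one_vector_p stays false unless op0 and op1 are the same value, in
   which case case 3 falls through to case 2 and the permutation is
   folded onto a single input vector.  */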
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we have ldrd (or the access needs only a
     single word), then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result that an ARM instruction
	 in a shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
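/* Illustrative sequence (register choices hypothetical) for an ARM-mode
   shift-by-register LSHIFTRT with out = r0:r1, in = r2:r3, amount = r4:

	rsb	ip, r4, #32	@ scratch1 = 32 - amount
	sub	lr, r4, #32	@ scratch2 = amount - 32
	lsr	r0, r2, r4	@ out_down = in_down >> amount
	orr	r0, r0, r3, lsl ip
	orr	r0, r0, r3, lsr lr
	lsr	r1, r3, r4	@ out_up = in_up >> amount

   Register-controlled LSL/LSR produce zero for amounts of 32 or more, so
   each ORR term simply vanishes when its amount is out of range; ASR
   instead fills with the sign bit, which is why the ASHIFTRT variant
   above needs the conditional branch.  */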
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
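/* Examples (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 0x7fff))) are accepted, while
   (const (plus (symbol_ref "x") (const_int 0x8000))) is rejected because
   the addend no longer fits the signed 16-bit REL field.  */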
/* Return TRUE if this is a valid comparison operation, and put the
   operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
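/* Note (illustrative): even with the ARMv8.2-A FP16 extension there is no
   HFmode compare instruction, so both operands are widened to SFmode here
   and the comparison proceeds as an SFmode VCMP; without
   TARGET_VFP_FP16INST the HFmode case fails validation instead.  */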
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
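/* Worked example (illustrative): for length == 15, align == 4, without
   strd, num = cost-of-constant + (15 >> 2) + leftover[15 & 3]
	     = cost + 3 + 2;
   with unaligned_access the trailing STRH/STRB pair (length & 3 == 3) is
   merged into one STR, saving one instruction before the comparison
   against arm_block_set_max_insns.  */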
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If at least nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storev8qi (mem, reg));
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_insn (gen_unaligned_storev8qi (mem, reg));
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	    emit_move_insn (mem, reg);
	  else
	    emit_insn (gen_unaligned_storedi (mem, reg));
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
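/* Example (illustrative): for memset (p, 0xAB, 15) with 4-byte alignment
   on a NEON tune that prefers it, the vectorized path stores one V8QI of
   0xAB bytes and then shifts back to issue a second, overlapping 8-byte
   store covering bytes 7-14; otherwise the core-register path emits three
   word stores plus an overlapping word store for the 3-byte tail.  */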
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
			  (const_int 16)
			  (const_int 16))
		     (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
			 (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
			 (lo_sum (reg r1)
				 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
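/* Example of a fusible pair (illustrative):

	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM

   Both sets target the same SImode register, so on cores that fuse
   MOVW/MOVT the scheduler keeps them adjacent.  */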
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
	= TREE_TARGET_OPTION (caller_tree ? caller_tree
					  : target_option_default_node);

  struct cl_target_option *callee_opts
	= TREE_TARGET_OPTION (callee_tree ? callee_tree
					  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      if (!strcmp (q, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  if (TARGET_FDPIC && !arm_arch_thumb2)
	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
	}

      else if (!strcmp (q, "arm"))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strcmp (q, "general-regs-only"))
	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for target attribute or pragma %qs", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else if (!strncmp (q, "arch=", 5))
	{
	  char *arch = q + 5;
	  const arch_option *arm_selected_arch
	     = arm_parse_arch_option_name (all_architectures, "arch", arch);

	  if (!arm_selected_arch)
	    {
	      error ("invalid architecture for target attribute or pragma %qs",
		     q);
	      return false;
	    }

	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
	}
      else if (q[0] == '+')
	{
	  opts->x_arm_arch_string
	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
	}
      else
	{
	  error ("unknown target attribute or pragma %qs", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }
  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  /* Only update the assembler .arch string if it is distinct from the last
     such string we printed.  arch_to_print is set conditionally in case
     targ_options->x_arm_arch_string is NULL which can be the case
     when cc1 is invoked directly without passing -march option.  */
  std::string arch_to_print;
  if (targ_options->x_arm_arch_string)
    arch_to_print = targ_options->x_arm_arch_string;

  if (arch_to_print != arm_last_printed_arch_string)
    {
      std::string arch_name
	= arch_to_print.substr (0, arch_to_print.find ("+"));
      asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
      const arch_option *arch
	= arm_parse_arch_option_name (all_architectures, "-march",
				      targ_options->x_arm_arch_string);
      auto_sbitmap opt_bits (isa_num_bits);

      if (arch->common.extensions)
	{
	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
	       opt->name != NULL;
	       opt++)
	    {
	      if (!opt->remove)
		{
		  arm_initialize_isa (opt_bits, opt->isa_bits);
		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
		      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
				 opt->name);
		}
	    }
	}

      arm_last_printed_arch_string = arch_to_print;
    }

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  std::string fpu_to_print
    = TARGET_SOFT_FLOAT
	? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);

  if (fpu_to_print != arm_last_printed_fpu_string)
    {
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
      arm_last_printed_fpu_string = fpu_to_print;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
31698 /* Construct and return a PARALLEL RTX vector with elements numbering the
31699 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31700 the vector - from the perspective of the architecture. This does not
31701 line up with GCC's perspective on lane numbers, so we end up with
31702 different masks depending on our target endian-ness. The diagram
31703 below may help. We must draw the distinction when building masks
31704 which select one half of the vector. An instruction selecting
31705 architectural low-lanes for a big-endian target, must be described using
31706 a mask selecting GCC high-lanes.
31708 Big-Endian Little-Endian
31710 GCC 0 1 2 3 3 2 1 0
31711 | x | x | x | x | | x | x | x | x |
31712 Architecture 3 2 1 0 3 2 1 0
31714 Low Mask: { 2, 3 } { 0, 1 }
31715 High Mask: { 0, 1 } { 2, 3 }
rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
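/* For example, for V4SImode (four lanes) a call
   arm_simd_vect_par_cnst_half (V4SImode, true) yields
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target
   and (parallel [(const_int 0) (const_int 1)]) on a big-endian one,
   matching the High Mask rows of the diagram above.  */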
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */
bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */
void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;

  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
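/* For instance (a sketch, not from a dump), arm_gen_unlikely_cbranch
   (NE, CCmode, label) emits the unlikely jump

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                             (label_ref label)
                             (pc)))  */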
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */
static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}
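/* For example, a pure-code executable section (SECTION_CODE set,
   SECTION_DEBUG clear, nothing else) yields
   *num = 0x20000000 | 0x2 | 0x4 = 0x20000006, i.e.
   SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR.  */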
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */
static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior
     as the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like
         for example .text.startup.  We set the pure-code attribute and
         return the same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
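/* For example, under -mpure-code a function given
   __attribute__((section (".foo"))) still lands in ".foo", but the
   section object returned above carries SECTION_ARM_PURECODE, so the
   assembler output marks ".foo" with the SHF_ARM_PURECODE flag.  */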
/* Implement the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is
   the section's name and RELOC indicates whether the declaration's
   initializer may contain runtime relocations.  */
static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate a call to __aeabi_[mode]divmod (op0, op1).  */
static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode,
                                        op0, GET_MODE (op0),
                                        op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
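/* For SImode, LIBFUNC is e.g. __aeabi_idivmod, which under the AAPCS
   returns the quotient in r0 and the remainder in r1; viewed as one
   DImode value those are the subwords at byte offsets 0 and
   GET_MODE_SIZE (SImode) == 4, which the two simplify_gen_subreg calls
   above extract.  */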
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */
bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
        if (arm_arch4)
          return true;
        break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
        /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch5t)
          return true;
        break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
        /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch6 || arm_arch5te)
          return true;
        break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
        if (arm_arch6)
          return true;
        break;
      default:
        gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */
bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;

  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
        {
          /* Or registers with an offset.  */
          if (!REG_P (XEXP (op, 0)))
            return false;

          op = XEXP (op, 1);

          /* The offset must be an immediate though.  */
          if (!CONST_INT_P (op))
            return false;

          range = INTVAL (op);

          /* Within the range of [-1020,1020].  */
          if (!IN_RANGE (range, -1020, 1020))
            return false;

          /* And a multiple of 4.  */
          return (range % 4) == 0;
        }
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
        return REG_P (XEXP (op, 0));
      default:
        gcc_unreachable ();
    }
  return false;
}
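/* For example (illustrative operands): [r2], [r2, #1020] and [r2, #-508]
   are accepted, as are auto-increment forms such as (post_inc (reg r2));
   [r2, #1024] (out of range) and [r2, #2] (not a multiple of 4) are
   rejected.  */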
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored
   in VFP registers in little-endian order.  We can't describe that
   accurately to GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */
static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
                           reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
          || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */
static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);

  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
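/* For example, with BITS_PER_WORD == 32 a string constant is aligned to at
   least 32 bits, and to 64 bits when tuning for XScale in ARM mode
   (FACTOR == 2), except when optimizing for size.  */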
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
#if CHECKING_P
namespace selftest {
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature
                 bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature
                 bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
               " group that are not defined by any FPU.\n"
               "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests

#endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"