1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 /* Forward definitions of types. */
83 typedef struct minipool_node Mnode;
84 typedef struct minipool_fixup Mfix;
85
86 void (*arm_lang_output_object_attributes_hook)(void);
87
88 struct four_ints
89 {
90 int i[4];
91 };
92
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx);
95 static int arm_needs_doubleword_align (machine_mode, const_tree);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets *arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
101 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap);
104 static int arm_address_register_rtx_p (rtx, int);
105 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
106 static bool is_called_in_ARM_mode (tree);
107 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
108 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
109 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
110 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
111 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
112 inline static int thumb1_index_register_rtx_p (rtx, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx, int);
118 static void arm_print_operand_address (FILE *, machine_mode, rtx);
119 static bool arm_print_operand_punct_valid_p (unsigned char code);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
207
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 #ifndef ARM_PE
213 static void arm_encode_section_info (tree, rtx, int);
214 #endif
215
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree, tree *);
219
220 static void arm_setup_incoming_varargs (cumulative_args_t,
221 const function_arg_info &, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t,
223 const function_arg_info &);
224 static bool arm_promote_prototypes (const_tree);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree);
228 static bool arm_must_pass_in_stack (const function_arg_info &);
229 static bool arm_return_in_memory (const_tree, const_tree);
230 #if ARM_UNWIND_INFO
231 static void arm_unwind_emit (FILE *, rtx_insn *);
232 static bool arm_output_ttype (rtx);
233 static void arm_asm_emit_except_personality (rtx);
234 #endif
235 static void arm_asm_init_sections (void);
236 static rtx arm_dwarf_register_span (rtx);
237
238 static tree arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree arm_get_cookie_size (tree);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree, rtx);
250 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
253 struct cl_target_option *);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option *);
256 static void arm_set_current_function (tree);
257 static bool arm_can_inline_p (tree, tree);
258 static void arm_relayout_function (tree);
259 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
260 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
261 static bool arm_sched_can_speculate_insn (rtx_insn *);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn *);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
269 static bool arm_output_addr_const_extra (FILE *, rtx);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree);
272 static tree arm_promoted_type (const_tree t);
273 static bool arm_scalar_mode_supported_p (scalar_mode);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx, tree, rtx);
278 static rtx arm_trampoline_adjust_address (rtx);
279 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
283 static bool arm_array_mode_supported_p (machine_mode,
284 unsigned HOST_WIDE_INT);
285 static machine_mode arm_preferred_simd_mode (scalar_mode);
286 static bool arm_class_likely_spilled_p (reg_class_t);
287 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
288 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
290 const_tree type,
291 int misalignment,
292 bool is_packed);
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
295 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
301
302 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
303 rtx, const vec_perm_indices &);
304
305 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
308 tree vectype,
309 int misalign ATTRIBUTE_UNUSED);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 vec<machine_mode> &,
330 vec<const char *> &, vec<rtx> &,
331 vec<rtx> &, HARD_REG_SET &, location_t);
332 static const char *arm_identify_fpu_from_isa (sbitmap);
333 \f
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
341 call. */
342 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Whereas these functions are always known to reside within the 26 bit
344 addressing range. */
345 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
348 NULL },
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
351 NULL },
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
353 NULL },
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute, NULL },
356 #ifdef ARM_PE
357 /* ARM/PE has three new attributes:
358 interfacearm - ?
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
361
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
364 multiple times.
365 */
366 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
367 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute, NULL },
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
372 NULL },
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
374 NULL },
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute, NULL },
377 #endif
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry, NULL },
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call, NULL },
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
384 };
385
386 static const scoped_attribute_specs arm_gnu_attribute_table =
387 {
388 "gnu", { arm_gnu_attributes }
389 };
390
391 static const scoped_attribute_specs *const arm_attribute_table[] =
392 {
393 &arm_gnu_attribute_table
394 };
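/* Illustrative sketch only (hypothetical user declarations, not part of this
   backend): the GNU attributes registered in the table above are spelled
   like this at the C source level when compiling for Arm:

     void far_helper (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     int  entry_func (int) __attribute__ ((cmse_nonsecure_entry));

   The handler functions named in each table entry validate such uses, and
   the type-attribute comparison hooks defined further down decide how the
   attributes combine across declarations.  */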
395 \f
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
400 #endif
401
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
404
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
407
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
410
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
413
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
418
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
423
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
430
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
433
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
436
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
439
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
442
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
445
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
448
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
451
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
454
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
457
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
460
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
463
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
466
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
469
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
472
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
475
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
478
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
481
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
484
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
487
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
490
491 #undef TARGET_ENCODE_SECTION_INFO
492 #ifdef ARM_PE
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
494 #else
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
496 #endif
497
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
500
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
503
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
506
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
509
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
512
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
515
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
518
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
521
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
526
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
533
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
545
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
548
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
555
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
558
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
575
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
578
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
581
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
588
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
591
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
594
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
597
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
600
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
603
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
606
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
609
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
612
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
615
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
618
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
621
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
625
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
628
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
631
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
634
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
637
638 #if ARM_UNWIND_INFO
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
641
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
645
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
648
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
651
652 #endif /* ARM_UNWIND_INFO */
653
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
656
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
659
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
662
663 #ifdef HAVE_AS_TLS
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
666 #endif
667
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
670
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
673
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
676
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
679
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
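/* Worked example of the arithmetic behind the two anchor limits above
   (illustrative only): offsets run from -4088 to +4095 inclusive, so one
   anchor covers 4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of
   eight, which keeps successive anchors naturally spaced.  */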
685
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
688
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
691
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
695
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
699
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
702
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
705
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
708
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
711
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
714
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
721
722 #ifdef HAVE_AS_TLS
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
725 #endif
726
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
729
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
732
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
735
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
738
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
741
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
744
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
747
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
750
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
753
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
756
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
759
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
763
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
767
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
771
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
774
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
778
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
782
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
785
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
788
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
791
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
794
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
797
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
800
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
803
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
806
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
809
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
812
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
815
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
820
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
823
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
828
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
831
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
834
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
837
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
840
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
843
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
846
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
849 \f
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack;
852 static char * minipool_startobj;
853
854 /* The maximum number of insns skipped which
855 will be conditionalised if possible. */
856 static int max_insns_skipped = 5;
857
858 /* True if we are currently building a constant table. */
859 int making_const_table;
860
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune = TARGET_CPU_arm_none;
863
864 /* The current tuning set. */
865 const struct tune_params *current_tune;
866
867 /* Which floating point hardware to schedule for. */
868 int arm_fpu_attr;
869
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label[14];
872 static int thumb_call_reg_needed;
873
874 /* The bits in this mask specify which instruction scheduling options should
875 be used. */
876 unsigned int tune_flags = 0;
877
878 /* The highest ARM architecture version supported by the
879 target. */
880 enum base_architecture arm_base_arch = BASE_ARCH_0;
881
882 /* Active target architecture and tuning. */
883
884 struct arm_build_target arm_active_target;
885
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
888
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
890 int arm_arch4 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
893 int arm_arch4t = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
896 int arm_arch5t = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
899 int arm_arch5te = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
902 int arm_arch6 = 0;
903
904 /* Nonzero if this chip supports the ARM 6K extensions. */
905 int arm_arch6k = 0;
906
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
908 int arm_arch6kz = 0;
909
910 /* Nonzero if instructions present in ARMv6-M can be used. */
911 int arm_arch6m = 0;
912
913 /* Nonzero if this chip supports the ARM 7 extensions. */
914 int arm_arch7 = 0;
915
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae = 0;
918
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm = 0;
921
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
923 int arm_arch7em = 0;
924
925 /* Nonzero if instructions present in ARMv8 can be used. */
926 int arm_arch8 = 0;
927
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
929 int arm_arch8_1 = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
932 int arm_arch8_2 = 0;
933
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
935 int arm_arch8_3 = 0;
936
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
938 int arm_arch8_4 = 0;
939
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
941 extensions. */
942 int arm_arch8m_main = 0;
943
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
945 extensions. */
946 int arm_arch8_1m_main = 0;
947
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
949 Architecture 8.2. */
950 int arm_fp16_inst = 0;
951
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched = 0;
954
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm = 0;
957
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt = 0;
960
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2 = 0;
963
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale = 0;
966
967 /* Nonzero if tuning for XScale */
968 int arm_tune_xscale = 0;
969
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf = 0;
973
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9 = 0;
976
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
978 preprocessor.
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork = 0;
983
984 /* Nonzero if chip supports Thumb 1. */
985 int arm_arch_thumb1;
986
987 /* Nonzero if chip supports Thumb 2. */
988 int arm_arch_thumb2;
989
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv;
992 int arm_arch_thumb_hwdiv;
993
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce;
996
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool = false;
999
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register = INVALID_REGNUM;
1002
1003 enum arm_pcs arm_pcs_default;
1004
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc;
1009
1010 rtx arm_target_insn;
1011 int arm_target_label;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count = 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask = 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen = 0;
1019
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc = 0;
1022
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod = 0;
1025
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse = 0;
1028
1029 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1030 int arm_m_profile_small_mul = 0;
1031
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm = 0;
1034
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16 = 0;
1037
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde = 0;
1040 int arm_arch_cde_coproc = 0;
1041 const int arm_arch_cde_coproc_bits[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1043 };
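/* Sketch of how these two CDE variables fit together (an assumption based
   on the names above, kept hypothetical):

     bool cde_on_coproc_p
       = (arm_arch_cde_coproc & arm_arch_cde_coproc_bits[n]) != 0;

   arm_arch_cde records whether the Custom Datapath Extension is present at
   all, while arm_arch_cde_coproc accumulates one bit per coprocessor for
   which CDE is enabled.  */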
1044
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes[] =
1047 {
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1050 };
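/* Illustrative note (assuming the usual arm_cond_code ordering in arm.h):
   this array is indexed by arm_cond_code, so arm_condition_codes[ARM_EQ]
   is "eq".  The entries are laid out in inverse pairs ("eq"/"ne",
   "cs"/"cc", ...), which is why inverting a condition only requires
   flipping the low bit of the index.  */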
1051
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence[] =
1054 {
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1056 };
1057
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1060 FP_SYSREGS
1061 };
1062 #undef DEF_FP_SYSREG
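/* How the X-macro above expands, as a sketch (the real FP_SYSREGS list is
   kept in arm.h; the two registers here are only examples): if FP_SYSREGS
   were

     #define FP_SYSREGS DEF_FP_SYSREG (FPSCR) DEF_FP_SYSREG (VPR)

   then, with DEF_FP_SYSREG (reg) stringizing its argument and appending a
   comma, the initializer becomes { "FPSCR", "VPR", }, after which
   DEF_FP_SYSREG is #undef'd again.  */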
1063
1064 #define ARM_LSL_NAME "lsl"
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1066
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1073 : 0)))
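/* Worked example for THUMB2_WORK_REGS (assuming the usual numbering:
   THUMB_HARD_FRAME_POINTER_REGNUM == 7, SP_REGNUM == 13, PC_REGNUM == 15,
   and no PIC register in use): the SP and PC bits already fall outside
   0xff, so the mask reduces to 0xff & ~(1 << 7) == 0x7f, i.e. the low
   registers r0-r6.  */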
1074 \f
1075 /* Initialization code. */
1076
1077 struct cpu_tune
1078 {
1079 enum processor_type scheduler;
1080 unsigned int tune_flags;
1081 const struct tune_params *tune;
1082 };
1083
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1086 { \
1087 num_slots, \
1088 l1_size, \
1089 l1_line_size \
1090 }
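/* Example expansion (illustrative values): ARM_PREFETCH_BENEFICIAL (4,
   32768, 64) simply becomes { 4, 32768, 64 }, i.e. the number of prefetch
   slots followed by the L1 cache size and the L1 line size, whereas
   ARM_PREFETCH_NOT_BENEFICIAL leaves the cache geometry unknown as
   { 0, -1, -1 }.  */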
1091
1092 /* arm generic vectorizer costs. */
1093 static const
1094 struct cpu_vec_costs arm_default_vec_cost = {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1107 };
1108
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1110 #include "aarch-cost-tables.h"
1111
1112
1113
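/* A note on reading the extra-cost tables that follow (a summary of the
   existing convention, not new data): each entry is the cost beyond a
   baseline instruction, expressed via COSTS_N_INSNS, where
   COSTS_N_INSNS (N) stands for the cost of N typical instructions.  A 0
   entry therefore means "no extra cost", while, for example,
   COSTS_N_INSNS (2) for arith_shift_reg in the Cortex-A9 table below means
   roughly two instructions' worth of extra cost.  */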
1114 const struct cpu_cost_table cortexa9_extra_costs =
1115 {
1116 /* ALU */
1117 {
1118 0, /* arith. */
1119 0, /* logical. */
1120 0, /* shift. */
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1124 0, /* log_shift. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1134 },
1135 {
1136 /* MULT SImode */
1137 {
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1144 },
1145 /* MULT DImode */
1146 {
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (4), /* extend_add. */
1152 0 /* idiv (N/A). */
1153 }
1154 },
1155 /* LD/ST */
1156 {
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1176 },
1177 {
1178 /* FP SFmode */
1179 {
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 },
1194 /* FP DFmode */
1195 {
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1209 }
1210 },
1211 /* Vector */
1212 {
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1218 }
1219 };
1220
1221 const struct cpu_cost_table cortexa8_extra_costs =
1222 {
1223 /* ALU */
1224 {
1225 0, /* arith. */
1226 0, /* logical. */
1227 COSTS_N_INSNS (1), /* shift. */
1228 0, /* shift_reg. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1233 0, /* extend. */
1234 0, /* extend_arith. */
1235 0, /* bfi. */
1236 0, /* bfx. */
1237 0, /* clz. */
1238 0, /* rev. */
1239 0, /* non_exec. */
1240 true /* non_exec_costs_exec. */
1241 },
1242 {
1243 /* MULT SImode */
1244 {
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1251 },
1252 /* MULT DImode */
1253 {
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1257 0, /* add (N/A). */
1258 COSTS_N_INSNS (2), /* extend_add. */
1259 0 /* idiv (N/A). */
1260 }
1261 },
1262 /* LD/ST */
1263 {
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1283 },
1284 {
1285 /* FP SFmode */
1286 {
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 },
1301 /* FP DFmode */
1302 {
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1316 }
1317 },
1318 /* Vector */
1319 {
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1325 }
1326 };
1327
1328 const struct cpu_cost_table cortexa5_extra_costs =
1329 {
1330 /* ALU */
1331 {
1332 0, /* arith. */
1333 0, /* logical. */
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1346 0, /* non_exec. */
1347 true /* non_exec_costs_exec. */
1348 },
1349
1350 {
1351 /* MULT SImode */
1352 {
1353 0, /* simple. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1359 },
1360 /* MULT DImode */
1361 {
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1365 0, /* add. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1367 0 /* idiv (N/A). */
1368 }
1369 },
1370 /* LD/ST */
1371 {
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1391 },
1392 {
1393 /* FP SFmode */
1394 {
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 },
1409 /* FP DFmode */
1410 {
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1424 }
1425 },
1426 /* Vector */
1427 {
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1433 }
1434 };
1435
1436
1437 const struct cpu_cost_table cortexa7_extra_costs =
1438 {
1439 /* ALU */
1440 {
1441 0, /* arith. */
1442 0, /* logical. */
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1455 0, /* non_exec. */
1456 true /* non_exec_costs_exec. */
1457 },
1458
1459 {
1460 /* MULT SImode */
1461 {
1462 0, /* simple. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1468 },
1469 /* MULT DImode */
1470 {
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1474 0, /* add. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1476 0 /* idiv (N/A). */
1477 }
1478 },
1479 /* LD/ST */
1480 {
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1500 },
1501 {
1502 /* FP SFmode */
1503 {
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 },
1518 /* FP DFmode */
1519 {
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1533 }
1534 },
1535 /* Vector */
1536 {
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1542 }
1543 };
1544
1545 const struct cpu_cost_table cortexa12_extra_costs =
1546 {
1547 /* ALU */
1548 {
1549 0, /* arith. */
1550 0, /* logical. */
1551 0, /* shift. */
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1557 0, /* extend. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1559 0, /* bfi. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1563 0, /* non_exec. */
1564 true /* non_exec_costs_exec. */
1565 },
1566 /* MULT SImode */
1567 {
1568 {
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1575 },
1576 /* MULT DImode */
1577 {
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1581 0, /* add (N/A). */
1582 COSTS_N_INSNS (3), /* extend_add. */
1583 0 /* idiv (N/A). */
1584 }
1585 },
1586 /* LD/ST */
1587 {
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1597 0, /* store. */
1598 0, /* strd. */
1599 0, /* stm_1st. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1607 },
1608 {
1609 /* FP SFmode */
1610 {
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 },
1625 /* FP DFmode */
1626 {
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1640 }
1641 },
1642 /* Vector */
1643 {
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1649 }
1650 };
1651
1652 const struct cpu_cost_table cortexa15_extra_costs =
1653 {
1654 /* ALU */
1655 {
1656 0, /* arith. */
1657 0, /* logical. */
1658 0, /* shift. */
1659 0, /* shift_reg. */
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1664 0, /* extend. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1667 0, /* bfx. */
1668 0, /* clz. */
1669 0, /* rev. */
1670 0, /* non_exec. */
1671 true /* non_exec_costs_exec. */
1672 },
1673 /* MULT SImode */
1674 {
1675 {
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1682 },
1683 /* MULT DImode */
1684 {
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1688 0, /* add (N/A). */
1689 COSTS_N_INSNS (3), /* extend_add. */
1690 0 /* idiv (N/A). */
1691 }
1692 },
1693 /* LD/ST */
1694 {
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1704 0, /* store. */
1705 0, /* strd. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1709 0, /* storef. */
1710 0, /* stored. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1714 },
1715 {
1716 /* FP SFmode */
1717 {
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 },
1732 /* FP DFmode */
1733 {
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1747 }
1748 },
1749 /* Vector */
1750 {
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1756 }
1757 };
1758
1759 const struct cpu_cost_table v7m_extra_costs =
1760 {
1761 /* ALU */
1762 {
1763 0, /* arith. */
1764 0, /* logical. */
1765 0, /* shift. */
1766 0, /* shift_reg. */
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1769 0, /* log_shift. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1771 0, /* extend. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1773 0, /* bfi. */
1774 0, /* bfx. */
1775 0, /* clz. */
1776 0, /* rev. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1779 },
1780 {
1781 /* MULT SImode */
1782 {
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1789 },
1790 /* MULT DImode */
1791 {
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1795 0, /* add (N/A). */
1796 COSTS_N_INSNS (3), /* extend_add. */
1797 0 /* idiv (N/A). */
1798 }
1799 },
1800 /* LD/ST */
1801 {
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1821 },
1822 {
1823 /* FP SFmode */
1824 {
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 },
1839 /* FP DFmode */
1840 {
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1846 0, /* fpconst. */
1847 0, /* neg. */
1848 0, /* compare. */
1849 0, /* widen. */
1850 0, /* narrow. */
1851 0, /* toint. */
1852 0, /* fromint. */
1853 0 /* roundint. */
1854 }
1855 },
1856 /* Vector */
1857 {
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1863 }
1864 };
1865
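/* The three buckets per access kind below are: AMO_DEFAULT for a plain
   addressing mode, AMO_NO_WB for one without base-register writeback, and
   AMO_WB for one with writeback (for example a pre-indexed access such as
   "ldr r0, [r1, #4]!").  The generic table charges no extra cost for any
   of them.  */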
1866 const struct addr_mode_cost_table generic_addr_mode_costs =
1867 {
1868 /* int. */
1869 {
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1873 },
1874 /* float. */
1875 {
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1879 },
1880 /* vector. */
1881 {
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1885 }
1886 };
1887
1888 const struct tune_params arm_slowmul_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE,
1906 tune_params::FUSE_NOTHING,
1907 tune_params::SCHED_AUTOPREF_OFF
1908 };
1909
1910 const struct tune_params arm_fastmul_tune =
1911 {
1912 &generic_extra_costs, /* Insn extra costs. */
1913 &generic_addr_mode_costs, /* Addressing mode costs. */
1914 NULL, /* Sched adj cost. */
1915 arm_default_branch_cost,
1916 &arm_default_vec_cost,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL,
1922 tune_params::PREF_CONST_POOL_TRUE,
1923 tune_params::PREF_LDRD_FALSE,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1934
1935 const struct tune_params arm_strongarm_tune =
1936 {
1937 &generic_extra_costs, /* Insn extra costs. */
1938 &generic_addr_mode_costs, /* Addressing mode costs. */
1939 NULL, /* Sched adj cost. */
1940 arm_default_branch_cost,
1941 &arm_default_vec_cost,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL,
1947 tune_params::PREF_CONST_POOL_TRUE,
1948 tune_params::PREF_LDRD_FALSE,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_xscale_tune =
1958 {
1959 &generic_extra_costs, /* Insn extra costs. */
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost,
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_TRUE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_9e_tune =
1980 {
1981 &generic_extra_costs, /* Insn extra costs. */
1982 &generic_addr_mode_costs, /* Addressing mode costs. */
1983 NULL, /* Sched adj cost. */
1984 arm_default_branch_cost,
1985 &arm_default_vec_cost,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL,
1991 tune_params::PREF_CONST_POOL_TRUE,
1992 tune_params::PREF_LDRD_FALSE,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_OFF
1999 };
2000
2001 const struct tune_params arm_marvell_pj4_tune =
2002 {
2003 &generic_extra_costs, /* Insn extra costs. */
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_TRUE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_v6t2_tune =
2024 {
2025 &generic_extra_costs, /* Insn extra costs. */
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune =
2048 {
2049 &generic_extra_costs,
2050 &generic_addr_mode_costs, /* Addressing mode costs. */
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_FALSE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE,
2065 tune_params::FUSE_NOTHING,
2066 tune_params::SCHED_AUTOPREF_OFF
2067 };
2068
2069 const struct tune_params arm_cortex_a8_tune =
2070 {
2071 &cortexa8_extra_costs,
2072 &generic_addr_mode_costs, /* Addressing mode costs. */
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_FALSE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE,
2087 tune_params::FUSE_NOTHING,
2088 tune_params::SCHED_AUTOPREF_OFF
2089 };
2090
2091 const struct tune_params arm_cortex_a7_tune =
2092 {
2093 &cortexa7_extra_costs,
2094 &generic_addr_mode_costs, /* Addressing mode costs. */
2095 NULL, /* Sched adj cost. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_FALSE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE,
2109 tune_params::FUSE_NOTHING,
2110 tune_params::SCHED_AUTOPREF_OFF
2111 };
2112
2113 const struct tune_params arm_cortex_a15_tune =
2114 {
2115 &cortexa15_extra_costs,
2116 &generic_addr_mode_costs, /* Addressing mode costs. */
2117 NULL, /* Sched adj cost. */
2118 arm_default_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_TRUE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE,
2131 tune_params::FUSE_NOTHING,
2132 tune_params::SCHED_AUTOPREF_FULL
2133 };
2134
2135 const struct tune_params arm_cortex_a35_tune =
2136 {
2137 &cortexa53_extra_costs,
2138 &generic_addr_mode_costs, /* Addressing mode costs. */
2139 NULL, /* Sched adj cost. */
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL,
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2154 tune_params::SCHED_AUTOPREF_OFF
2155 };
2156
2157 const struct tune_params arm_cortex_a53_tune =
2158 {
2159 &cortexa53_extra_costs,
2160 &generic_addr_mode_costs, /* Addressing mode costs. */
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_FALSE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2176 tune_params::SCHED_AUTOPREF_OFF
2177 };
2178
2179 const struct tune_params arm_cortex_a57_tune =
2180 {
2181 &cortexa57_extra_costs,
2182 &generic_addr_mode_costs, /* addressing mode costs */
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2198 tune_params::SCHED_AUTOPREF_FULL
2199 };
2200
2201 const struct tune_params arm_exynosm1_tune =
2202 {
2203 &exynosm1_extra_costs,
2204 &generic_addr_mode_costs, /* Addressing mode costs. */
2205 NULL, /* Sched adj cost. */
2206 arm_default_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_FALSE,
2214 tune_params::PREF_LDRD_TRUE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE,
2219 tune_params::FUSE_NOTHING,
2220 tune_params::SCHED_AUTOPREF_OFF
2221 };
2222
2223 const struct tune_params arm_xgene1_tune =
2224 {
2225 &xgene1_extra_costs,
2226 &generic_addr_mode_costs, /* Addressing mode costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_TRUE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2243 };
2244
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2247
2248 const struct tune_params arm_cortex_a5_tune =
2249 {
2250 &cortexa5_extra_costs,
2251 &generic_addr_mode_costs, /* Addressing mode costs. */
2252 NULL, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost,
2254 &arm_default_vec_cost,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL,
2260 tune_params::PREF_CONST_POOL_FALSE,
2261 tune_params::PREF_LDRD_FALSE,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2268 };
2269
2270 const struct tune_params arm_cortex_a9_tune =
2271 {
2272 &cortexa9_extra_costs,
2273 &generic_addr_mode_costs, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_cortex_a12_tune =
2293 {
2294 &cortexa12_extra_costs,
2295 &generic_addr_mode_costs, /* Addressing mode costs. */
2296 NULL, /* Sched adj cost. */
2297 arm_default_branch_cost,
2298 &arm_default_vec_cost, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL,
2304 tune_params::PREF_CONST_POOL_FALSE,
2305 tune_params::PREF_LDRD_TRUE,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 const struct tune_params arm_cortex_a73_tune =
2315 {
2316 &cortexa57_extra_costs,
2317 &generic_addr_mode_costs, /* Addressing mode costs. */
2318 NULL, /* Sched adj cost. */
2319 arm_default_branch_cost,
2320 &arm_default_vec_cost, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL,
2326 tune_params::PREF_CONST_POOL_FALSE,
2327 tune_params::PREF_LDRD_TRUE,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2333 tune_params::SCHED_AUTOPREF_FULL
2334 };
2335
2336 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2337 cycle to execute each. An LDR from the constant pool also takes two cycles
2338 to execute, but mildly increases pipelining opportunity (consecutive
2339 loads/stores can be pipelined together, saving one cycle), and may also
2340 improve icache utilisation. Hence we prefer the constant pool for such
2341 processors. */
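/* A rough sketch of the trade-off described above (illustrative only, not
   code GCC emits verbatim):

     movw  r0, #0x5678      @ 1 cycle
     movt  r0, #0x1234      @ 1 cycle, r0 = 0x12345678

   versus

     ldr   r0, =0x12345678  @ 2 cycles via the literal pool, but can
                            @ pipeline with neighbouring loads/stores

   hence PREF_CONST_POOL_TRUE in the tuning below.  */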
2342
2343 const struct tune_params arm_v7m_tune =
2344 {
2345 &v7m_extra_costs,
2346 &generic_addr_mode_costs, /* Addressing mode costs. */
2347 NULL, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost,
2349 &arm_default_vec_cost,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL,
2355 tune_params::PREF_CONST_POOL_TRUE,
2356 tune_params::PREF_LDRD_FALSE,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Cortex-M7 tuning. */
2366
2367 const struct tune_params arm_cortex_m7_tune =
2368 {
2369 &v7m_extra_costs,
2370 &generic_addr_mode_costs, /* Addressing mode costs. */
2371 NULL, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost,
2373 &arm_default_vec_cost,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL,
2379 tune_params::PREF_CONST_POOL_TRUE,
2380 tune_params::PREF_LDRD_FALSE,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE,
2385 tune_params::FUSE_NOTHING,
2386 tune_params::SCHED_AUTOPREF_OFF
2387 };
2388
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2391 cortex-m23. */
2392 const struct tune_params arm_v6m_tune =
2393 {
2394 &generic_extra_costs, /* Insn extra costs. */
2395 &generic_addr_mode_costs, /* Addressing mode costs. */
2396 NULL, /* Sched adj cost. */
2397 arm_default_branch_cost,
2398 &arm_default_vec_cost, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL,
2404 tune_params::PREF_CONST_POOL_FALSE,
2405 tune_params::PREF_LDRD_FALSE,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE,
2410 tune_params::FUSE_NOTHING,
2411 tune_params::SCHED_AUTOPREF_OFF
2412 };
2413
2414 const struct tune_params arm_fa726te_tune =
2415 {
2416 &generic_extra_costs, /* Insn extra costs. */
2417 &generic_addr_mode_costs, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost,
2419 arm_default_branch_cost,
2420 &arm_default_vec_cost,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL,
2426 tune_params::PREF_CONST_POOL_TRUE,
2427 tune_params::PREF_LDRD_FALSE,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE,
2432 tune_params::FUSE_NOTHING,
2433 tune_params::SCHED_AUTOPREF_OFF
2434 };
2435
2436 /* Auto-generated CPU, FPU and architecture tables. */
2437 #include "arm-cpu-data.h"
2438
2439 /* The name of the preprocessor macro to define for this architecture. PROFILE
2440 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2441 is thus chosen to be big enough to hold the longest architecture name. */
2442
2443 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2444
2445 /* Supported TLS relocations. */
2446
2447 enum tls_reloc {
2448 TLS_GD32,
2449 TLS_GD32_FDPIC,
2450 TLS_LDM32,
2451 TLS_LDM32_FDPIC,
2452 TLS_LDO32,
2453 TLS_IE32,
2454 TLS_IE32_FDPIC,
2455 TLS_LE32,
2456 TLS_DESCSEQ /* GNU scheme */
2457 };
2458
2459 /* The maximum number of insns to be used when loading a constant. */
2460 inline static int
2461 arm_constant_limit (bool size_p)
2462 {
2463 return size_p ? 1 : current_tune->constant_limit;
2464 }
2465
2466 /* Emit an insn that's a simple single-set. Both the operands must be known
2467 to be valid. */
2468 inline static rtx_insn *
2469 emit_set_insn (rtx x, rtx y)
2470 {
2471 return emit_insn (gen_rtx_SET (x, y));
2472 }
2473
2474 /* Return the number of bits set in VALUE. */
2475 static unsigned
2476 bit_count (unsigned long value)
2477 {
2478 unsigned long count = 0;
2479
2480 while (value)
2481 {
2482 count++;
2483 value &= value - 1; /* Clear the least-significant set bit. */
2484 }
2485
2486 return count;
2487 }
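/* Worked example of the trick above: 0b101100 -> 0b101000 -> 0b100000 -> 0,
   so three iterations yield a count of 3.  */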
2488
2489 /* Return the number of bits set in BMAP. */
2490 static unsigned
2491 bitmap_popcount (const sbitmap bmap)
2492 {
2493 unsigned int count = 0;
2494 unsigned int n = 0;
2495 sbitmap_iterator sbi;
2496
2497 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2498 count++;
2499 return count;
2500 }
2501
2502 typedef struct
2503 {
2504 machine_mode mode;
2505 const char *name;
2506 } arm_fixed_mode_set;
2507
2508 /* A small helper for setting fixed-point library libfuncs. */
2509
2510 static void
2511 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2512 const char *funcname, const char *modename,
2513 int num_suffix)
2514 {
2515 char buffer[50];
2516
2517 if (num_suffix == 0)
2518 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2519 else
2520 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2521
2522 set_optab_libfunc (optable, mode, buffer);
2523 }
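/* For example, arm_set_fixed_optab_libfunc (ssadd_optab, E_SQmode, "ssadd",
   "sq", 3), as used in the loop further below, records "__gnu_ssaddsq3" as
   the libcall backing saturating SQmode addition.  */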
2524
2525 static void
2526 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2527 machine_mode from, const char *funcname,
2528 const char *toname, const char *fromname)
2529 {
2530 char buffer[50];
2531 const char *maybe_suffix_2 = "";
2532
2533 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2534 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2535 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2536 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2537 maybe_suffix_2 = "2";
2538
2539 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2540 maybe_suffix_2);
2541
2542 set_conv_libfunc (optable, to, from, buffer);
2543 }
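/* Likewise for conversions: fract_optab from E_QQmode ("qq") to E_HQmode
   ("hq"), both signed fractional modes, picks up the "2" suffix and
   registers "__gnu_fractqqhq2"; a conversion whose target is not a
   fixed-point mode, such as SQmode to SImode, gets no suffix and becomes
   "__gnu_fractsqsi".  */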
2544
2545 static GTY(()) rtx speculation_barrier_libfunc;
2546
2547 /* Record that we have no arithmetic or comparison libfuncs for
2548 machine mode MODE. */
2549
2550 static void
2551 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2552 {
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, mode, NULL);
2555 set_optab_libfunc (sdiv_optab, mode, NULL);
2556 set_optab_libfunc (smul_optab, mode, NULL);
2557 set_optab_libfunc (neg_optab, mode, NULL);
2558 set_optab_libfunc (sub_optab, mode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, mode, NULL);
2562 set_optab_libfunc (ne_optab, mode, NULL);
2563 set_optab_libfunc (lt_optab, mode, NULL);
2564 set_optab_libfunc (le_optab, mode, NULL);
2565 set_optab_libfunc (ge_optab, mode, NULL);
2566 set_optab_libfunc (gt_optab, mode, NULL);
2567 set_optab_libfunc (unord_optab, mode, NULL);
2568 }
2569
2570 /* Set up library functions unique to ARM. */
2571 static void
2572 arm_init_libfuncs (void)
2573 {
2574 machine_mode mode_iter;
2575
2576 /* For Linux, we have access to kernel support for atomic operations. */
2577 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2578 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2579
2580 /* There are no special library functions unless we are using the
2581 ARM BPABI. */
2582 if (!TARGET_BPABI)
2583 return;
2584
2585 /* The functions below are described in Section 4 of the "Run-Time
2586 ABI for the ARM architecture", Version 1.0. */
2587
2588 /* Double-precision floating-point arithmetic. Table 2. */
2589 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2590 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2591 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2592 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2593 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2594
2595 /* Double-precision comparisons. Table 3. */
2596 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2597 set_optab_libfunc (ne_optab, DFmode, NULL);
2598 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2599 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2600 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2601 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2602 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2603
2604 /* Single-precision floating-point arithmetic. Table 4. */
2605 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2606 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2607 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2608 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2609 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2610
2611 /* Single-precision comparisons. Table 5. */
2612 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2613 set_optab_libfunc (ne_optab, SFmode, NULL);
2614 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2615 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2616 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2617 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2618 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2619
2620 /* Floating-point to integer conversions. Table 6. */
2621 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2622 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2623 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2624 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2625 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2626 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2627 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2628 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2629
2630 /* Conversions between floating types. Table 7. */
2631 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2632 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2633
2634 /* Integer to floating-point conversions. Table 8. */
2635 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2636 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2637 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2638 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2639 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2640 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2641 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2642 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2643
2644 /* Long long. Table 9. */
2645 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2646 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2647 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2648 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2649 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2650 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2651 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2652 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2653
2654 /* Integer (32/32->32) division. \S 4.3.1. */
2655 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2656 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2657
2658 /* The divmod functions are designed so that they can be used for
2659 plain division, even though they return both the quotient and the
2660 remainder. The quotient is returned in the usual location (i.e.,
2661 r0 for SImode, {r0, r1} for DImode), just as would be expected
2662 for an ordinary division routine. Because the AAPCS calling
2663 conventions specify that all of { r0, r1, r2, r3 } are
2664 call-clobbered registers, there is no need to tell the compiler
2665 explicitly that those registers are clobbered by these
2666 routines. */
2667 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2668 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
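/* Concretely, __aeabi_idivmod returns the quotient in r0 and the remainder
   in r1, while __aeabi_ldivmod returns the quotient in {r0, r1} and the
   remainder in {r2, r3}; a plain division simply ignores the remainder
   half.  */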
2669
2670 /* For SImode division the ABI provides div-without-mod routines,
2671 which are faster. */
2672 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2673 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2674
2675 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2676 divmod libcalls instead. */
2677 set_optab_libfunc (smod_optab, DImode, NULL);
2678 set_optab_libfunc (umod_optab, DImode, NULL);
2679 set_optab_libfunc (smod_optab, SImode, NULL);
2680 set_optab_libfunc (umod_optab, SImode, NULL);
2681
2682 /* Half-precision float operations. The compiler handles all operations
2683 with NULL libfuncs by converting to SFmode. */
2684 switch (arm_fp16_format)
2685 {
2686 case ARM_FP16_FORMAT_IEEE:
2687 case ARM_FP16_FORMAT_ALTERNATIVE:
2688
2689 /* Conversions. */
2690 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2691 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2692 ? "__gnu_f2h_ieee"
2693 : "__gnu_f2h_alternative"));
2694 set_conv_libfunc (sext_optab, SFmode, HFmode,
2695 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2696 ? "__gnu_h2f_ieee"
2697 : "__gnu_h2f_alternative"));
2698
2699 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2700 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2701 ? "__gnu_d2h_ieee"
2702 : "__gnu_d2h_alternative"));
2703
2704 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2705 break;
2706
2707 default:
2708 break;
2709 }
2710
2711 /* For all possible libcalls in BFmode, record NULL. */
2712 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2713 {
2714 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2715 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2716 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2717 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2718 }
2719 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2720
2721 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2722 {
2723 const arm_fixed_mode_set fixed_arith_modes[] =
2724 {
2725 { E_QQmode, "qq" },
2726 { E_UQQmode, "uqq" },
2727 { E_HQmode, "hq" },
2728 { E_UHQmode, "uhq" },
2729 { E_SQmode, "sq" },
2730 { E_USQmode, "usq" },
2731 { E_DQmode, "dq" },
2732 { E_UDQmode, "udq" },
2733 { E_TQmode, "tq" },
2734 { E_UTQmode, "utq" },
2735 { E_HAmode, "ha" },
2736 { E_UHAmode, "uha" },
2737 { E_SAmode, "sa" },
2738 { E_USAmode, "usa" },
2739 { E_DAmode, "da" },
2740 { E_UDAmode, "uda" },
2741 { E_TAmode, "ta" },
2742 { E_UTAmode, "uta" }
2743 };
2744 const arm_fixed_mode_set fixed_conv_modes[] =
2745 {
2746 { E_QQmode, "qq" },
2747 { E_UQQmode, "uqq" },
2748 { E_HQmode, "hq" },
2749 { E_UHQmode, "uhq" },
2750 { E_SQmode, "sq" },
2751 { E_USQmode, "usq" },
2752 { E_DQmode, "dq" },
2753 { E_UDQmode, "udq" },
2754 { E_TQmode, "tq" },
2755 { E_UTQmode, "utq" },
2756 { E_HAmode, "ha" },
2757 { E_UHAmode, "uha" },
2758 { E_SAmode, "sa" },
2759 { E_USAmode, "usa" },
2760 { E_DAmode, "da" },
2761 { E_UDAmode, "uda" },
2762 { E_TAmode, "ta" },
2763 { E_UTAmode, "uta" },
2764 { E_QImode, "qi" },
2765 { E_HImode, "hi" },
2766 { E_SImode, "si" },
2767 { E_DImode, "di" },
2768 { E_TImode, "ti" },
2769 { E_SFmode, "sf" },
2770 { E_DFmode, "df" }
2771 };
2772 unsigned int i, j;
2773
2774 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2775 {
2776 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2777 "add", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2779 "ssadd", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2781 "usadd", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2783 "sub", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2785 "sssub", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2787 "ussub", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2789 "mul", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2791 "ssmul", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2793 "usmul", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2795 "div", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2797 "udiv", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2799 "ssdiv", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2801 "usdiv", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2803 "neg", fixed_arith_modes[i].name, 2);
2804 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2805 "ssneg", fixed_arith_modes[i].name, 2);
2806 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2807 "usneg", fixed_arith_modes[i].name, 2);
2808 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2809 "ashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2811 "ashr", fixed_arith_modes[i].name, 3);
2812 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2813 "lshr", fixed_arith_modes[i].name, 3);
2814 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2815 "ssashl", fixed_arith_modes[i].name, 3);
2816 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2817 "usashl", fixed_arith_modes[i].name, 3);
2818 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2819 "cmp", fixed_arith_modes[i].name, 2);
2820 }
2821
2822 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2823 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2824 {
2825 if (i == j
2826 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2827 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2828 continue;
2829
2830 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2831 fixed_conv_modes[j].mode, "fract",
2832 fixed_conv_modes[i].name,
2833 fixed_conv_modes[j].name);
2834 arm_set_fixed_conv_libfunc (satfract_optab,
2835 fixed_conv_modes[i].mode,
2836 fixed_conv_modes[j].mode, "satfract",
2837 fixed_conv_modes[i].name,
2838 fixed_conv_modes[j].name);
2839 arm_set_fixed_conv_libfunc (fractuns_optab,
2840 fixed_conv_modes[i].mode,
2841 fixed_conv_modes[j].mode, "fractuns",
2842 fixed_conv_modes[i].name,
2843 fixed_conv_modes[j].name);
2844 arm_set_fixed_conv_libfunc (satfractuns_optab,
2845 fixed_conv_modes[i].mode,
2846 fixed_conv_modes[j].mode, "satfractuns",
2847 fixed_conv_modes[i].name,
2848 fixed_conv_modes[j].name);
2849 }
2850 }
2851
2852 if (TARGET_AAPCS_BASED)
2853 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2854
2855 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2856 }
2857
2858 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2859 static bool
2860 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2861 {
2862 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2863 tree fndecl = gimple_call_fndecl (stmt);
2864 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2865 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
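/* The MD function code packs the builtin class into the bits covered by
   ARM_BUILTIN_CLASS and the per-class builtin number into the bits from
   ARM_BUILTIN_SHIFT upwards, hence the shift above and the mask in the
   switch below.  */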
2866 gimple *new_stmt = NULL;
2867 switch (code & ARM_BUILTIN_CLASS)
2868 {
2869 case ARM_BUILTIN_GENERAL:
2870 break;
2871 case ARM_BUILTIN_MVE:
2872 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2873 }
2874 if (!new_stmt)
2875 return false;
2876
2877 gsi_replace (gsi, new_stmt, true);
2878 return true;
2879 }
2880
2881 /* On AAPCS systems, this is the "struct __va_list". */
2882 static GTY(()) tree va_list_type;
2883
2884 /* Return the type to use as __builtin_va_list. */
2885 static tree
2886 arm_build_builtin_va_list (void)
2887 {
2888 tree va_list_name;
2889 tree ap_field;
2890
2891 if (!TARGET_AAPCS_BASED)
2892 return std_build_builtin_va_list ();
2893
2894 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2895 defined as:
2896
2897 struct __va_list
2898 {
2899 void *__ap;
2900 };
2901
2902 The C Library ABI further reinforces this definition in \S
2903 4.1.
2904
2905 We must follow this definition exactly. The structure tag
2906 name is visible in C++ mangled names, and thus forms a part
2907 of the ABI. The field name may be used by people who
2908 #include <stdarg.h>. */
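/* As an illustration of the mangling point (on a typical AAPCS C++
   toolchain, e.g. arm-linux-gnueabihf g++): "void f (va_list);" mangles as
   _Z1fSt9__va_list, because the ABI treats __va_list as living in
   namespace std for mangling, so changing the tag name here would change
   mangled names.  */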
2909 /* Create the type. */
2910 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2911 /* Give it the required name. */
2912 va_list_name = build_decl (BUILTINS_LOCATION,
2913 TYPE_DECL,
2914 get_identifier ("__va_list"),
2915 va_list_type);
2916 DECL_ARTIFICIAL (va_list_name) = 1;
2917 TYPE_NAME (va_list_type) = va_list_name;
2918 TYPE_STUB_DECL (va_list_type) = va_list_name;
2919 /* Create the __ap field. */
2920 ap_field = build_decl (BUILTINS_LOCATION,
2921 FIELD_DECL,
2922 get_identifier ("__ap"),
2923 ptr_type_node);
2924 DECL_ARTIFICIAL (ap_field) = 1;
2925 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2926 TYPE_FIELDS (va_list_type) = ap_field;
2927 /* Compute its layout. */
2928 layout_type (va_list_type);
2929
2930 return va_list_type;
2931 }
2932
2933 /* Return an expression of type "void *" pointing to the next
2934 available argument in a variable-argument list. VALIST is the
2935 user-level va_list object, of type __builtin_va_list. */
2936 static tree
2937 arm_extract_valist_ptr (tree valist)
2938 {
2939 if (TREE_TYPE (valist) == error_mark_node)
2940 return error_mark_node;
2941
2942 /* On an AAPCS target, the pointer is stored within "struct
2943 va_list". */
2944 if (TARGET_AAPCS_BASED)
2945 {
2946 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2947 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2948 valist, ap_field, NULL_TREE);
2949 }
2950
2951 return valist;
2952 }
2953
2954 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2955 static void
2956 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2957 {
2958 valist = arm_extract_valist_ptr (valist);
2959 std_expand_builtin_va_start (valist, nextarg);
2960 }
2961
2962 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2963 static tree
2964 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2965 gimple_seq *post_p)
2966 {
2967 valist = arm_extract_valist_ptr (valist);
2968 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2969 }
2970
2971 /* Check any incompatible options that the user has specified. */
2972 static void
2973 arm_option_check_internal (struct gcc_options *opts)
2974 {
2975 int flags = opts->x_target_flags;
2976
2977 /* iWMMXt and NEON are incompatible. */
2978 if (TARGET_IWMMXT
2979 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2980 error ("iWMMXt and NEON are incompatible");
2981
2982 /* Make sure that the processor choice does not conflict with any of the
2983 other command line choices. */
2984 if (TARGET_ARM_P (flags)
2985 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2986 error ("target CPU does not support ARM mode");
2987
2988 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2989 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2990 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2991
2992 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2993 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2994
2995 /* If this target is normally configured to use APCS frames, warn if they
2996 are turned off and debugging is turned on. */
2997 if (TARGET_ARM_P (flags)
2998 && write_symbols != NO_DEBUG
2999 && !TARGET_APCS_FRAME
3000 && (TARGET_DEFAULT & MASK_APCS_FRAME))
3001 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3002 "debugging");
3003
3004 /* iWMMXt unsupported under Thumb mode. */
3005 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3006 error ("iWMMXt unsupported under Thumb mode");
3007
3008 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3009 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3010
3011 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3012 {
3013 error ("RTP PIC is incompatible with Thumb");
3014 flag_pic = 0;
3015 }
3016
3017 if (target_pure_code || target_slow_flash_data)
3018 {
3019 const char *flag = (target_pure_code ? "-mpure-code" :
3020 "-mslow-flash-data");
3021 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3022
3023 /* We only support -mslow-flash-data on M-profile targets with
3024 MOVT. */
3025 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3026 error ("%s only supports non-pic code on M-profile targets with the "
3027 "MOVT instruction", flag);
3028
3029 /* We only support -mpure-code on M-profile targets. */
3030 if (target_pure_code && common_unsupported_modes)
3031 error ("%s only supports non-pic code on M-profile targets", flag);
3032
3033 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3034 -mword-relocations forbids relocation of MOVT/MOVW. */
3035 if (target_word_relocations)
3036 error ("%s incompatible with %<-mword-relocations%>", flag);
3037 }
3038 }
3039
3040 /* Recompute the global settings depending on target attribute options. */
3041
3042 static void
3043 arm_option_params_internal (void)
3044 {
3045 /* If we are not using the default (ARM mode) section anchor offset
3046 ranges, then set the correct ranges now. */
3047 if (TARGET_THUMB1)
3048 {
3049 /* Thumb-1 LDR instructions cannot have negative offsets.
3050 Permissible positive offset ranges are 5-bit (for byte loads),
3051 6-bit (for halfword loads), or 7-bit (for word loads).
3052 Empirical results suggest a 7-bit anchor range gives the best
3053 overall code size. */
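/* In byte terms (a sketch of the underlying arithmetic): Thumb-1 LDRB
   reaches offsets 0-31, LDRH 0-62 in steps of 2, and word LDR 0-124 in
   steps of 4, so an upper bound of 127 covers everything a single
   immediate-offset load can reach from an anchor.  */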
3054 targetm.min_anchor_offset = 0;
3055 targetm.max_anchor_offset = 127;
3056 }
3057 else if (TARGET_THUMB2)
3058 {
3059 /* The minimum is set such that the total size of the block
3060 for a particular anchor is 248 + 1 + 4095 bytes, which is
3061 divisible by eight, ensuring natural spacing of anchors. */
3062 targetm.min_anchor_offset = -248;
3063 targetm.max_anchor_offset = 4095;
3064 }
3065 else
3066 {
3067 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3068 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3069 }
3070
3071 /* Increase the number of conditional instructions with -Os. */
3072 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3073
3074 /* For THUMB2, we limit the conditional sequence to one IT block. */
3075 if (TARGET_THUMB2)
3076 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
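/* An IT instruction can predicate at most four subsequent instructions,
   which is the limit MAX_INSN_PER_IT_BLOCK encodes, so the MIN above never
   lets a conditionalised run spill over into a second IT block.  */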
3077
3078 if (TARGET_THUMB1)
3079 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3080 else
3081 targetm.md_asm_adjust = arm_md_asm_adjust;
3082 }
3083
3084 /* True if -mflip-thumb should next add an attribute for the default
3085 mode, false if it should next add an attribute for the opposite mode. */
3086 static GTY(()) bool thumb_flipper;
3087
3088 /* Options after initial target override. */
3089 static GTY(()) tree init_optimize;
3090
3091 static void
3092 arm_override_options_after_change_1 (struct gcc_options *opts,
3093 struct gcc_options *opts_set)
3094 {
3095 /* -falign-functions without argument: supply one. */
3096 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3097 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3098 && opts->x_optimize_size ? "2" : "4";
3099 }
3100
3101 /* Implement targetm.override_options_after_change. */
3102
3103 static void
3104 arm_override_options_after_change (void)
3105 {
3106 arm_override_options_after_change_1 (&global_options, &global_options_set);
3107 }
3108
3109 /* Implement TARGET_OPTION_RESTORE. */
3110 static void
3111 arm_option_restore (struct gcc_options */* opts */,
3112 struct gcc_options */* opts_set */,
3113 struct cl_target_option *ptr)
3114 {
3115 arm_configure_build_target (&arm_active_target, ptr, false);
3116 arm_option_reconfigure_globals ();
3117 }
3118
3119 /* Reset options between modes that the user has specified. */
3120 static void
3121 arm_option_override_internal (struct gcc_options *opts,
3122 struct gcc_options *opts_set)
3123 {
3124 arm_override_options_after_change_1 (opts, opts_set);
3125
3126 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3127 {
3128 /* The default is to enable interworking, so this warning message would
3129 be confusing to users who have just compiled with
3130 eg, -march=armv4. */
3131 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3132 opts->x_target_flags &= ~MASK_INTERWORK;
3133 }
3134
3135 if (TARGET_THUMB_P (opts->x_target_flags)
3136 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3137 {
3138 warning (0, "target CPU does not support THUMB instructions");
3139 opts->x_target_flags &= ~MASK_THUMB;
3140 }
3141
3142 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3143 {
3144 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3145 opts->x_target_flags &= ~MASK_APCS_FRAME;
3146 }
3147
3148 /* Callee super interworking implies thumb interworking. Adding
3149 this to the flags here simplifies the logic elsewhere. */
3150 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3151 opts->x_target_flags |= MASK_INTERWORK;
3152
3153 /* Need to remember initial values so combinations of options like
3154 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3155 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3156
3157 if (! opts_set->x_arm_restrict_it)
3158 opts->x_arm_restrict_it = arm_arch8;
3159
3160 /* ARM execution state and M profile don't have [restrict] IT. */
3161 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3162 opts->x_arm_restrict_it = 0;
3163
3164 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3165 if (!opts_set->x_arm_restrict_it
3166 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3167 opts->x_arm_restrict_it = 0;
3168
3169 /* Enable -munaligned-access by default for
3170 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3171 i.e. Thumb2 and ARM state only.
3172 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3173 - ARMv8 architecture-based processors.
3174
3175 Disable -munaligned-access by default for
3176 - all pre-ARMv6 architecture-based processors
3177 - ARMv6-M architecture-based processors
3178 - ARMv8-M Baseline processors. */
3179
3180 if (! opts_set->x_unaligned_access)
3181 {
3182 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3183 && arm_arch6 && (arm_arch_notm || arm_arch7));
3184 }
3185 else if (opts->x_unaligned_access == 1
3186 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3187 {
3188 warning (0, "target CPU does not support unaligned accesses");
3189 opts->x_unaligned_access = 0;
3190 }
3191
3192 /* Don't warn since it's on by default in -O2. */
3193 if (TARGET_THUMB1_P (opts->x_target_flags))
3194 opts->x_flag_schedule_insns = 0;
3195 else
3196 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3197
3198 /* Disable shrink-wrap when optimizing function for size, since it tends to
3199 generate additional returns. */
3200 if (optimize_function_for_size_p (cfun)
3201 && TARGET_THUMB2_P (opts->x_target_flags))
3202 opts->x_flag_shrink_wrap = false;
3203 else
3204 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3205
3206 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3207 - epilogue_insns - does not accurately model the corresponding insns
3208 emitted in the asm file. In particular, see the comment in thumb_exit
3209 'Find out how many of the (return) argument registers we can corrupt'.
3210 As a consequence, the epilogue may clobber registers without fipa-ra
3211 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3212 TODO: Accurately model clobbers for epilogue_insns and reenable
3213 fipa-ra. */
3214 if (TARGET_THUMB1_P (opts->x_target_flags))
3215 opts->x_flag_ipa_ra = 0;
3216 else
3217 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3218
3219 /* Thumb2 inline assembly code should always use unified syntax.
3220 This will apply to ARM and Thumb1 eventually. */
3221 if (TARGET_THUMB2_P (opts->x_target_flags))
3222 opts->x_inline_asm_unified = true;
3223
3224 if (arm_stack_protector_guard == SSP_GLOBAL
3225 && opts->x_arm_stack_protector_guard_offset_str)
3226 {
3227 error ("incompatible options %<-mstack-protector-guard=global%> and "
3228 "%<-mstack-protector-guard-offset=%s%>",
3229 arm_stack_protector_guard_offset_str);
3230 }
3231
3232 if (opts->x_arm_stack_protector_guard_offset_str)
3233 {
3234 char *end;
3235 const char *str = arm_stack_protector_guard_offset_str;
3236 errno = 0;
3237 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3238 if (!*str || *end || errno)
3239 error ("%qs is not a valid offset in %qs", str,
3240 "-mstack-protector-guard-offset=");
3241 arm_stack_protector_guard_offset = offs;
3242 }
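/* A usage sketch for the two checks above (illustrative command lines only):
   the offset is parsed by strtol with base 0, so decimal and hexadecimal
   spellings both work, and the offset only makes sense with the TLS guard:

     gcc -mstack-protector-guard=tls -mstack-protector-guard-offset=0x20 ...   # accepted
     gcc -mstack-protector-guard=global -mstack-protector-guard-offset=32 ...  # rejected above  */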
3243
3244 if (arm_current_function_pac_enabled_p ())
3245 {
3246 if (!arm_arch8m_main)
3247 error ("This architecture does not support branch protection "
3248 "instructions");
3249 if (TARGET_TPCS_FRAME)
3250 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3251 }
3252
3253 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3254 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3255 #endif
3256 }
3257
3258 static sbitmap isa_all_fpubits_internal;
3259 static sbitmap isa_all_fpbits;
3260 static sbitmap isa_quirkbits;
3261
3262 /* Configure a build target TARGET from the user-specified options OPTS.
3263 If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3264 architecture have been specified, but the two are not identical.  */
3265 void
3266 arm_configure_build_target (struct arm_build_target *target,
3267 struct cl_target_option *opts,
3268 bool warn_compatible)
3269 {
3270 const cpu_option *arm_selected_tune = NULL;
3271 const arch_option *arm_selected_arch = NULL;
3272 const cpu_option *arm_selected_cpu = NULL;
3273 const arm_fpu_desc *arm_selected_fpu = NULL;
3274 const char *tune_opts = NULL;
3275 const char *arch_opts = NULL;
3276 const char *cpu_opts = NULL;
3277
3278 bitmap_clear (target->isa);
3279 target->core_name = NULL;
3280 target->arch_name = NULL;
3281
3282 if (opts->x_arm_arch_string)
3283 {
3284 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3285 "-march",
3286 opts->x_arm_arch_string);
3287 arch_opts = strchr (opts->x_arm_arch_string, '+');
3288 }
3289
3290 if (opts->x_arm_cpu_string)
3291 {
3292 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3293 opts->x_arm_cpu_string);
3294 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3295 arm_selected_tune = arm_selected_cpu;
3296 /* If taking the tuning from -mcpu, we don't need to rescan the
3297 options for tuning. */
3298 }
3299
3300 if (opts->x_arm_tune_string)
3301 {
3302 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3303 opts->x_arm_tune_string);
3304 tune_opts = strchr (opts->x_arm_tune_string, '+');
3305 }
3306
3307 if (opts->x_arm_branch_protection_string)
3308 {
3309 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3310
3311 if (aarch_ra_sign_key != AARCH_KEY_A)
3312 {
3313 warning (0, "invalid key type for %<-mbranch-protection=%>");
3314 aarch_ra_sign_key = AARCH_KEY_A;
3315 }
3316 }
3317
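/* An illustrative example of how the reconciliation below plays out (not an
   exhaustive description): -march=armv7-a -mcpu=cortex-m4 selects two
   different ISA sets, so with WARN_COMPATIBLE a "switch -mcpu=... conflicts
   with switch -march=..." warning is issued; -march=armv7-a then wins for
   code generation while cortex-m4 still supplies the default tuning.  */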
3318 if (arm_selected_arch)
3319 {
3320 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3321 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3322 arch_opts);
3323
3324 if (arm_selected_cpu)
3325 {
3326 auto_sbitmap cpu_isa (isa_num_bits);
3327 auto_sbitmap isa_delta (isa_num_bits);
3328
3329 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3330 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3331 cpu_opts);
3332 bitmap_xor (isa_delta, cpu_isa, target->isa);
3333 /* Ignore any bits that are quirk bits. */
3334 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3335 /* If the user (or the default configuration) has specified a
3336 specific FPU, then ignore any bits that depend on the FPU
3337 configuration. Do similarly if using the soft-float
3338 ABI. */
3339 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3340 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3341 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3342
3343 if (!bitmap_empty_p (isa_delta))
3344 {
3345 if (warn_compatible)
3346 warning (0, "switch %<-mcpu=%s%> conflicts "
3347 "with switch %<-march=%s%>",
3348 opts->x_arm_cpu_string,
3349 opts->x_arm_arch_string);
3350
3351 /* -march wins for code generation.
3352 -mcpu wins for default tuning. */
3353 if (!arm_selected_tune)
3354 arm_selected_tune = arm_selected_cpu;
3355
3356 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3357 target->arch_name = arm_selected_arch->common.name;
3358 }
3359 else
3360 {
3361 /* Architecture and CPU are essentially the same.
3362 Prefer the CPU setting. */
3363 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3364 target->core_name = arm_selected_cpu->common.name;
3365 /* Copy the CPU's capabilities, so that we inherit the
3366 appropriate extensions and quirks. */
3367 bitmap_copy (target->isa, cpu_isa);
3368 }
3369 }
3370 else
3371 {
3372 /* Pick a CPU based on the architecture. */
3373 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3374 target->arch_name = arm_selected_arch->common.name;
3375 /* Note: target->core_name is left unset in this path. */
3376 }
3377 }
3378 else if (arm_selected_cpu)
3379 {
3380 target->core_name = arm_selected_cpu->common.name;
3381 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3382 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3383 cpu_opts);
3384 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3385 }
3386 /* If the user did not specify a processor or architecture, choose
3387 one for them. */
3388 else
3389 {
3390 const cpu_option *sel;
3391 auto_sbitmap sought_isa (isa_num_bits);
3392 bitmap_clear (sought_isa);
3393 auto_sbitmap default_isa (isa_num_bits);
3394
3395 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3396 TARGET_CPU_DEFAULT);
3397 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3398 gcc_assert (arm_selected_cpu->common.name);
3399
3400 /* RWE: All of the selection logic below (to the end of this
3401 'if' clause) looks somewhat suspect. It appears to be mostly
3402 there to support forcing thumb support when the default CPU
3403 does not have thumb (somewhat dubious in terms of what the
3404 user might be expecting). I think it should be removed once
3405 support for the pre-thumb era cores is removed. */
3406 sel = arm_selected_cpu;
3407 arm_initialize_isa (default_isa, sel->common.isa_bits);
3408 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3409 cpu_opts);
3410
3411 /* Now check to see if the user has specified any command line
3412 switches that require certain abilities from the cpu. */
3413
3414 if (TARGET_INTERWORK || TARGET_THUMB)
3415 bitmap_set_bit (sought_isa, isa_bit_thumb);
3416
3417 /* If there are such requirements and the default CPU does not
3418 satisfy them, we need to run over the complete list of
3419 cores looking for one that is satisfactory. */
3420 if (!bitmap_empty_p (sought_isa)
3421 && !bitmap_subset_p (sought_isa, default_isa))
3422 {
3423 auto_sbitmap candidate_isa (isa_num_bits);
3424 /* We're only interested in a CPU with at least the
3425 capabilities of the default CPU and the required
3426 additional features. */
3427 bitmap_ior (default_isa, default_isa, sought_isa);
3428
3429 /* Try to locate a CPU type that supports all of the abilities
3430 of the default CPU, plus the extra abilities requested by
3431 the user. */
3432 for (sel = all_cores; sel->common.name != NULL; sel++)
3433 {
3434 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3435 /* An exact match? */
3436 if (bitmap_equal_p (default_isa, candidate_isa))
3437 break;
3438 }
3439
3440 if (sel->common.name == NULL)
3441 {
3442 unsigned current_bit_count = isa_num_bits;
3443 const cpu_option *best_fit = NULL;
3444
3445 /* Ideally we would like to issue an error message here
3446 saying that it was not possible to find a CPU compatible
3447 with the default CPU, but which also supports the command
3448 line options specified by the programmer, and so they
3449 ought to use the -mcpu=<name> command line option to
3450 override the default CPU type.
3451
3452 If we cannot find a CPU that has exactly the
3453 characteristics of the default CPU and the given
3454 command line options we scan the array again looking
3455 for a best match. The best match must have at least
3456 the capabilities of the perfect match. */
3457 for (sel = all_cores; sel->common.name != NULL; sel++)
3458 {
3459 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3460
3461 if (bitmap_subset_p (default_isa, candidate_isa))
3462 {
3463 unsigned count;
3464
3465 bitmap_and_compl (candidate_isa, candidate_isa,
3466 default_isa);
3467 count = bitmap_popcount (candidate_isa);
3468
3469 if (count < current_bit_count)
3470 {
3471 best_fit = sel;
3472 current_bit_count = count;
3473 }
3474 }
3475
3476 gcc_assert (best_fit);
3477 sel = best_fit;
3478 }
3479 }
3480 arm_selected_cpu = sel;
3481 }
3482
3483 /* Now we know the CPU, we can finally initialize the target
3484 structure. */
3485 target->core_name = arm_selected_cpu->common.name;
3486 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3487 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3488 cpu_opts);
3489 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3490 }
3491
3492 gcc_assert (arm_selected_cpu);
3493 gcc_assert (arm_selected_arch);
3494
3495 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3496 {
3497 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3498 auto_sbitmap fpu_bits (isa_num_bits);
3499
3500 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3501 /* This should clear out ALL bits relating to the FPU/simd
3502 extensions, to avoid potentially invalid combinations later on
3503 that we can't match. At present we only clear out those bits
3504 that can be set by -mfpu. This should be fixed in GCC-12. */
3505 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3506 bitmap_ior (target->isa, target->isa, fpu_bits);
3507 }
3508
3509 /* If we have the soft-float ABI, clear any feature bits relating to use of
3510 floating-point operations. They'll just confuse things later on. */
3511 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3512 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3513
3514 /* There may be implied bits which we still need to enable. These are
3515 non-named features which are needed to complete other sets of features,
3516 but cannot be enabled from arm-cpus.in due to being shared between
3517 multiple fgroups. Each entry in all_implied_fbits is of the form
3518 ante -> cons, meaning that if the feature "ante" is enabled, we should
3519 implicitly enable "cons". */
3520 const struct fbit_implication *impl = all_implied_fbits;
3521 while (impl->ante)
3522 {
3523 if (bitmap_bit_p (target->isa, impl->ante))
3524 bitmap_set_bit (target->isa, impl->cons);
3525 impl++;
3526 }
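/* A minimal sketch of what one implication entry means (the feature names
   here are hypothetical; the real table is generated from arm-cpus.in):

     { .ante = isa_bit_foo, .cons = isa_bit_bar }   -- enabling foo enables bar

   with the table terminated by an entry whose ante field is zero.  */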
3527
3528 if (!arm_selected_tune)
3529 arm_selected_tune = arm_selected_cpu;
3530 else /* Validate the features passed to -mtune. */
3531 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3532
3533 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3534
3535 /* Finish initializing the target structure. */
3536 if (!target->arch_name)
3537 target->arch_name = arm_selected_arch->common.name;
3538 target->arch_pp_name = arm_selected_arch->arch;
3539 target->base_arch = arm_selected_arch->base_arch;
3540 target->profile = arm_selected_arch->profile;
3541
3542 target->tune_flags = tune_data->tune_flags;
3543 target->tune = tune_data->tune;
3544 target->tune_core = tune_data->scheduler;
3545 }
3546
3547 /* Fix up any incompatible options that the user has specified. */
3548 static void
3549 arm_option_override (void)
3550 {
3551 static const enum isa_feature fpu_bitlist_internal[]
3552 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3553 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3554 static const enum isa_feature fp_bitlist[]
3555 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3556 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3557 cl_target_option opts;
3558
3559 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3560 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3561
3562 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3563 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3564 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3565 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3566
3567 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3568
3569 if (!OPTION_SET_P (arm_fpu_index))
3570 {
3571 bool ok;
3572 int fpu_index;
3573
3574 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3575 CL_TARGET);
3576 gcc_assert (ok);
3577 arm_fpu_index = (enum fpu_type) fpu_index;
3578 }
3579
3580 cl_target_option_save (&opts, &global_options, &global_options_set);
3581 arm_configure_build_target (&arm_active_target, &opts, true);
3582
3583 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3584 SUBTARGET_OVERRIDE_OPTIONS;
3585 #endif
3586
3587 /* Initialize boolean versions of the architectural flags, for use
3588 in the arm.md file and for enabling feature flags. */
3589 arm_option_reconfigure_globals ();
3590
3591 arm_tune = arm_active_target.tune_core;
3592 tune_flags = arm_active_target.tune_flags;
3593 current_tune = arm_active_target.tune;
3594
3595 /* TBD: Dwarf info for apcs frame is not handled yet. */
3596 if (TARGET_APCS_FRAME)
3597 flag_shrink_wrap = false;
3598
3599 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3600 {
3601 warning (0, "%<-mapcs-stack-check%> incompatible with "
3602 "%<-mno-apcs-frame%>");
3603 target_flags |= MASK_APCS_FRAME;
3604 }
3605
3606 if (TARGET_POKE_FUNCTION_NAME)
3607 target_flags |= MASK_APCS_FRAME;
3608
3609 if (TARGET_APCS_REENT && flag_pic)
3610 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3611
3612 if (TARGET_APCS_REENT)
3613 warning (0, "APCS reentrant code not supported. Ignored");
3614
3615 /* Set up some tuning parameters. */
3616 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3617 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3618 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3619 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3620 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3621 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3622
3623 /* For arm2/3 there is no need to do any scheduling if we are doing
3624 software floating-point. */
3625 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3626 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3627
3628 /* Override the default structure alignment for AAPCS ABI. */
3629 if (!OPTION_SET_P (arm_structure_size_boundary))
3630 {
3631 if (TARGET_AAPCS_BASED)
3632 arm_structure_size_boundary = 8;
3633 }
3634 else
3635 {
3636 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3637
3638 if (arm_structure_size_boundary != 8
3639 && arm_structure_size_boundary != 32
3640 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3641 {
3642 if (ARM_DOUBLEWORD_ALIGN)
3643 warning (0,
3644 "structure size boundary can only be set to 8, 32 or 64");
3645 else
3646 warning (0, "structure size boundary can only be set to 8 or 32");
3647 arm_structure_size_boundary
3648 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3649 }
3650 }
3651
3652 if (TARGET_VXWORKS_RTP)
3653 {
3654 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3655 arm_pic_data_is_text_relative = 0;
3656 }
3657 else if (flag_pic
3658 && !arm_pic_data_is_text_relative
3659 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3660 /* When the text and data segments don't have a fixed displacement, the
3661 intended use is with a single, read-only PIC base register.
3662 Unless the user explicitly requested not to do that, set
3663 it.  */
3664 target_flags |= MASK_SINGLE_PIC_BASE;
3665
3666 /* If stack checking is disabled, we can use r10 as the PIC register,
3667 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3668 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3669 {
3670 if (TARGET_VXWORKS_RTP)
3671 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3672 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3673 }
3674
3675 if (flag_pic && TARGET_VXWORKS_RTP)
3676 arm_pic_register = 9;
3677
3678 /* If in FDPIC mode then force arm_pic_register to be r9. */
3679 if (TARGET_FDPIC)
3680 {
3681 arm_pic_register = FDPIC_REGNUM;
3682 if (TARGET_THUMB1)
3683 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3684 }
3685
3686 if (arm_pic_register_string != NULL)
3687 {
3688 int pic_register = decode_reg_name (arm_pic_register_string);
3689
3690 if (!flag_pic)
3691 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3692
3693 /* Prevent the user from choosing an obviously stupid PIC register. */
3694 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3695 || pic_register == HARD_FRAME_POINTER_REGNUM
3696 || pic_register == STACK_POINTER_REGNUM
3697 || pic_register >= PC_REGNUM
3698 || (TARGET_VXWORKS_RTP
3699 && (unsigned int) pic_register != arm_pic_register))
3700 error ("unable to use %qs for PIC register", arm_pic_register_string);
3701 else
3702 arm_pic_register = pic_register;
3703 }
3704
3705 if (flag_pic)
3706 target_word_relocations = 1;
3707
3708 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3709 if (fix_cm3_ldrd == 2)
3710 {
3711 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3712 fix_cm3_ldrd = 1;
3713 else
3714 fix_cm3_ldrd = 0;
3715 }
3716
3717 /* Enable fix_vlldm by default if required. */
3718 if (fix_vlldm == 2)
3719 {
3720 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3721 fix_vlldm = 1;
3722 else
3723 fix_vlldm = 0;
3724 }
3725
3726 /* Enable fix_aes by default if required. */
3727 if (fix_aes_erratum_1742098 == 2)
3728 {
3729 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3730 fix_aes_erratum_1742098 = 1;
3731 else
3732 fix_aes_erratum_1742098 = 0;
3733 }
3734
3735 /* Hot/Cold partitioning is not currently supported, since we can't
3736 handle literal pool placement in that case. */
3737 if (flag_reorder_blocks_and_partition)
3738 {
3739 inform (input_location,
3740 "%<-freorder-blocks-and-partition%> not supported "
3741 "on this architecture");
3742 flag_reorder_blocks_and_partition = 0;
3743 flag_reorder_blocks = 1;
3744 }
3745
3746 if (flag_pic)
3747 /* Hoisting PIC address calculations more aggressively provides a small,
3748 but measurable, size reduction for PIC code. Therefore, we decrease
3749 the bar for unrestricted expression hoisting to the cost of PIC address
3750 calculation, which is 2 instructions. */
3751 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3752 param_gcse_unrestricted_cost, 2);
3753
3754 /* ARM EABI defaults to strict volatile bitfields. */
3755 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3756 && abi_version_at_least(2))
3757 flag_strict_volatile_bitfields = 1;
3758
3759 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3760 which we have deemed it beneficial (signified by setting
3761 prefetch.num_slots to 1 or more).  */
3762 if (flag_prefetch_loop_arrays < 0
3763 && HAVE_prefetch
3764 && optimize >= 3
3765 && current_tune->prefetch.num_slots > 0)
3766 flag_prefetch_loop_arrays = 1;
3767
3768 /* Set up parameters to be used in prefetching algorithm. Do not
3769 override the defaults unless we are tuning for a core we have
3770 researched values for. */
3771 if (current_tune->prefetch.num_slots > 0)
3772 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3773 param_simultaneous_prefetches,
3774 current_tune->prefetch.num_slots);
3775 if (current_tune->prefetch.l1_cache_line_size >= 0)
3776 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3777 param_l1_cache_line_size,
3778 current_tune->prefetch.l1_cache_line_size);
3779 if (current_tune->prefetch.l1_cache_line_size >= 0)
3780 {
3781 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3782 param_destruct_interfere_size,
3783 current_tune->prefetch.l1_cache_line_size);
3784 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3785 param_construct_interfere_size,
3786 current_tune->prefetch.l1_cache_line_size);
3787 }
3788 else
3789 {
3790 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3791 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3792 constructive? */
3793 /* More recent Cortex chips have a 64-byte cache line, but are marked
3794 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3795 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3796 param_destruct_interfere_size, 64);
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_construct_interfere_size, 64);
3799 }
3800
3801 if (current_tune->prefetch.l1_cache_size >= 0)
3802 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3803 param_l1_cache_size,
3804 current_tune->prefetch.l1_cache_size);
3805
3806 /* Look through ready list and all of queue for instructions
3807 relevant for L2 auto-prefetcher. */
3808 int sched_autopref_queue_depth;
3809
3810 switch (current_tune->sched_autopref)
3811 {
3812 case tune_params::SCHED_AUTOPREF_OFF:
3813 sched_autopref_queue_depth = -1;
3814 break;
3815
3816 case tune_params::SCHED_AUTOPREF_RANK:
3817 sched_autopref_queue_depth = 0;
3818 break;
3819
3820 case tune_params::SCHED_AUTOPREF_FULL:
3821 sched_autopref_queue_depth = max_insn_queue_index + 1;
3822 break;
3823
3824 default:
3825 gcc_unreachable ();
3826 }
3827
3828 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3829 param_sched_autopref_queue_depth,
3830 sched_autopref_queue_depth);
3831
3832 /* Currently, for slow flash data, we just disable literal pools.  We also
3833 disable them for pure-code.  */
3834 if (target_slow_flash_data || target_pure_code)
3835 arm_disable_literal_pool = true;
3836
3837 /* Disable scheduling fusion by default if the processor is not ARMv7
3838 or does not prefer ldrd/strd.  */
3839 if (flag_schedule_fusion == 2
3840 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3841 flag_schedule_fusion = 0;
3842
3843 /* Need to remember initial options before they are overridden.  */
3844 init_optimize = build_optimization_node (&global_options,
3845 &global_options_set);
3846
3847 arm_options_perform_arch_sanity_checks ();
3848 arm_option_override_internal (&global_options, &global_options_set);
3849 arm_option_check_internal (&global_options);
3850 arm_option_params_internal ();
3851
3852 /* Create the default target_options structure. */
3853 target_option_default_node = target_option_current_node
3854 = build_target_option_node (&global_options, &global_options_set);
3855
3856 /* Register global variables with the garbage collector. */
3857 arm_add_gc_roots ();
3858
3859 /* Record the initial mode, for -mflip-thumb testing.  */
3860 thumb_flipper = TARGET_THUMB;
3861 }
3862
3863
3864 /* Reconfigure global status flags from the active_target.isa. */
3865 void
3866 arm_option_reconfigure_globals (void)
3867 {
3868 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3869 arm_base_arch = arm_active_target.base_arch;
3870
3871 /* Initialize boolean versions of the architectural flags, for use
3872 in the arm.md file. */
3873 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3874 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3875 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3876 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3877 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3878 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3879 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3880 arm_arch6m = arm_arch6 && !arm_arch_notm;
3881 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3882 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3883 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3884 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3885 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3886 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3887 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3888 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3889 isa_bit_armv8_1m_main);
3890 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3891 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3892 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3893 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3894 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3895 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3896 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3897 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3898 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3899 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3900 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3901 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3902 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3903
3904 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3905 if (arm_fp16_inst)
3906 {
3907 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3908 error ("selected fp16 options are incompatible");
3909 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3910 }
3911
3912 arm_arch_cde = 0;
3913 arm_arch_cde_coproc = 0;
3914 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3915 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3916 isa_bit_cdecp6, isa_bit_cdecp7};
3917 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3918 {
3919 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3920 if (cde_bit)
3921 {
3922 arm_arch_cde |= cde_bit;
3923 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3924 }
3925 }
3926
3927 /* And finally, set up some quirks. */
3928 arm_arch_no_volatile_ce
3929 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3930 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3931 isa_bit_quirk_armv6kz);
3932
3933 /* Use the cp15 method if it is available. */
3934 if (target_thread_pointer == TP_AUTO)
3935 {
3936 if (arm_arch6k && !TARGET_THUMB1)
3937 target_thread_pointer = TP_TPIDRURO;
3938 else
3939 target_thread_pointer = TP_SOFT;
3940 }
3941
3942 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3943 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3944 }
3945
3946 /* Perform some validation of the desired architecture against the rest of the
3947 options.  */
3948 void
3949 arm_options_perform_arch_sanity_checks (void)
3950 {
3951 /* V5T code we generate is completely interworking capable, so we turn off
3952 TARGET_INTERWORK here to avoid many tests later on. */
3953
3954 /* XXX However, we must pass the right pre-processor defines to CPP
3955 or GLD can get confused. This is a hack. */
3956 if (TARGET_INTERWORK)
3957 arm_cpp_interwork = 1;
3958
3959 if (arm_arch5t)
3960 target_flags &= ~MASK_INTERWORK;
3961
3962 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3963 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3964
3965 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3966 error ("iwmmxt abi requires an iwmmxt capable cpu");
3967
3968 /* BPABI targets use linker tricks to allow interworking on cores
3969 without thumb support. */
3970 if (TARGET_INTERWORK
3971 && !TARGET_BPABI
3972 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3973 {
3974 warning (0, "target CPU does not support interworking" );
3975 target_flags &= ~MASK_INTERWORK;
3976 }
3977
3978 /* If soft-float is specified then don't use FPU. */
3979 if (TARGET_SOFT_FLOAT)
3980 arm_fpu_attr = FPU_NONE;
3981 else
3982 arm_fpu_attr = FPU_VFP;
3983
3984 if (TARGET_AAPCS_BASED)
3985 {
3986 if (TARGET_CALLER_INTERWORKING)
3987 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3988 else
3989 if (TARGET_CALLEE_INTERWORKING)
3990 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3991 }
3992
3993 /* __fp16 support currently assumes the core has ldrh. */
3994 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3995 sorry ("%<__fp16%> and no ldrh");
3996
3997 if (use_cmse && !arm_arch_cmse)
3998 error ("target CPU does not support ARMv8-M Security Extensions");
3999
4000 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
4001 and ARMv8-M Baseline and Mainline do not allow such a configuration.  */
4002 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4003 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4004
4005
4006 if (TARGET_AAPCS_BASED)
4007 {
4008 if (arm_abi == ARM_ABI_IWMMXT)
4009 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4010 else if (TARGET_HARD_FLOAT_ABI)
4011 {
4012 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4013 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4014 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4015 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4016 }
4017 else
4018 arm_pcs_default = ARM_PCS_AAPCS;
4019 }
4020 else
4021 {
4022 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4023 sorry ("%<-mfloat-abi=hard%> and VFP");
4024
4025 if (arm_abi == ARM_ABI_APCS)
4026 arm_pcs_default = ARM_PCS_APCS;
4027 else
4028 arm_pcs_default = ARM_PCS_ATPCS;
4029 }
4030 }
4031
4032 /* Test whether a local function descriptor is canonical, i.e.,
4033 whether we can use GOTOFFFUNCDESC to compute the address of the
4034 function. */
4035 static bool
4036 arm_fdpic_local_funcdesc_p (rtx fnx)
4037 {
4038 tree fn;
4039 enum symbol_visibility vis;
4040 bool ret;
4041
4042 if (!TARGET_FDPIC)
4043 return true;
4044
4045 if (! SYMBOL_REF_LOCAL_P (fnx))
4046 return false;
4047
4048 fn = SYMBOL_REF_DECL (fnx);
4049
4050 if (! fn)
4051 return false;
4052
4053 vis = DECL_VISIBILITY (fn);
4054
4055 if (vis == VISIBILITY_PROTECTED)
4056 /* Private function descriptors for protected functions are not
4057 canonical. Temporarily change the visibility to global so that
4058 we can ensure uniqueness of funcdesc pointers. */
4059 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4060
4061 ret = default_binds_local_p_1 (fn, flag_pic);
4062
4063 DECL_VISIBILITY (fn) = vis;
4064
4065 return ret;
4066 }
4067
4068 static void
4069 arm_add_gc_roots (void)
4070 {
4071 gcc_obstack_init(&minipool_obstack);
4072 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4073 }
4074 \f
4075 /* A table of known ARM exception types.
4076 For use with the interrupt function attribute. */
4077
4078 typedef struct
4079 {
4080 const char *const arg;
4081 const unsigned long return_value;
4082 }
4083 isr_attribute_arg;
4084
4085 static const isr_attribute_arg isr_attribute_args [] =
4086 {
4087 { "IRQ", ARM_FT_ISR },
4088 { "irq", ARM_FT_ISR },
4089 { "FIQ", ARM_FT_FIQ },
4090 { "fiq", ARM_FT_FIQ },
4091 { "ABORT", ARM_FT_ISR },
4092 { "abort", ARM_FT_ISR },
4093 { "UNDEF", ARM_FT_EXCEPTION },
4094 { "undef", ARM_FT_EXCEPTION },
4095 { "SWI", ARM_FT_EXCEPTION },
4096 { "swi", ARM_FT_EXCEPTION },
4097 { NULL, ARM_FT_NORMAL }
4098 };
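/* Example uses of these attributes in user code (a sketch; the string must
   match one of the entries above, and omitting it defaults to IRQ, as
   handled in arm_isr_value below):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
     void __attribute__ ((isr ("FIQ"))) fiq_handler (void);  */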
4099
4100 /* Return the interrupt function type encoded by ARGUMENT (the argument of an
4101 "isr"/"interrupt" attribute), or ARM_FT_UNKNOWN if it cannot be determined.  */
4102
4103 static unsigned long
4104 arm_isr_value (tree argument)
4105 {
4106 const isr_attribute_arg * ptr;
4107 const char * arg;
4108
4109 if (!arm_arch_notm)
4110 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4111
4112 /* No argument - default to IRQ. */
4113 if (argument == NULL_TREE)
4114 return ARM_FT_ISR;
4115
4116 /* Get the value of the argument. */
4117 if (TREE_VALUE (argument) == NULL_TREE
4118 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4119 return ARM_FT_UNKNOWN;
4120
4121 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4122
4123 /* Check it against the list of known arguments. */
4124 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4125 if (streq (arg, ptr->arg))
4126 return ptr->return_value;
4127
4128 /* An unrecognized interrupt type. */
4129 return ARM_FT_UNKNOWN;
4130 }
4131
4132 /* Computes the type of the current function. */
4133
4134 static unsigned long
4135 arm_compute_func_type (void)
4136 {
4137 unsigned long type = ARM_FT_UNKNOWN;
4138 tree a;
4139 tree attr;
4140
4141 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4142
4143 /* Decide if the current function is volatile. Such functions
4144 never return, and many memory cycles can be saved by not storing
4145 register values that will never be needed again. This optimization
4146 was added to speed up context switching in a kernel application. */
4147 if (optimize > 0
4148 && (TREE_NOTHROW (current_function_decl)
4149 || !(flag_unwind_tables
4150 || (flag_exceptions
4151 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4152 && TREE_THIS_VOLATILE (current_function_decl))
4153 type |= ARM_FT_VOLATILE;
4154
4155 if (cfun->static_chain_decl != NULL)
4156 type |= ARM_FT_NESTED;
4157
4158 attr = DECL_ATTRIBUTES (current_function_decl);
4159
4160 a = lookup_attribute ("naked", attr);
4161 if (a != NULL_TREE)
4162 type |= ARM_FT_NAKED;
4163
4164 a = lookup_attribute ("isr", attr);
4165 if (a == NULL_TREE)
4166 a = lookup_attribute ("interrupt", attr);
4167
4168 if (a == NULL_TREE)
4169 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4170 else
4171 type |= arm_isr_value (TREE_VALUE (a));
4172
4173 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4174 type |= ARM_FT_CMSE_ENTRY;
4175
4176 return type;
4177 }
4178
4179 /* Returns the type of the current function. */
4180
4181 unsigned long
4182 arm_current_func_type (void)
4183 {
4184 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4185 cfun->machine->func_type = arm_compute_func_type ();
4186
4187 return cfun->machine->func_type;
4188 }
4189
4190 bool
4191 arm_allocate_stack_slots_for_args (void)
4192 {
4193 /* Naked functions should not allocate stack slots for arguments. */
4194 return !IS_NAKED (arm_current_func_type ());
4195 }
4196
4197 static bool
4198 arm_warn_func_return (tree decl)
4199 {
4200 /* Naked functions are implemented entirely in assembly, including the
4201 return sequence, so suppress warnings about this. */
4202 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4203 }
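/* For instance (a hypothetical example, not from this file), no
   missing-return warning is wanted for

     int __attribute__ ((naked)) f (void) { __asm__ ("bx lr"); }

   because the return sequence lives entirely in the asm.  */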
4204
4205 \f
4206 /* Output assembler code for a block containing the constant parts
4207 of a trampoline, leaving space for the variable parts.
4208
4209 On the ARM, (if r8 is the static chain regnum, and remembering that
4210 referencing pc adds an offset of 8) the trampoline looks like:
4211 ldr r8, [pc, #0]
4212 ldr pc, [pc]
4213 .word static chain value
4214 .word function's address
4215 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4216
4217 In FDPIC mode, the trampoline looks like:
4218 .word trampoline address
4219 .word trampoline GOT address
4220 ldr r12, [pc, #8] ; #4 for Arm mode
4221 ldr r9, [pc, #8] ; #4 for Arm mode
4222 ldr pc, [pc, #8] ; #4 for Arm mode
4223 .word static chain value
4224 .word GOT address
4225 .word function's address
4226 */
4227
4228 static void
4229 arm_asm_trampoline_template (FILE *f)
4230 {
4231 fprintf (f, "\t.syntax unified\n");
4232
4233 if (TARGET_FDPIC)
4234 {
4235 /* The first two words are a function descriptor pointing to the
4236 trampoline code just below. */
4237 if (TARGET_ARM)
4238 fprintf (f, "\t.arm\n");
4239 else if (TARGET_THUMB2)
4240 fprintf (f, "\t.thumb\n");
4241 else
4242 /* Only ARM and Thumb-2 are supported. */
4243 gcc_unreachable ();
4244
4245 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4246 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4247 /* Trampoline code which sets both the static chain register and the
4248 PIC register before jumping into the real code.  */
4249 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4250 STATIC_CHAIN_REGNUM, PC_REGNUM,
4251 TARGET_THUMB2 ? 8 : 4);
4252 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4253 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4254 TARGET_THUMB2 ? 8 : 4);
4255 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4256 PC_REGNUM, PC_REGNUM,
4257 TARGET_THUMB2 ? 8 : 4);
4258 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4259 }
4260 else if (TARGET_ARM)
4261 {
4262 fprintf (f, "\t.arm\n");
4263 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4264 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4265 }
4266 else if (TARGET_THUMB2)
4267 {
4268 fprintf (f, "\t.thumb\n");
4269 /* The Thumb-2 trampoline is similar to the arm implementation.
4270 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4271 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4272 STATIC_CHAIN_REGNUM, PC_REGNUM);
4273 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4274 }
4275 else
4276 {
4277 ASM_OUTPUT_ALIGN (f, 2);
4278 fprintf (f, "\t.code\t16\n");
4279 fprintf (f, ".Ltrampoline_start:\n");
4280 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4281 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4282 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4283 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4284 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4285 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4286 }
4287 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4288 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4289 }
4290
4291 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4292
4293 static void
4294 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4295 {
4296 rtx fnaddr, mem, a_tramp;
4297
4298 emit_block_move (m_tramp, assemble_trampoline_template (),
4299 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4300
4301 if (TARGET_FDPIC)
4302 {
4303 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4304 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4305 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4306 /* The function start address is at offset 8, but in Thumb mode
4307 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4308 below. */
4309 rtx trampoline_code_start
4310 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4311
4312 /* Write initial funcdesc which points to the trampoline. */
4313 mem = adjust_address (m_tramp, SImode, 0);
4314 emit_move_insn (mem, trampoline_code_start);
4315 mem = adjust_address (m_tramp, SImode, 4);
4316 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4317 /* Set up the static chain.  */
4318 mem = adjust_address (m_tramp, SImode, 20);
4319 emit_move_insn (mem, chain_value);
4320 /* GOT + real function entry point. */
4321 mem = adjust_address (m_tramp, SImode, 24);
4322 emit_move_insn (mem, gotaddr);
4323 mem = adjust_address (m_tramp, SImode, 28);
4324 emit_move_insn (mem, fnaddr);
4325 }
4326 else
4327 {
4328 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4329 emit_move_insn (mem, chain_value);
4330
4331 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4332 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4333 emit_move_insn (mem, fnaddr);
4334 }
4335
4336 a_tramp = XEXP (m_tramp, 0);
4337 maybe_emit_call_builtin___clear_cache (a_tramp,
4338 plus_constant (ptr_mode,
4339 a_tramp,
4340 TRAMPOLINE_SIZE));
4341 }
4342
4343 /* Thumb trampolines should be entered in thumb mode, so set
4344 the bottom bit of the address. */
4345
4346 static rtx
4347 arm_trampoline_adjust_address (rtx addr)
4348 {
4349 /* For FDPIC don't fix trampoline address since it's a function
4350 descriptor and not a function address. */
4351 if (TARGET_THUMB && !TARGET_FDPIC)
4352 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4353 NULL, 0, OPTAB_LIB_WIDEN);
4354 return addr;
4355 }
4356 \f
4357 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4358 includes call-clobbered registers too. If this is a leaf function
4359 we can just examine the registers used by the RTL, but otherwise we
4360 have to assume that whatever function is called might clobber
4361 anything, and so we have to save all the call-clobbered registers
4362 as well. */
4363 static inline bool reg_needs_saving_p (unsigned reg)
4364 {
4365 unsigned long func_type = arm_current_func_type ();
4366
4367 if (IS_INTERRUPT (func_type))
4368 if (df_regs_ever_live_p (reg)
4369 /* Save call-clobbered core registers. */
4370 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4371 return true;
4372 else
4373 return false;
4374 else
4375 if (!df_regs_ever_live_p (reg)
4376 || call_used_or_fixed_reg_p (reg))
4377 return false;
4378 else
4379 return true;
4380 }
4381
4382 /* Return 1 if it is possible to return using a single instruction.
4383 If SIBLING is non-null, this is a test for a return before a sibling
4384 call. SIBLING is the call insn, so we can examine its register usage. */
4385
4386 int
4387 use_return_insn (int iscond, rtx sibling)
4388 {
4389 int regno;
4390 unsigned int func_type;
4391 unsigned long saved_int_regs;
4392 unsigned HOST_WIDE_INT stack_adjust;
4393 arm_stack_offsets *offsets;
4394
4395 /* Never use a return instruction before reload has run. */
4396 if (!reload_completed)
4397 return 0;
4398
4399 /* Never use a return instruction when return address signing
4400 mechanism is enabled as it requires more than one
4401 instruction. */
4402 if (arm_current_function_pac_enabled_p ())
4403 return 0;
4404
4405 func_type = arm_current_func_type ();
4406
4407 /* Naked, volatile and stack alignment functions need special
4408 consideration. */
4409 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4410 return 0;
4411
4412 /* So do interrupt functions that use the frame pointer and Thumb
4413 interrupt functions. */
4414 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4415 return 0;
4416
4417 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4418 && !optimize_function_for_size_p (cfun))
4419 return 0;
4420
4421 offsets = arm_get_frame_offsets ();
4422 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4423
4424 /* As do variadic functions. */
4425 if (crtl->args.pretend_args_size
4426 || cfun->machine->uses_anonymous_args
4427 /* Or if the function calls __builtin_eh_return () */
4428 || crtl->calls_eh_return
4429 /* Or if the function calls alloca */
4430 || cfun->calls_alloca
4431 /* Or if there is a stack adjustment. However, if the stack pointer
4432 is saved on the stack, we can use a pre-incrementing stack load. */
4433 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4434 && stack_adjust == 4))
4435 /* Or if the static chain register was saved above the frame, under the
4436 assumption that the stack pointer isn't saved on the stack. */
4437 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4438 && arm_compute_static_chain_stack_bytes() != 0))
4439 return 0;
4440
4441 saved_int_regs = offsets->saved_regs_mask;
4442
4443 /* Unfortunately, the insn
4444
4445 ldmib sp, {..., sp, ...}
4446
4447 triggers a bug on most SA-110 based devices, such that the stack
4448 pointer won't be correctly restored if the instruction takes a
4449 page fault. We work around this problem by popping r3 along with
4450 the other registers, since that is never slower than executing
4451 another instruction.
4452
4453 We test for !arm_arch5t here, because code for any architecture
4454 less than this could potentially be run on one of the buggy
4455 chips. */
4456 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4457 {
4458 /* Validate that r3 is a call-clobbered register (always true in
4459 the default abi) ... */
4460 if (!call_used_or_fixed_reg_p (3))
4461 return 0;
4462
4463 /* ... that it isn't being used for a return value ... */
4464 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4465 return 0;
4466
4467 /* ... or for a tail-call argument ... */
4468 if (sibling)
4469 {
4470 gcc_assert (CALL_P (sibling));
4471
4472 if (find_regno_fusage (sibling, USE, 3))
4473 return 0;
4474 }
4475
4476 /* ... and that there are no call-saved registers in r0-r2
4477 (always true in the default ABI). */
4478 if (saved_int_regs & 0x7)
4479 return 0;
4480 }
4481
4482 /* Can't be done if interworking with Thumb, and any registers have been
4483 stacked. */
4484 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4485 return 0;
4486
4487 /* On StrongARM, conditional returns are expensive if they aren't
4488 taken and multiple registers have been stacked. */
4489 if (iscond && arm_tune_strongarm)
4490 {
4491 /* Conditional return when just the LR is stored is a simple
4492 conditional-load instruction, that's not expensive. */
4493 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4494 return 0;
4495
4496 if (flag_pic
4497 && arm_pic_register != INVALID_REGNUM
4498 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4499 return 0;
4500 }
4501
4502 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4503 several instructions if anything needs to be popped. Armv8.1-M Mainline
4504 also needs several instructions to save and restore FP context. */
4505 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4506 return 0;
4507
4508 /* If there are saved registers but the LR isn't saved, then we need
4509 two instructions for the return. */
4510 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4511 return 0;
4512
4513 /* Can't be done if any of the VFP regs are pushed,
4514 since this also requires an insn. */
4515 if (TARGET_VFP_BASE)
4516 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4517 if (reg_needs_saving_p (regno))
4518 return 0;
4519
4520 if (TARGET_REALLY_IWMMXT)
4521 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4522 if (reg_needs_saving_p (regno))
4523 return 0;
4524
4525 return 1;
4526 }
4527
4528 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4529 shrink-wrapping if possible. This is the case if we need to emit a
4530 prologue, which we can test by looking at the offsets. */
4531 bool
4532 use_simple_return_p (void)
4533 {
4534 arm_stack_offsets *offsets;
4535
4536 /* Note this function can be called before or after reload. */
4537 if (!reload_completed)
4538 arm_compute_frame_layout ();
4539
4540 offsets = arm_get_frame_offsets ();
4541 return offsets->outgoing_args != 0;
4542 }
4543
4544 /* Return TRUE if int I is a valid immediate ARM constant. */
4545
4546 int
4547 const_ok_for_arm (HOST_WIDE_INT i)
4548 {
4549 int lowbit;
4550
4551 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4552 be all zero, or all one. */
4553 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4554 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4555 != ((~(unsigned HOST_WIDE_INT) 0)
4556 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4557 return FALSE;
4558
4559 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4560
4561 /* Fast return for 0 and small values. We must do this for zero, since
4562 the code below can't handle that one case. */
4563 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4564 return TRUE;
4565
4566 /* Get the number of trailing zeros. */
4567 lowbit = ffs((int) i) - 1;
4568
4569 /* Only even shifts are allowed in ARM mode so round down to the
4570 nearest even number. */
4571 if (TARGET_ARM)
4572 lowbit &= ~1;
4573
4574 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4575 return TRUE;
4576
4577 if (TARGET_ARM)
4578 {
4579 /* Allow rotated constants in ARM mode. */
4580 if (lowbit <= 4
4581 && ((i & ~0xc000003f) == 0
4582 || (i & ~0xf000000f) == 0
4583 || (i & ~0xfc000003) == 0))
4584 return TRUE;
4585 }
4586 else if (TARGET_THUMB2)
4587 {
4588 HOST_WIDE_INT v;
4589
4590 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4591 v = i & 0xff;
4592 v |= v << 16;
4593 if (i == v || i == (v | (v << 8)))
4594 return TRUE;
4595
4596 /* Allow repeated pattern 0xXY00XY00. */
4597 v = i & 0xff00;
4598 v |= v << 16;
4599 if (i == v)
4600 return TRUE;
4601 }
4602 else if (TARGET_HAVE_MOVT)
4603 {
4604 /* Thumb-1 Targets with MOVT. */
4605 if (i > 0xffff)
4606 return FALSE;
4607 else
4608 return TRUE;
4609 }
4610
4611 return FALSE;
4612 }
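/* Some worked examples for const_ok_for_arm above (illustrative only):

     0x000000ff  valid everywhere: fits in 8 bits.
     0xff000000  valid in ARM mode: 0xff rotated right by 8 (an even rotation).
     0x00000102  not valid in ARM mode (it would need an odd rotation), but
                 valid in Thumb-2 as 0x81 shifted left by 1.
     0x00ff00ff  not an 8-bit rotated value, but matches the Thumb-2
                 replicated pattern 0x00XY00XY.  */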
4613
4614 /* Return true if I is a valid constant for the operation CODE. */
4615 int
4616 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4617 {
4618 if (const_ok_for_arm (i))
4619 return 1;
4620
4621 switch (code)
4622 {
4623 case SET:
4624 /* See if we can use movw. */
4625 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4626 return 1;
4627 else
4628 /* Otherwise, try mvn. */
4629 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4630
4631 case PLUS:
4632 /* See if we can use addw or subw. */
4633 if (TARGET_THUMB2
4634 && ((i & 0xfffff000) == 0
4635 || ((-i) & 0xfffff000) == 0))
4636 return 1;
4637 /* Fall through. */
4638 case COMPARE:
4639 case EQ:
4640 case NE:
4641 case GT:
4642 case LE:
4643 case LT:
4644 case GE:
4645 case GEU:
4646 case LTU:
4647 case GTU:
4648 case LEU:
4649 case UNORDERED:
4650 case ORDERED:
4651 case UNEQ:
4652 case UNGE:
4653 case UNLT:
4654 case UNGT:
4655 case UNLE:
4656 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4657
4658 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4659 case XOR:
4660 return 0;
4661
4662 case IOR:
4663 if (TARGET_THUMB2)
4664 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4665 return 0;
4666
4667 case AND:
4668 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4669
4670 default:
4671 gcc_unreachable ();
4672 }
4673 }
4674
4675 /* Return true if I is a valid DImode constant for the operation CODE.  */
4676 int
4677 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4678 {
4679 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4680 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4681 rtx hi = GEN_INT (hi_val);
4682 rtx lo = GEN_INT (lo_val);
4683
4684 if (TARGET_THUMB1)
4685 return 0;
4686
4687 switch (code)
4688 {
4689 case AND:
4690 case IOR:
4691 case XOR:
4692 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4693 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4694 case PLUS:
4695 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4696
4697 default:
4698 return 0;
4699 }
4700 }
4701
4702 /* Emit a sequence of insns to handle a large constant.
4703 CODE is the code of the operation required, it can be any of SET, PLUS,
4704 IOR, AND, XOR, MINUS;
4705 MODE is the mode in which the operation is being performed;
4706 VAL is the integer to operate on;
4707 SOURCE is the other operand (a register, or a null-pointer for SET);
4708 SUBTARGETS means it is safe to create scratch registers if that will
4709 either produce a simpler sequence, or we will want to cse the values.
4710 Return value is the number of insns emitted. */
4711
4712 /* ??? Tweak this for thumb2. */
4713 int
4714 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4715 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4716 {
4717 rtx cond;
4718
4719 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4720 cond = COND_EXEC_TEST (PATTERN (insn));
4721 else
4722 cond = NULL_RTX;
4723
4724 if (subtargets || code == SET
4725 || (REG_P (target) && REG_P (source)
4726 && REGNO (target) != REGNO (source)))
4727 {
4728 /* After arm_reorg has been called, we can't fix up expensive
4729 constants by pushing them into memory so we must synthesize
4730 them in-line, regardless of the cost. This is only likely to
4731 be more costly on chips that have load delay slots and we are
4732 compiling without running the scheduler (so no splitting
4733 occurred before the final instruction emission).
4734
4735 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4736 */
4737 if (!cfun->machine->after_arm_reorg
4738 && !cond
4739 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4740 1, 0)
4741 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4742 + (code != SET))))
4743 {
4744 if (code == SET)
4745 {
4746 /* Currently SET is the only monadic value for CODE; all
4747 the rest are dyadic.  */
4748 if (TARGET_USE_MOVT)
4749 arm_emit_movpair (target, GEN_INT (val));
4750 else
4751 emit_set_insn (target, GEN_INT (val));
4752
4753 return 1;
4754 }
4755 else
4756 {
4757 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4758
4759 if (TARGET_USE_MOVT)
4760 arm_emit_movpair (temp, GEN_INT (val));
4761 else
4762 emit_set_insn (temp, GEN_INT (val));
4763
4764 /* For MINUS, VAL is the minuend (the value subtracted from), since we
4765 never have subtraction of a constant.  */
4766 if (code == MINUS)
4767 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4768 else
4769 emit_set_insn (target,
4770 gen_rtx_fmt_ee (code, mode, source, temp));
4771 return 2;
4772 }
4773 }
4774 }
4775
4776 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4777 1);
4778 }
4779
4780 /* Return, in RETURN_SEQUENCE, a sequence of integers that fit into
4781 ARM/Thumb-2 immediates and that add up to VAL.
4782 The function return value gives the number of insns required.  */
4783 static int
4784 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4785 struct four_ints *return_sequence)
4786 {
4787 int best_consecutive_zeros = 0;
4788 int i;
4789 int best_start = 0;
4790 int insns1, insns2;
4791 struct four_ints tmp_sequence;
4792
4793 /* If we aren't targeting ARM, the best place to start is always at
4794 the bottom, otherwise look more closely. */
4795 if (TARGET_ARM)
4796 {
4797 for (i = 0; i < 32; i += 2)
4798 {
4799 int consecutive_zeros = 0;
4800
4801 if (!(val & (3 << i)))
4802 {
4803 while ((i < 32) && !(val & (3 << i)))
4804 {
4805 consecutive_zeros += 2;
4806 i += 2;
4807 }
4808 if (consecutive_zeros > best_consecutive_zeros)
4809 {
4810 best_consecutive_zeros = consecutive_zeros;
4811 best_start = i - consecutive_zeros;
4812 }
4813 i -= 2;
4814 }
4815 }
4816 }
4817
4818 /* So long as it won't require any more insns to do so, it's
4819 desirable to emit a small constant (in bits 0...9) in the last
4820 insn. This way there is more chance that it can be combined with
4821 a later addressing insn to form a pre-indexed load or store
4822 operation. Consider:
4823
4824 *((volatile int *)0xe0000100) = 1;
4825 *((volatile int *)0xe0000110) = 2;
4826
4827 We want this to wind up as:
4828
4829 mov rA, #0xe0000000
4830 mov rB, #1
4831 str rB, [rA, #0x100]
4832 mov rB, #2
4833 str rB, [rA, #0x110]
4834
4835 rather than having to synthesize both large constants from scratch.
4836
4837 Therefore, we calculate how many insns would be required to emit
4838 the constant starting from `best_start', and also starting from
4839 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4840 yield a shorter sequence, we may as well use zero. */
4841 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4842 if (best_start != 0
4843 && ((HOST_WIDE_INT_1U << best_start) < val))
4844 {
4845 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4846 if (insns2 <= insns1)
4847 {
4848 *return_sequence = tmp_sequence;
4849 insns1 = insns2;
4850 }
4851 }
4852
4853 return insns1;
4854 }
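
/* Editorial sketch, not part of the GCC sources: a stand-alone version
   of the ARM-mode scan above, ignoring the preference for leaving a
   small low-order chunk until last.  It returns the starting bit of the
   largest run of zero bits that begins on a 2-bit boundary, which is
   the position optimal_immediate_sequence_1 is then given to start
   chunking from.  The helper name is hypothetical.  */

static int
sketch_largest_zero_run_start (unsigned int val)
{
  int best_len = 0, best_start = 0;
  int i;

  for (i = 0; i < 32; i += 2)
    {
      int len = 0;

      while (i + len < 32 && !(val & (3u << (i + len))))
        len += 2;

      if (len > best_len)
        {
          best_len = len;
          best_start = i;
        }

      i += len;   /* Skip over the run just measured.  */
    }

  return best_start;
}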
4855
4856 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4857 static int
4858 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4859 struct four_ints *return_sequence, int i)
4860 {
4861 int remainder = val & 0xffffffff;
4862 int insns = 0;
4863
4864 /* Try and find a way of doing the job in either two or three
4865 instructions.
4866
4867 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4868 location. We start at position I. This may be the MSB, or
4869 optimal_immediate_sequence may have positioned it at the largest block
4870 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4871 wrapping around to the top of the word when we drop off the bottom.
4872 In the worst case this code should produce no more than four insns.
4873
4874 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4875 constants, shifted to any arbitrary location. We should always start
4876 at the MSB. */
4877 do
4878 {
4879 int end;
4880 unsigned int b1, b2, b3, b4;
4881 unsigned HOST_WIDE_INT result;
4882 int loc;
4883
4884 gcc_assert (insns < 4);
4885
4886 if (i <= 0)
4887 i += 32;
4888
4889 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4890 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4891 {
4892 loc = i;
4893 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4894 /* We can use addw/subw for the last 12 bits. */
4895 result = remainder;
4896 else
4897 {
4898 /* Use an 8-bit shifted/rotated immediate. */
4899 end = i - 8;
4900 if (end < 0)
4901 end += 32;
4902 result = remainder & ((0x0ff << end)
4903 | ((i < end) ? (0xff >> (32 - end))
4904 : 0));
4905 i -= 8;
4906 }
4907 }
4908 else
4909 {
4910 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4911 arbitrary shifts. */
4912 i -= TARGET_ARM ? 2 : 1;
4913 continue;
4914 }
4915
4916 /* Next, see if we can do a better job with a thumb2 replicated
4917 constant.
4918
4919 We do it this way around to catch the cases like 0x01F001E0 where
4920 two 8-bit immediates would work, but a replicated constant would
4921 make it worse.
4922
4923 TODO: 16-bit constants that don't clear all the bits, but still win.
4924 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4925 if (TARGET_THUMB2)
4926 {
4927 b1 = (remainder & 0xff000000) >> 24;
4928 b2 = (remainder & 0x00ff0000) >> 16;
4929 b3 = (remainder & 0x0000ff00) >> 8;
4930 b4 = remainder & 0xff;
4931
4932 if (loc > 24)
4933 {
4934 /* The 8-bit immediate already found clears b1 (and maybe b2),
4935 but must leave b3 and b4 alone. */
4936
4937 /* First try to find a 32-bit replicated constant that clears
4938 almost everything. We can assume that we can't do it in one,
4939 or else we wouldn't be here. */
4940 unsigned int tmp = b1 & b2 & b3 & b4;
4941 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4942 + (tmp << 24);
4943 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4944 + (tmp == b3) + (tmp == b4);
4945 if (tmp
4946 && (matching_bytes >= 3
4947 || (matching_bytes == 2
4948 && const_ok_for_op (remainder & ~tmp2, code))))
4949 {
4950 /* At least 3 of the bytes match, and the fourth has at
4951 least as many bits set, or two of the bytes match
4952 and it will only require one more insn to finish. */
4953 result = tmp2;
4954 i = tmp != b1 ? 32
4955 : tmp != b2 ? 24
4956 : tmp != b3 ? 16
4957 : 8;
4958 }
4959
4960 /* Second, try to find a 16-bit replicated constant that can
4961 leave three of the bytes clear. If b2 or b4 is already
4962 zero, then we can. If the 8-bit from above would not
4963 clear b2 anyway, then we still win. */
4964 else if (b1 == b3 && (!b2 || !b4
4965 || (remainder & 0x00ff0000 & ~result)))
4966 {
4967 result = remainder & 0xff00ff00;
4968 i = 24;
4969 }
4970 }
4971 else if (loc > 16)
4972 {
4973 /* The 8-bit immediate already found clears b2 (and maybe b3)
4974 and we don't get here unless b1 is already clear, but it will
4975 leave b4 unchanged. */
4976
4977 /* If we can clear b2 and b4 at once, then we win, since the
4978 8-bits couldn't possibly reach that far. */
4979 if (b2 == b4)
4980 {
4981 result = remainder & 0x00ff00ff;
4982 i = 16;
4983 }
4984 }
4985 }
4986
4987 return_sequence->i[insns++] = result;
4988 remainder &= ~result;
4989
4990 if (code == SET || code == MINUS)
4991 code = PLUS;
4992 }
4993 while (remainder);
4994
4995 return insns;
4996 }
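
/* Editorial sketch, not part of the GCC sources: the immediate form
   that the ARM-mode chunks chosen above must take -- an 8-bit value
   rotated right by an even amount.  const_ok_for_arm is the real test;
   this stand-alone model (with a hypothetical name) only restates the
   encoding.  Thumb-2 additionally accepts the replicated patterns
   0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY handled in the
   replicated-constant block above.  */

static int
sketch_arm_dp_immediate_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the original was an 8-bit value rotated
         right by ROT, this puts it back into bits 0..7.  */
      unsigned int y = (x << rot) | (rot ? x >> (32 - rot) : 0);

      if ((y & 0xff) == y)
        return 1;
    }

  return 0;
}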
4997
4998 /* Emit an instruction with the indicated PATTERN. If COND is
4999 non-NULL, conditionalize the execution of the instruction on COND
5000 being true. */
5001
5002 static void
5003 emit_constant_insn (rtx cond, rtx pattern)
5004 {
5005 if (cond)
5006 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5007 emit_insn (pattern);
5008 }
5009
5010 /* As above, but extra parameter GENERATE which, if clear, suppresses
5011 RTL generation. */
5012
5013 static int
5014 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5015 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5016 int subtargets, int generate)
5017 {
5018 int can_invert = 0;
5019 int can_negate = 0;
5020 int final_invert = 0;
5021 int i;
5022 int set_sign_bit_copies = 0;
5023 int clear_sign_bit_copies = 0;
5024 int clear_zero_bit_copies = 0;
5025 int set_zero_bit_copies = 0;
5026 int insns = 0, neg_insns, inv_insns;
5027 unsigned HOST_WIDE_INT temp1, temp2;
5028 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5029 struct four_ints *immediates;
5030 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5031
5032 /* Find out which operations are safe for a given CODE. Also do a quick
5033 check for degenerate cases; these can occur when DImode operations
5034 are split. */
5035 switch (code)
5036 {
5037 case SET:
5038 can_invert = 1;
5039 break;
5040
5041 case PLUS:
5042 can_negate = 1;
5043 break;
5044
5045 case IOR:
5046 if (remainder == 0xffffffff)
5047 {
5048 if (generate)
5049 emit_constant_insn (cond,
5050 gen_rtx_SET (target,
5051 GEN_INT (ARM_SIGN_EXTEND (val))));
5052 return 1;
5053 }
5054
5055 if (remainder == 0)
5056 {
5057 if (reload_completed && rtx_equal_p (target, source))
5058 return 0;
5059
5060 if (generate)
5061 emit_constant_insn (cond, gen_rtx_SET (target, source));
5062 return 1;
5063 }
5064 break;
5065
5066 case AND:
5067 if (remainder == 0)
5068 {
5069 if (generate)
5070 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5071 return 1;
5072 }
5073 if (remainder == 0xffffffff)
5074 {
5075 if (reload_completed && rtx_equal_p (target, source))
5076 return 0;
5077 if (generate)
5078 emit_constant_insn (cond, gen_rtx_SET (target, source));
5079 return 1;
5080 }
5081 can_invert = 1;
5082 break;
5083
5084 case XOR:
5085 if (remainder == 0)
5086 {
5087 if (reload_completed && rtx_equal_p (target, source))
5088 return 0;
5089 if (generate)
5090 emit_constant_insn (cond, gen_rtx_SET (target, source));
5091 return 1;
5092 }
5093
5094 if (remainder == 0xffffffff)
5095 {
5096 if (generate)
5097 emit_constant_insn (cond,
5098 gen_rtx_SET (target,
5099 gen_rtx_NOT (mode, source)));
5100 return 1;
5101 }
5102 final_invert = 1;
5103 break;
5104
5105 case MINUS:
5106 /* We treat MINUS as (val - source), since (source - val) is always
5107 passed as (source + (-val)). */
5108 if (remainder == 0)
5109 {
5110 if (generate)
5111 emit_constant_insn (cond,
5112 gen_rtx_SET (target,
5113 gen_rtx_NEG (mode, source)));
5114 return 1;
5115 }
5116 if (const_ok_for_arm (val))
5117 {
5118 if (generate)
5119 emit_constant_insn (cond,
5120 gen_rtx_SET (target,
5121 gen_rtx_MINUS (mode, GEN_INT (val),
5122 source)));
5123 return 1;
5124 }
5125
5126 break;
5127
5128 default:
5129 gcc_unreachable ();
5130 }
5131
5132 /* If we can do it in one insn get out quickly. */
5133 if (const_ok_for_op (val, code))
5134 {
5135 if (generate)
5136 emit_constant_insn (cond,
5137 gen_rtx_SET (target,
5138 (source
5139 ? gen_rtx_fmt_ee (code, mode, source,
5140 GEN_INT (val))
5141 : GEN_INT (val))));
5142 return 1;
5143 }
5144
5145 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5146 insn. */
5147 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5148 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5149 {
5150 if (generate)
5151 {
5152 if (mode == SImode && i == 16)
5153 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5154 smaller insn. */
5155 emit_constant_insn (cond,
5156 gen_zero_extendhisi2
5157 (target, gen_lowpart (HImode, source)));
5158 else
5159 /* Extz only supports SImode, but we can coerce the operands
5160 into that mode. */
5161 emit_constant_insn (cond,
5162 gen_extzv_t2 (gen_lowpart (SImode, target),
5163 gen_lowpart (SImode, source),
5164 GEN_INT (i), const0_rtx));
5165 }
5166
5167 return 1;
5168 }
5169
5170 /* Calculate a few attributes that may be useful for specific
5171 optimizations. */
5172 /* Count number of leading zeros. */
5173 for (i = 31; i >= 0; i--)
5174 {
5175 if ((remainder & (1 << i)) == 0)
5176 clear_sign_bit_copies++;
5177 else
5178 break;
5179 }
5180
5181 /* Count number of leading 1's. */
5182 for (i = 31; i >= 0; i--)
5183 {
5184 if ((remainder & (1 << i)) != 0)
5185 set_sign_bit_copies++;
5186 else
5187 break;
5188 }
5189
5190 /* Count number of trailing zero's. */
5191 for (i = 0; i <= 31; i++)
5192 {
5193 if ((remainder & (1 << i)) == 0)
5194 clear_zero_bit_copies++;
5195 else
5196 break;
5197 }
5198
5199 /* Count number of trailing 1's. */
5200 for (i = 0; i <= 31; i++)
5201 {
5202 if ((remainder & (1 << i)) != 0)
5203 set_zero_bit_copies++;
5204 else
5205 break;
5206 }
5207
5208 switch (code)
5209 {
5210 case SET:
5211 /* See if we can do this by sign_extending a constant that is known
5212 to be negative. This is a good way of doing it, since the shift
5213 may well merge into a subsequent insn. */
5214 if (set_sign_bit_copies > 1)
5215 {
5216 if (const_ok_for_arm
5217 (temp1 = ARM_SIGN_EXTEND (remainder
5218 << (set_sign_bit_copies - 1))))
5219 {
5220 if (generate)
5221 {
5222 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5223 emit_constant_insn (cond,
5224 gen_rtx_SET (new_src, GEN_INT (temp1)));
5225 emit_constant_insn (cond,
5226 gen_ashrsi3 (target, new_src,
5227 GEN_INT (set_sign_bit_copies - 1)));
5228 }
5229 return 2;
5230 }
5231 /* For an inverted constant, we will need to set the low bits,
5232 these will be shifted out of harm's way. */
5233 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5234 if (const_ok_for_arm (~temp1))
5235 {
5236 if (generate)
5237 {
5238 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5239 emit_constant_insn (cond,
5240 gen_rtx_SET (new_src, GEN_INT (temp1)));
5241 emit_constant_insn (cond,
5242 gen_ashrsi3 (target, new_src,
5243 GEN_INT (set_sign_bit_copies - 1)));
5244 }
5245 return 2;
5246 }
5247 }
5248
5249 /* See if we can calculate the value as the difference between two
5250 valid immediates. */
5251 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5252 {
5253 int topshift = clear_sign_bit_copies & ~1;
5254
5255 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5256 & (0xff000000 >> topshift));
5257
5258 /* If temp1 is zero, then that means the 9 most significant
5259 bits of remainder were 1 and we've caused it to overflow.
5260 When topshift is 0 we don't need to do anything since we
5261 can borrow from 'bit 32'. */
5262 if (temp1 == 0 && topshift != 0)
5263 temp1 = 0x80000000 >> (topshift - 1);
5264
5265 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5266
5267 if (const_ok_for_arm (temp2))
5268 {
5269 if (generate)
5270 {
5271 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5272 emit_constant_insn (cond,
5273 gen_rtx_SET (new_src, GEN_INT (temp1)));
5274 emit_constant_insn (cond,
5275 gen_addsi3 (target, new_src,
5276 GEN_INT (-temp2)));
5277 }
5278
5279 return 2;
5280 }
5281 }
5282
5283 /* See if we can generate this by setting the bottom (or the top)
5284 16 bits, and then shifting these into the other half of the
5285 word. We only look for the simplest cases, to do more would cost
5286 too much. Be careful, however, not to generate this when the
5287 alternative would take fewer insns. */
5288 if (val & 0xffff0000)
5289 {
5290 temp1 = remainder & 0xffff0000;
5291 temp2 = remainder & 0x0000ffff;
5292
5293 /* Overlaps outside this range are best done using other methods. */
5294 for (i = 9; i < 24; i++)
5295 {
5296 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5297 && !const_ok_for_arm (temp2))
5298 {
5299 rtx new_src = (subtargets
5300 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5301 : target);
5302 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5303 source, subtargets, generate);
5304 source = new_src;
5305 if (generate)
5306 emit_constant_insn
5307 (cond,
5308 gen_rtx_SET
5309 (target,
5310 gen_rtx_IOR (mode,
5311 gen_rtx_ASHIFT (mode, source,
5312 GEN_INT (i)),
5313 source)));
5314 return insns + 1;
5315 }
5316 }
5317
5318 /* Don't duplicate cases already considered. */
5319 for (i = 17; i < 24; i++)
5320 {
5321 if (((temp1 | (temp1 >> i)) == remainder)
5322 && !const_ok_for_arm (temp1))
5323 {
5324 rtx new_src = (subtargets
5325 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5326 : target);
5327 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5328 source, subtargets, generate);
5329 source = new_src;
5330 if (generate)
5331 emit_constant_insn
5332 (cond,
5333 gen_rtx_SET (target,
5334 gen_rtx_IOR
5335 (mode,
5336 gen_rtx_LSHIFTRT (mode, source,
5337 GEN_INT (i)),
5338 source)));
5339 return insns + 1;
5340 }
5341 }
5342 }
5343 break;
5344
5345 case IOR:
5346 case XOR:
5347 /* If we have IOR or XOR, and the constant can be loaded in a
5348 single instruction, and we can find a temporary to put it in,
5349 then this can be done in two instructions instead of 3-4. */
5350 if (subtargets
5351 /* TARGET can't be NULL if SUBTARGETS is 0. */
5352 || (reload_completed && !reg_mentioned_p (target, source)))
5353 {
5354 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5355 {
5356 if (generate)
5357 {
5358 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5359
5360 emit_constant_insn (cond,
5361 gen_rtx_SET (sub, GEN_INT (val)));
5362 emit_constant_insn (cond,
5363 gen_rtx_SET (target,
5364 gen_rtx_fmt_ee (code, mode,
5365 source, sub)));
5366 }
5367 return 2;
5368 }
5369 }
5370
5371 if (code == XOR)
5372 break;
5373
5374 /* Convert.
5375 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5376 and the remainder 0s, e.g. 0xfff00000)
5377 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5378
5379 This can be done in 2 instructions by using shifts with mov or mvn.
5380 e.g. for
5381 x = x | 0xfff00000;
5382 we generate.
5383 mvn r0, r0, asl #12
5384 mvn r0, r0, lsr #12 */
5385 if (set_sign_bit_copies > 8
5386 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5387 {
5388 if (generate)
5389 {
5390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5391 rtx shift = GEN_INT (set_sign_bit_copies);
5392
5393 emit_constant_insn
5394 (cond,
5395 gen_rtx_SET (sub,
5396 gen_rtx_NOT (mode,
5397 gen_rtx_ASHIFT (mode,
5398 source,
5399 shift))));
5400 emit_constant_insn
5401 (cond,
5402 gen_rtx_SET (target,
5403 gen_rtx_NOT (mode,
5404 gen_rtx_LSHIFTRT (mode, sub,
5405 shift))));
5406 }
5407 return 2;
5408 }
5409
5410 /* Convert
5411 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5412 to
5413 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5414
5415 For example, r0 = r0 | 0xfff
5416 mvn r0, r0, lsr #12
5417 mvn r0, r0, asl #12
5418
5419 */
5420 if (set_zero_bit_copies > 8
5421 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5422 {
5423 if (generate)
5424 {
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 rtx shift = GEN_INT (set_zero_bit_copies);
5427
5428 emit_constant_insn
5429 (cond,
5430 gen_rtx_SET (sub,
5431 gen_rtx_NOT (mode,
5432 gen_rtx_LSHIFTRT (mode,
5433 source,
5434 shift))));
5435 emit_constant_insn
5436 (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode,
5439 gen_rtx_ASHIFT (mode, sub,
5440 shift))));
5441 }
5442 return 2;
5443 }
5444
5445 /* This will never be reached for Thumb2 because orn is a valid
5446 instruction. This is for Thumb1 and the ARM 32 bit cases.
5447
5448 x = y | constant (such that ~constant is a valid constant)
5449 Transform this to
5450 x = ~(~y & ~constant).
5451 */
5452 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5453 {
5454 if (generate)
5455 {
5456 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5457 emit_constant_insn (cond,
5458 gen_rtx_SET (sub,
5459 gen_rtx_NOT (mode, source)));
5460 source = sub;
5461 if (subtargets)
5462 sub = gen_reg_rtx (mode);
5463 emit_constant_insn (cond,
5464 gen_rtx_SET (sub,
5465 gen_rtx_AND (mode, source,
5466 GEN_INT (temp1))));
5467 emit_constant_insn (cond,
5468 gen_rtx_SET (target,
5469 gen_rtx_NOT (mode, sub)));
5470 }
5471 return 3;
5472 }
5473 break;
5474
5475 case AND:
5476 /* See if two shifts will do 2 or more insn's worth of work. */
5477 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5478 {
5479 HOST_WIDE_INT shift_mask = ((0xffffffff
5480 << (32 - clear_sign_bit_copies))
5481 & 0xffffffff);
5482
5483 if ((remainder | shift_mask) != 0xffffffff)
5484 {
5485 HOST_WIDE_INT new_val
5486 = ARM_SIGN_EXTEND (remainder | shift_mask);
5487
5488 if (generate)
5489 {
5490 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5491 insns = arm_gen_constant (AND, SImode, cond, new_val,
5492 new_src, source, subtargets, 1);
5493 source = new_src;
5494 }
5495 else
5496 {
5497 rtx targ = subtargets ? NULL_RTX : target;
5498 insns = arm_gen_constant (AND, mode, cond, new_val,
5499 targ, source, subtargets, 0);
5500 }
5501 }
5502
5503 if (generate)
5504 {
5505 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5506 rtx shift = GEN_INT (clear_sign_bit_copies);
5507
5508 emit_insn (gen_ashlsi3 (new_src, source, shift));
5509 emit_insn (gen_lshrsi3 (target, new_src, shift));
5510 }
5511
5512 return insns + 2;
5513 }
5514
5515 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5516 {
5517 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5518
5519 if ((remainder | shift_mask) != 0xffffffff)
5520 {
5521 HOST_WIDE_INT new_val
5522 = ARM_SIGN_EXTEND (remainder | shift_mask);
5523 if (generate)
5524 {
5525 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5526
5527 insns = arm_gen_constant (AND, mode, cond, new_val,
5528 new_src, source, subtargets, 1);
5529 source = new_src;
5530 }
5531 else
5532 {
5533 rtx targ = subtargets ? NULL_RTX : target;
5534
5535 insns = arm_gen_constant (AND, mode, cond, new_val,
5536 targ, source, subtargets, 0);
5537 }
5538 }
5539
5540 if (generate)
5541 {
5542 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5543 rtx shift = GEN_INT (clear_zero_bit_copies);
5544
5545 emit_insn (gen_lshrsi3 (new_src, source, shift));
5546 emit_insn (gen_ashlsi3 (target, new_src, shift));
5547 }
5548
5549 return insns + 2;
5550 }
5551
5552 break;
5553
5554 default:
5555 break;
5556 }
5557
5558 /* Calculate what the instruction sequences would be if we generated it
5559 normally, negated, or inverted. */
5560 if (code == AND)
5561 /* AND cannot be split into multiple insns, so invert and use BIC. */
5562 insns = 99;
5563 else
5564 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5565
5566 if (can_negate)
5567 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5568 &neg_immediates);
5569 else
5570 neg_insns = 99;
5571
5572 if (can_invert || final_invert)
5573 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5574 &inv_immediates);
5575 else
5576 inv_insns = 99;
5577
5578 immediates = &pos_immediates;
5579
5580 /* Is the negated immediate sequence more efficient? */
5581 if (neg_insns < insns && neg_insns <= inv_insns)
5582 {
5583 insns = neg_insns;
5584 immediates = &neg_immediates;
5585 }
5586 else
5587 can_negate = 0;
5588
5589 /* Is the inverted immediate sequence more efficient?
5590 We must allow for an extra NOT instruction for XOR operations, although
5591 there is some chance that the final 'mvn' will get optimized later. */
5592 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5593 {
5594 insns = inv_insns;
5595 immediates = &inv_immediates;
5596 }
5597 else
5598 {
5599 can_invert = 0;
5600 final_invert = 0;
5601 }
5602
5603 /* Now output the chosen sequence as instructions. */
5604 if (generate)
5605 {
5606 for (i = 0; i < insns; i++)
5607 {
5608 rtx new_src, temp1_rtx;
5609
5610 temp1 = immediates->i[i];
5611
5612 if (code == SET || code == MINUS)
5613 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5614 else if ((final_invert || i < (insns - 1)) && subtargets)
5615 new_src = gen_reg_rtx (mode);
5616 else
5617 new_src = target;
5618
5619 if (can_invert)
5620 temp1 = ~temp1;
5621 else if (can_negate)
5622 temp1 = -temp1;
5623
5624 temp1 = trunc_int_for_mode (temp1, mode);
5625 temp1_rtx = GEN_INT (temp1);
5626
5627 if (code == SET)
5628 ;
5629 else if (code == MINUS)
5630 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5631 else
5632 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5633
5634 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5635 source = new_src;
5636
5637 if (code == SET)
5638 {
5639 can_negate = can_invert;
5640 can_invert = 0;
5641 code = PLUS;
5642 }
5643 else if (code == MINUS)
5644 code = PLUS;
5645 }
5646 }
5647
5648 if (final_invert)
5649 {
5650 if (generate)
5651 emit_constant_insn (cond, gen_rtx_SET (target,
5652 gen_rtx_NOT (mode, source)));
5653 insns++;
5654 }
5655
5656 return insns;
5657 }
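
/* Editorial illustration, not part of the GCC sources: the
   invert/negate bookkeeping above in miniature for AND.  The positive
   form is never split (insns is forced to 99), so a mask such as
   0xfffffffe is handled by inverting it to 0x00000001 -- a single
   data-processing immediate -- and letting the AND with an inverted
   constant match the BIC pattern.  The helper name is hypothetical.  */

static bool
sketch_and_as_single_bic_p (unsigned HOST_WIDE_INT mask)
{
  /* BIC rd, rn, #imm computes rn & ~imm, so an AND with MASK needs only
     one instruction whenever ~MASK is itself a valid immediate.  */
  return const_ok_for_arm (ARM_SIGN_EXTEND (~mask)) != 0;
}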
5658
5659 /* Return TRUE if op is a constant where both the low and top words are
5660 suitable for RSB/RSC instructions. This is never true for Thumb, since
5661 we do not have RSC in that case. */
5662 static bool
5663 arm_const_double_prefer_rsbs_rsc (rtx op)
5664 {
5665 /* Thumb lacks RSC, so we never prefer that sequence. */
5666 if (TARGET_THUMB || !CONST_INT_P (op))
5667 return false;
5668 HOST_WIDE_INT hi, lo;
5669 lo = UINTVAL (op) & 0xffffffffULL;
5670 hi = UINTVAL (op) >> 32;
5671 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5672 }
5673
5674 /* Canonicalize a comparison so that we are more likely to recognize it.
5675 This can be done for a few constant compares, where we can make the
5676 immediate value easier to load. */
5677
5678 static void
5679 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5680 bool op0_preserve_value)
5681 {
5682 machine_mode mode;
5683 unsigned HOST_WIDE_INT i, maxval;
5684
5685 mode = GET_MODE (*op0);
5686 if (mode == VOIDmode)
5687 mode = GET_MODE (*op1);
5688
5689 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5690
5691 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5692 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5693 either reversed or (for constant OP1) adjusted to GE/LT.
5694 Similarly for GTU/LEU in Thumb mode. */
5695 if (mode == DImode)
5696 {
5697
5698 if (*code == GT || *code == LE
5699 || *code == GTU || *code == LEU)
5700 {
5701 /* Missing comparison. First try to use an available
5702 comparison. */
5703 if (CONST_INT_P (*op1))
5704 {
5705 i = INTVAL (*op1);
5706 switch (*code)
5707 {
5708 case GT:
5709 case LE:
5710 if (i != maxval)
5711 {
5712 /* Try to convert to GE/LT, unless that would be more
5713 expensive. */
5714 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5715 && arm_const_double_prefer_rsbs_rsc (*op1))
5716 return;
5717 *op1 = GEN_INT (i + 1);
5718 *code = *code == GT ? GE : LT;
5719 }
5720 else
5721 {
5722 /* GT maxval is always false, LE maxval is always true.
5723 We can't fold that away here as we must make a
5724 comparison, but we can fold them to comparisons
5725 with the same result that can be handled:
5726 op0 GT maxval -> op0 LT minval
5727 op0 LE maxval -> op0 GE minval
5728 where minval = (-maxval - 1). */
5729 *op1 = GEN_INT (-maxval - 1);
5730 *code = *code == GT ? LT : GE;
5731 }
5732 return;
5733
5734 case GTU:
5735 case LEU:
5736 if (i != ~((unsigned HOST_WIDE_INT) 0))
5737 {
5738 /* Try to convert to GEU/LTU, unless that would
5739 be more expensive. */
5740 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5741 && arm_const_double_prefer_rsbs_rsc (*op1))
5742 return;
5743 *op1 = GEN_INT (i + 1);
5744 *code = *code == GTU ? GEU : LTU;
5745 }
5746 else
5747 {
5748 /* GTU ~0 is always false, LEU ~0 is always true.
5749 We can't fold that away here as we must make a
5750 comparison, but we can fold them to comparisons
5751 with the same result that can be handled:
5752 op0 GTU ~0 -> op0 LTU 0
5753 op0 LEU ~0 -> op0 GEU 0. */
5754 *op1 = const0_rtx;
5755 *code = *code == GTU ? LTU : GEU;
5756 }
5757 return;
5758
5759 default:
5760 gcc_unreachable ();
5761 }
5762 }
5763
5764 if (!op0_preserve_value)
5765 {
5766 std::swap (*op0, *op1);
5767 *code = (int)swap_condition ((enum rtx_code)*code);
5768 }
5769 }
5770 return;
5771 }
5772
5773 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5774 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5775 to facilitate possible combining with a cmp into 'ands'. */
5776 if (mode == SImode
5777 && GET_CODE (*op0) == ZERO_EXTEND
5778 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5779 && GET_MODE (XEXP (*op0, 0)) == QImode
5780 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5781 && subreg_lowpart_p (XEXP (*op0, 0))
5782 && *op1 == const0_rtx)
5783 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5784 GEN_INT (255));
5785
5786 /* Comparisons smaller than DImode. Only adjust comparisons against
5787 an out-of-range constant. */
5788 if (!CONST_INT_P (*op1)
5789 || const_ok_for_arm (INTVAL (*op1))
5790 || const_ok_for_arm (- INTVAL (*op1)))
5791 return;
5792
5793 i = INTVAL (*op1);
5794
5795 switch (*code)
5796 {
5797 case EQ:
5798 case NE:
5799 return;
5800
5801 case GT:
5802 case LE:
5803 if (i != maxval
5804 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5805 {
5806 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5807 *code = *code == GT ? GE : LT;
5808 return;
5809 }
5810 break;
5811
5812 case GE:
5813 case LT:
5814 if (i != ~maxval
5815 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5816 {
5817 *op1 = GEN_INT (i - 1);
5818 *code = *code == GE ? GT : LE;
5819 return;
5820 }
5821 break;
5822
5823 case GTU:
5824 case LEU:
5825 if (i != ~((unsigned HOST_WIDE_INT) 0)
5826 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5827 {
5828 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5829 *code = *code == GTU ? GEU : LTU;
5830 return;
5831 }
5832 break;
5833
5834 case GEU:
5835 case LTU:
5836 if (i != 0
5837 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5838 {
5839 *op1 = GEN_INT (i - 1);
5840 *code = *code == GEU ? GTU : LEU;
5841 return;
5842 }
5843 break;
5844
5845 default:
5846 gcc_unreachable ();
5847 }
5848 }
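
/* Editorial illustration, not part of the GCC sources: the GT/LE
   adjustment above for a concrete SImode case.  4095 (0xfff) is not a
   valid data-processing immediate, but 4096 is, so "x > 4095" is
   rewritten as "x >= 4096" and a single CMP suffices.  The helper name
   is hypothetical and only models that one leg.  */

static bool
sketch_canonicalize_gt (unsigned HOST_WIDE_INT *imm)
{
  if (!const_ok_for_arm (*imm)
      && *imm != 0x7fffffff   /* SImode maxval: GT would wrap.  */
      && (const_ok_for_arm (*imm + 1) || const_ok_for_arm (-(*imm + 1))))
    {
      *imm += 1;              /* GT imm  ->  GE (imm + 1).  */
      return true;
    }

  return false;
}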
5849
5850
5851 /* Define how to find the value returned by a function. */
5852
5853 static rtx
5854 arm_function_value(const_tree type, const_tree func,
5855 bool outgoing ATTRIBUTE_UNUSED)
5856 {
5857 machine_mode mode;
5858 int unsignedp ATTRIBUTE_UNUSED;
5859 rtx r ATTRIBUTE_UNUSED;
5860
5861 mode = TYPE_MODE (type);
5862
5863 if (TARGET_AAPCS_BASED)
5864 return aapcs_allocate_return_reg (mode, type, func);
5865
5866 /* Promote integer types. */
5867 if (INTEGRAL_TYPE_P (type))
5868 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5869
5870 /* Promote small structs returned in a register to full-word size
5871 for big-endian AAPCS. */
5872 if (arm_return_in_msb (type))
5873 {
5874 HOST_WIDE_INT size = int_size_in_bytes (type);
5875 if (size % UNITS_PER_WORD != 0)
5876 {
5877 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5878 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5879 }
5880 }
5881
5882 return arm_libcall_value_1 (mode);
5883 }
5884
5885 /* libcall hashtable helpers. */
5886
5887 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5888 {
5889 static inline hashval_t hash (const rtx_def *);
5890 static inline bool equal (const rtx_def *, const rtx_def *);
5891 static inline void remove (rtx_def *);
5892 };
5893
5894 inline bool
5895 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5896 {
5897 return rtx_equal_p (p1, p2);
5898 }
5899
5900 inline hashval_t
5901 libcall_hasher::hash (const rtx_def *p1)
5902 {
5903 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5904 }
5905
5906 typedef hash_table<libcall_hasher> libcall_table_type;
5907
5908 static void
5909 add_libcall (libcall_table_type *htab, rtx libcall)
5910 {
5911 *htab->find_slot (libcall, INSERT) = libcall;
5912 }
5913
5914 static bool
5915 arm_libcall_uses_aapcs_base (const_rtx libcall)
5916 {
5917 static bool init_done = false;
5918 static libcall_table_type *libcall_htab = NULL;
5919
5920 if (!init_done)
5921 {
5922 init_done = true;
5923
5924 libcall_htab = new libcall_table_type (31);
5925 add_libcall (libcall_htab,
5926 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5927 add_libcall (libcall_htab,
5928 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5929 add_libcall (libcall_htab,
5930 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5931 add_libcall (libcall_htab,
5932 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5933
5934 add_libcall (libcall_htab,
5935 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5936 add_libcall (libcall_htab,
5937 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5938 add_libcall (libcall_htab,
5939 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5940 add_libcall (libcall_htab,
5941 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5942
5943 add_libcall (libcall_htab,
5944 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5945 add_libcall (libcall_htab,
5946 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5947 add_libcall (libcall_htab,
5948 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5949 add_libcall (libcall_htab,
5950 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5951 add_libcall (libcall_htab,
5952 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5953 add_libcall (libcall_htab,
5954 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5955 add_libcall (libcall_htab,
5956 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5957 add_libcall (libcall_htab,
5958 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5959 add_libcall (libcall_htab,
5960 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5961 add_libcall (libcall_htab,
5962 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5963
5964 /* Values from double-precision helper functions are returned in core
5965 registers if the selected core only supports single-precision
5966 arithmetic, even if we are using the hard-float ABI. The same is
5967 true for single-precision helpers except in case of MVE, because in
5968 MVE we will be using the hard-float ABI on a CPU which doesn't support
5969 single-precision operations in hardware. In MVE the following check
5970 enables use of emulation for the single-precision arithmetic
5971 operations. */
5972 if (TARGET_HAVE_MVE)
5973 {
5974 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5975 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5976 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5977 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5978 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5979 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5980 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5981 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5982 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5983 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5984 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5985 }
5986 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5987 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5988 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5989 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5990 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5991 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5992 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5993 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5994 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5995 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5996 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5997 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5998 SFmode));
5999 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6000 DFmode));
6001 add_libcall (libcall_htab,
6002 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6003 }
6004
6005 return libcall && libcall_htab->find (libcall) != NULL;
6006 }
6007
6008 static rtx
6009 arm_libcall_value_1 (machine_mode mode)
6010 {
6011 if (TARGET_AAPCS_BASED)
6012 return aapcs_libcall_value (mode);
6013 else if (TARGET_IWMMXT_ABI
6014 && arm_vector_mode_supported_p (mode))
6015 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6016 else
6017 return gen_rtx_REG (mode, ARG_REGISTER (1));
6018 }
6019
6020 /* Define how to find the value returned by a library function
6021 assuming the value has mode MODE. */
6022
6023 static rtx
6024 arm_libcall_value (machine_mode mode, const_rtx libcall)
6025 {
6026 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6027 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6028 {
6029 /* The following libcalls return their result in integer registers,
6030 even though they return a floating point value. */
6031 if (arm_libcall_uses_aapcs_base (libcall))
6032 return gen_rtx_REG (mode, ARG_REGISTER (1));
6033
6034 }
6035
6036 return arm_libcall_value_1 (mode);
6037 }
6038
6039 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6040
6041 static bool
6042 arm_function_value_regno_p (const unsigned int regno)
6043 {
6044 if (regno == ARG_REGISTER (1)
6045 || (TARGET_32BIT
6046 && TARGET_AAPCS_BASED
6047 && TARGET_HARD_FLOAT
6048 && regno == FIRST_VFP_REGNUM)
6049 || (TARGET_IWMMXT_ABI
6050 && regno == FIRST_IWMMXT_REGNUM))
6051 return true;
6052
6053 return false;
6054 }
6055
6056 /* Determine the amount of memory needed to store the possible return
6057 registers of an untyped call. */
6058 int
6059 arm_apply_result_size (void)
6060 {
6061 int size = 16;
6062
6063 if (TARGET_32BIT)
6064 {
6065 if (TARGET_HARD_FLOAT_ABI)
6066 size += 32;
6067 if (TARGET_IWMMXT_ABI)
6068 size += 8;
6069 }
6070
6071 return size;
6072 }
6073
6074 /* Decide whether TYPE should be returned in memory (true)
6075 or in a register (false). FNTYPE is the type of the function making
6076 the call. */
6077 static bool
6078 arm_return_in_memory (const_tree type, const_tree fntype)
6079 {
6080 HOST_WIDE_INT size;
6081
6082 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6083
6084 if (TARGET_AAPCS_BASED)
6085 {
6086 /* Simple, non-aggregate types (i.e. not including vectors and
6087 complex) are always returned in a register (or registers).
6088 We don't care about which register here, so we can short-cut
6089 some of the detail. */
6090 if (!AGGREGATE_TYPE_P (type)
6091 && TREE_CODE (type) != VECTOR_TYPE
6092 && TREE_CODE (type) != COMPLEX_TYPE)
6093 return false;
6094
6095 /* Any return value that is no larger than one word can be
6096 returned in r0. */
6097 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6098 return false;
6099
6100 /* Check any available co-processors to see if they accept the
6101 type as a register candidate (VFP, for example, can return
6102 some aggregates in consecutive registers). These aren't
6103 available if the call is variadic. */
6104 if (aapcs_select_return_coproc (type, fntype) >= 0)
6105 return false;
6106
6107 /* Vector values should be returned using ARM registers, not
6108 memory (unless they're over 16 bytes, which will break since
6109 we only have four call-clobbered registers to play with). */
6110 if (TREE_CODE (type) == VECTOR_TYPE)
6111 return (size < 0 || size > (4 * UNITS_PER_WORD));
6112
6113 /* The rest go in memory. */
6114 return true;
6115 }
6116
6117 if (TREE_CODE (type) == VECTOR_TYPE)
6118 return (size < 0 || size > (4 * UNITS_PER_WORD));
6119
6120 if (!AGGREGATE_TYPE_P (type)
6121 && (TREE_CODE (type) != VECTOR_TYPE))
6122 /* All simple types are returned in registers. */
6123 return false;
6124
6125 if (arm_abi != ARM_ABI_APCS)
6126 {
6127 /* ATPCS and later return aggregate types in memory only if they are
6128 larger than a word (or are variable size). */
6129 return (size < 0 || size > UNITS_PER_WORD);
6130 }
6131
6132 /* For the arm-wince targets we choose to be compatible with Microsoft's
6133 ARM and Thumb compilers, which always return aggregates in memory. */
6134 #ifndef ARM_WINCE
6135 /* All structures/unions bigger than one word are returned in memory.
6136 Also catch the case where int_size_in_bytes returns -1. In this case
6137 the aggregate is either huge or of variable size, and in either case
6138 we will want to return it via memory and not in a register. */
6139 if (size < 0 || size > UNITS_PER_WORD)
6140 return true;
6141
6142 if (TREE_CODE (type) == RECORD_TYPE)
6143 {
6144 tree field;
6145
6146 /* For a struct the APCS says that we only return in a register
6147 if the type is 'integer like' and every addressable element
6148 has an offset of zero. For practical purposes this means
6149 that the structure can have at most one non bit-field element
6150 and that this element must be the first one in the structure. */
6151
6152 /* Find the first field, ignoring non FIELD_DECL things which will
6153 have been created by C++. */
6154 /* NOTE: This code is deprecated and has not been updated to handle
6155 DECL_FIELD_ABI_IGNORED. */
6156 for (field = TYPE_FIELDS (type);
6157 field && TREE_CODE (field) != FIELD_DECL;
6158 field = DECL_CHAIN (field))
6159 continue;
6160
6161 if (field == NULL)
6162 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6163
6164 /* Check that the first field is valid for returning in a register. */
6165
6166 /* ... Floats are not allowed */
6167 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6168 return true;
6169
6170 /* ... Aggregates that are not themselves valid for returning in
6171 a register are not allowed. */
6172 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6173 return true;
6174
6175 /* Now check the remaining fields, if any. Only bitfields are allowed,
6176 since they are not addressable. */
6177 for (field = DECL_CHAIN (field);
6178 field;
6179 field = DECL_CHAIN (field))
6180 {
6181 if (TREE_CODE (field) != FIELD_DECL)
6182 continue;
6183
6184 if (!DECL_BIT_FIELD_TYPE (field))
6185 return true;
6186 }
6187
6188 return false;
6189 }
6190
6191 if (TREE_CODE (type) == UNION_TYPE)
6192 {
6193 tree field;
6194
6195 /* Unions can be returned in registers if every element is
6196 integral, or can be returned in an integer register. */
6197 for (field = TYPE_FIELDS (type);
6198 field;
6199 field = DECL_CHAIN (field))
6200 {
6201 if (TREE_CODE (field) != FIELD_DECL)
6202 continue;
6203
6204 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6205 return true;
6206
6207 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6208 return true;
6209 }
6210
6211 return false;
6212 }
6213 #endif /* not ARM_WINCE */
6214
6215 /* Return all other types in memory. */
6216 return true;
6217 }
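
/* Editorial illustration, not part of the GCC sources: how the AAPCS
   rules above classify two simple aggregates.

     struct one_word  { int a; };         // <= UNITS_PER_WORD: in r0
     struct three_int { int a, b, c; };   // larger, no co-processor
                                          // claims it: returned via memory

   A homogeneous floating-point aggregate of up to four elements would
   instead be picked up by aapcs_select_return_coproc and come back in
   VFP registers when a hard-float variant is in use.  */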
6218
6219 const struct pcs_attribute_arg
6220 {
6221 const char *arg;
6222 enum arm_pcs value;
6223 } pcs_attribute_args[] =
6224 {
6225 {"aapcs", ARM_PCS_AAPCS},
6226 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6227 #if 0
6228 /* We could recognize these, but changes would be needed elsewhere
6229 * to implement them. */
6230 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6231 {"atpcs", ARM_PCS_ATPCS},
6232 {"apcs", ARM_PCS_APCS},
6233 #endif
6234 {NULL, ARM_PCS_UNKNOWN}
6235 };
6236
6237 static enum arm_pcs
6238 arm_pcs_from_attribute (tree attr)
6239 {
6240 const struct pcs_attribute_arg *ptr;
6241 const char *arg;
6242
6243 /* Get the value of the argument. */
6244 if (TREE_VALUE (attr) == NULL_TREE
6245 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6246 return ARM_PCS_UNKNOWN;
6247
6248 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6249
6250 /* Check it against the list of known arguments. */
6251 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6252 if (streq (arg, ptr->arg))
6253 return ptr->value;
6254
6255 /* An unrecognized PCS variant. */
6256 return ARM_PCS_UNKNOWN;
6257 }
6258
6259 /* Get the PCS variant to use for this call. TYPE is the function's type
6260 specification, DECL is the specific declaration. DECL may be null if
6261 the call could be indirect or if this is a library call. */
6262 static enum arm_pcs
6263 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6264 {
6265 bool user_convention = false;
6266 enum arm_pcs user_pcs = arm_pcs_default;
6267 tree attr;
6268
6269 gcc_assert (type);
6270
6271 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6272 if (attr)
6273 {
6274 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6275 user_convention = true;
6276 }
6277
6278 if (TARGET_AAPCS_BASED)
6279 {
6280 /* Detect varargs functions. These always use the base rules
6281 (no argument is ever a candidate for a co-processor
6282 register). */
6283 bool base_rules = stdarg_p (type);
6284
6285 if (user_convention)
6286 {
6287 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6288 sorry ("non-AAPCS derived PCS variant");
6289 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6290 error ("variadic functions must use the base AAPCS variant");
6291 }
6292
6293 if (base_rules)
6294 return ARM_PCS_AAPCS;
6295 else if (user_convention)
6296 return user_pcs;
6297 #if 0
6298 /* Unfortunately, this is not safe and can lead to wrong code
6299 being generated (PR96882). Not all calls into the back-end
6300 pass the DECL, so it is unsafe to make any PCS-changing
6301 decisions based on it. In particular the RETURN_IN_MEMORY
6302 hook is only ever passed a TYPE. This needs revisiting to
6303 see if there are any partial improvements that can be
6304 re-enabled. */
6305 else if (decl && flag_unit_at_a_time)
6306 {
6307 /* Local functions never leak outside this compilation unit,
6308 so we are free to use whatever conventions are
6309 appropriate. */
6310 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6311 cgraph_node *local_info_node
6312 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6313 if (local_info_node && local_info_node->local)
6314 return ARM_PCS_AAPCS_LOCAL;
6315 }
6316 #endif
6317 }
6318 else if (user_convention && user_pcs != arm_pcs_default)
6319 sorry ("PCS variant");
6320
6321 /* For everything else we use the target's default. */
6322 return arm_pcs_default;
6323 }
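
/* Editorial illustration, not part of the GCC sources: the attribute
   this function inspects, as it appears in user code.

     double fma3 (double, double, double) __attribute__ ((pcs ("aapcs-vfp")));

   On an AAPCS-based target this selects the VFP calling variant for the
   declared function, while variadic functions always get the base
   variant (a conflicting attribute is diagnosed above).  */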
6324
6325
6326 static void
6327 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6328 const_tree fntype ATTRIBUTE_UNUSED,
6329 rtx libcall ATTRIBUTE_UNUSED,
6330 const_tree fndecl ATTRIBUTE_UNUSED)
6331 {
6332 /* Record the unallocated VFP registers. */
6333 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6334 pcum->aapcs_vfp_reg_alloc = 0;
6335 }
6336
6337 /* Bitmasks that indicate whether earlier versions of GCC would have
6338 taken a different path through the ABI logic. This should result in
6339 a -Wpsabi warning if the earlier path led to a different ABI decision.
6340
6341 WARN_PSABI_EMPTY_CXX17_BASE
6342 Indicates that the type includes an artificial empty C++17 base field
6343 that, prior to GCC 10.1, would prevent the type from being treated as
6344 a HFA or HVA. See PR94711 for details.
6345
6346 WARN_PSABI_NO_UNIQUE_ADDRESS
6347 Indicates that the type includes an empty [[no_unique_address]] field
6348 that, prior to GCC 10.1, would prevent the type from being treated as
6349 a HFA or HVA. */
6350 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6351 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6352 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
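
/* Editorial illustration, not part of the GCC sources: the kinds of C++
   types these flags describe.

     struct empty {};
     struct hfa : empty { float x, y; };                         // empty C++17 base
     struct pair { [[no_unique_address]] empty e; float x, y; }; // empty member

   Before GCC 10.1 the artificial empty member stopped such types from
   being treated as homogeneous FP aggregates; the flags let the
   -Wpsabi machinery report that the passing convention has since
   changed.  */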
6353
6354 /* Walk down the type tree of TYPE counting consecutive base elements.
6355 If *MODEP is VOIDmode, then set it to the first valid floating point
6356 type. If a non-floating point type is found, or if a floating point
6357 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6358 otherwise return the count in the sub-tree.
6359
6360 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6361 function has changed its behavior relative to earlier versions of GCC.
6362 Normally the argument should be nonnull and point to a zero-initialized
6363 variable. The function then records whether the ABI decision might
6364 be affected by a known fix to the ABI logic, setting the associated
6365 WARN_PSABI_* bits if so.
6366
6367 When the argument is instead a null pointer, the function tries to
6368 simulate the behavior of GCC before all such ABI fixes were made.
6369 This is useful to check whether the function returns something
6370 different after the ABI fixes. */
6371 static int
6372 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6373 unsigned int *warn_psabi_flags)
6374 {
6375 machine_mode mode;
6376 HOST_WIDE_INT size;
6377
6378 switch (TREE_CODE (type))
6379 {
6380 case REAL_TYPE:
6381 mode = TYPE_MODE (type);
6382 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6383 return -1;
6384
6385 if (*modep == VOIDmode)
6386 *modep = mode;
6387
6388 if (*modep == mode)
6389 return 1;
6390
6391 break;
6392
6393 case COMPLEX_TYPE:
6394 mode = TYPE_MODE (TREE_TYPE (type));
6395 if (mode != DFmode && mode != SFmode)
6396 return -1;
6397
6398 if (*modep == VOIDmode)
6399 *modep = mode;
6400
6401 if (*modep == mode)
6402 return 2;
6403
6404 break;
6405
6406 case VECTOR_TYPE:
6407 /* Use V2SImode and V4SImode as representatives of all 64-bit
6408 and 128-bit vector types, whether or not those modes are
6409 supported with the present options. */
6410 size = int_size_in_bytes (type);
6411 switch (size)
6412 {
6413 case 8:
6414 mode = V2SImode;
6415 break;
6416 case 16:
6417 mode = V4SImode;
6418 break;
6419 default:
6420 return -1;
6421 }
6422
6423 if (*modep == VOIDmode)
6424 *modep = mode;
6425
6426 /* Vector modes are considered to be opaque: two vectors are
6427 equivalent for the purposes of being homogeneous aggregates
6428 if they are the same size. */
6429 if (*modep == mode)
6430 return 1;
6431
6432 break;
6433
6434 case ARRAY_TYPE:
6435 {
6436 int count;
6437 tree index = TYPE_DOMAIN (type);
6438
6439 /* Can't handle incomplete types nor sizes that are not
6440 fixed. */
6441 if (!COMPLETE_TYPE_P (type)
6442 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6443 return -1;
6444
6445 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6446 warn_psabi_flags);
6447 if (count == -1
6448 || !index
6449 || !TYPE_MAX_VALUE (index)
6450 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6451 || !TYPE_MIN_VALUE (index)
6452 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6453 || count < 0)
6454 return -1;
6455
6456 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6457 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6458
6459 /* There must be no padding. */
6460 if (wi::to_wide (TYPE_SIZE (type))
6461 != count * GET_MODE_BITSIZE (*modep))
6462 return -1;
6463
6464 return count;
6465 }
6466
6467 case RECORD_TYPE:
6468 {
6469 int count = 0;
6470 int sub_count;
6471 tree field;
6472
6473 /* Can't handle incomplete types nor sizes that are not
6474 fixed. */
6475 if (!COMPLETE_TYPE_P (type)
6476 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6477 return -1;
6478
6479 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6480 {
6481 if (TREE_CODE (field) != FIELD_DECL)
6482 continue;
6483
6484 if (DECL_FIELD_ABI_IGNORED (field))
6485 {
6486 /* See whether this is something that earlier versions of
6487 GCC failed to ignore. */
6488 unsigned int flag;
6489 if (lookup_attribute ("no_unique_address",
6490 DECL_ATTRIBUTES (field)))
6491 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6492 else if (cxx17_empty_base_field_p (field))
6493 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6494 else
6495 /* No compatibility problem. */
6496 continue;
6497
6498 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6499 if (warn_psabi_flags)
6500 {
6501 *warn_psabi_flags |= flag;
6502 continue;
6503 }
6504 }
6505 /* A zero-width bitfield may affect layout in some
6506 circumstances, but adds no members. The determination
6507 of whether or not a type is an HFA is performed after
6508 layout is complete, so if the type still looks like an
6509 HFA afterwards, it is still classed as one. This is
6510 potentially an ABI break for the hard-float ABI. */
6511 else if (DECL_BIT_FIELD (field)
6512 && integer_zerop (DECL_SIZE (field)))
6513 {
6514 /* Prior to GCC-12 these fields were stripped early,
6515 hiding them from the back-end entirely and
6516 resulting in the correct behaviour for argument
6517 passing. Simulate that old behaviour without
6518 generating a warning. */
6519 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6520 continue;
6521 if (warn_psabi_flags)
6522 {
6523 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6524 continue;
6525 }
6526 }
6527
6528 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6529 warn_psabi_flags);
6530 if (sub_count < 0)
6531 return -1;
6532 count += sub_count;
6533 }
6534
6535 /* There must be no padding. */
6536 if (wi::to_wide (TYPE_SIZE (type))
6537 != count * GET_MODE_BITSIZE (*modep))
6538 return -1;
6539
6540 return count;
6541 }
6542
6543 case UNION_TYPE:
6544 case QUAL_UNION_TYPE:
6545 {
6546 /* These aren't very interesting except in a degenerate case. */
6547 int count = 0;
6548 int sub_count;
6549 tree field;
6550
6551 /* Can't handle incomplete types nor sizes that are not
6552 fixed. */
6553 if (!COMPLETE_TYPE_P (type)
6554 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6555 return -1;
6556
6557 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6558 {
6559 if (TREE_CODE (field) != FIELD_DECL)
6560 continue;
6561
6562 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6563 warn_psabi_flags);
6564 if (sub_count < 0)
6565 return -1;
6566 count = count > sub_count ? count : sub_count;
6567 }
6568
6569 /* There must be no padding. */
6570 if (wi::to_wide (TYPE_SIZE (type))
6571 != count * GET_MODE_BITSIZE (*modep))
6572 return -1;
6573
6574 return count;
6575 }
6576
6577 default:
6578 break;
6579 }
6580
6581 return -1;
6582 }
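
/* Editorial illustration, not part of the GCC sources: how the walk
   above classifies a few argument types.

     struct v3  { float x, y, z; };        // count 3, base mode SFmode
     struct d2  { double d[2]; };          // count 2, base mode DFmode
     struct mix { float x; double y; };    // mixed modes: returns -1

   A count of 1..4 with a single floating-point or vector base mode is
   what later makes the type a VFP call/return candidate.  */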
6583
6584 /* Return true if PCS_VARIANT should use VFP registers. */
6585 static bool
6586 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6587 {
6588 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6589 {
6590 static bool seen_thumb1_vfp = false;
6591
6592 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6593 {
6594 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6595 /* sorry() is not immediately fatal, so only display this once. */
6596 seen_thumb1_vfp = true;
6597 }
6598
6599 return true;
6600 }
6601
6602 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6603 return false;
6604
6605 return (TARGET_32BIT && TARGET_HARD_FLOAT
6606 && (TARGET_VFP_DOUBLE || !is_double));
6607 }
6608
6609 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6610 suitable for passing or returning in VFP registers for the PCS
6611 variant selected. If it is, then *BASE_MODE is updated to contain
6612 a machine mode describing each element of the argument's type and
6613 *COUNT to hold the number of such elements. */
6614 static bool
6615 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6616 machine_mode mode, const_tree type,
6617 machine_mode *base_mode, int *count)
6618 {
6619 machine_mode new_mode = VOIDmode;
6620
6621 /* If we have the type information, prefer that to working things
6622 out from the mode. */
6623 if (type)
6624 {
6625 unsigned int warn_psabi_flags = 0;
6626 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6627 &warn_psabi_flags);
6628 if (ag_count > 0 && ag_count <= 4)
6629 {
6630 static unsigned last_reported_type_uid;
6631 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6632 int alt;
6633 if (warn_psabi
6634 && warn_psabi_flags
6635 && uid != last_reported_type_uid
6636 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6637 != ag_count))
6638 {
6639 const char *url10
6640 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6641 const char *url12
6642 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6643 gcc_assert (alt == -1);
6644 last_reported_type_uid = uid;
6645 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6646 qualification. */
6647 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6648 inform (input_location, "parameter passing for argument of "
6649 "type %qT with %<[[no_unique_address]]%> members "
6650 "changed %{in GCC 10.1%}",
6651 TYPE_MAIN_VARIANT (type), url10);
6652 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6653 inform (input_location, "parameter passing for argument of "
6654 "type %qT when C++17 is enabled changed to match "
6655 "C++14 %{in GCC 10.1%}",
6656 TYPE_MAIN_VARIANT (type), url10);
6657 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6658 inform (input_location, "parameter passing for argument of "
6659 "type %qT changed %{in GCC 12.1%}",
6660 TYPE_MAIN_VARIANT (type), url12);
6661 }
6662 *count = ag_count;
6663 }
6664 else
6665 return false;
6666 }
6667 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6668 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6669 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6670 {
6671 *count = 1;
6672 new_mode = mode;
6673 }
6674 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6675 {
6676 *count = 2;
6677 new_mode = (mode == DCmode ? DFmode : SFmode);
6678 }
6679 else
6680 return false;
6681
6682
6683 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6684 return false;
6685
6686 *base_mode = new_mode;
6687
6688 if (TARGET_GENERAL_REGS_ONLY)
6689 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6690 type);
6691
6692 return true;
6693 }
6694
6695 static bool
6696 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6697 machine_mode mode, const_tree type)
6698 {
6699 int count ATTRIBUTE_UNUSED;
6700 machine_mode ag_mode ATTRIBUTE_UNUSED;
6701
6702 if (!use_vfp_abi (pcs_variant, false))
6703 return false;
6704 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6705 &ag_mode, &count);
6706 }
6707
6708 static bool
6709 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6710 const_tree type)
6711 {
6712 if (!use_vfp_abi (pcum->pcs_variant, false))
6713 return false;
6714
6715 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6716 &pcum->aapcs_vfp_rmode,
6717 &pcum->aapcs_vfp_rcount);
6718 }
6719
6720 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6721 for the behaviour of this function. */
6722
6723 static bool
6724 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6725 const_tree type ATTRIBUTE_UNUSED)
6726 {
6727 int rmode_size
6728 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6729 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6730 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6731 int regno;
6732
6733 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6734 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6735 {
6736 pcum->aapcs_vfp_reg_alloc = mask << regno;
6737 if (mode == BLKmode
6738 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6739 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6740 {
6741 int i;
6742 int rcount = pcum->aapcs_vfp_rcount;
6743 int rshift = shift;
6744 machine_mode rmode = pcum->aapcs_vfp_rmode;
6745 rtx par;
6746 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6747 {
6748 /* Avoid using unsupported vector modes. */
6749 if (rmode == V2SImode)
6750 rmode = DImode;
6751 else if (rmode == V4SImode)
6752 {
6753 rmode = DImode;
6754 rcount *= 2;
6755 rshift /= 2;
6756 }
6757 }
6758 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6759 for (i = 0; i < rcount; i++)
6760 {
6761 rtx tmp = gen_rtx_REG (rmode,
6762 FIRST_VFP_REGNUM + regno + i * rshift);
6763 tmp = gen_rtx_EXPR_LIST
6764 (VOIDmode, tmp,
6765 GEN_INT (i * GET_MODE_SIZE (rmode)));
6766 XVECEXP (par, 0, i) = tmp;
6767 }
6768
6769 pcum->aapcs_reg = par;
6770 }
6771 else
6772 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6773 return true;
6774 }
6775 return false;
6776 }
6777
6778 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6779 comment there for the behaviour of this function. */
6780
6781 static rtx
6782 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6783 machine_mode mode,
6784 const_tree type ATTRIBUTE_UNUSED)
6785 {
6786 if (!use_vfp_abi (pcs_variant, false))
6787 return NULL;
6788
6789 if (mode == BLKmode
6790 || (GET_MODE_CLASS (mode) == MODE_INT
6791 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6792 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6793 {
6794 int count;
6795 machine_mode ag_mode;
6796 int i;
6797 rtx par;
6798 int shift;
6799
6800 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6801 &ag_mode, &count);
6802
6803 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6804 {
6805 if (ag_mode == V2SImode)
6806 ag_mode = DImode;
6807 else if (ag_mode == V4SImode)
6808 {
6809 ag_mode = DImode;
6810 count *= 2;
6811 }
6812 }
6813 	      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6814 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6815 for (i = 0; i < count; i++)
6816 {
6817 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6818 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6819 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6820 XVECEXP (par, 0, i) = tmp;
6821 }
6822
6823 return par;
6824 }
6825
6826 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6827 }
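
/* For illustration: when the candidate is, say, a homogeneous aggregate of
   two doubles passed as BLKmode, the PARALLEL built above describes DFmode
   pieces in s0/s1 and s2/s3 (i.e. d0 and d1) at byte offsets 0 and 8;
   otherwise the whole value is simply returned in a single register
   starting at FIRST_VFP_REGNUM (s0).  */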
6828
6829 static void
6830 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6831 machine_mode mode ATTRIBUTE_UNUSED,
6832 const_tree type ATTRIBUTE_UNUSED)
6833 {
6834 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6835 pcum->aapcs_vfp_reg_alloc = 0;
6836 return;
6837 }
6838
6839 #define AAPCS_CP(X) \
6840 { \
6841 aapcs_ ## X ## _cum_init, \
6842 aapcs_ ## X ## _is_call_candidate, \
6843 aapcs_ ## X ## _allocate, \
6844 aapcs_ ## X ## _is_return_candidate, \
6845 aapcs_ ## X ## _allocate_return_reg, \
6846 aapcs_ ## X ## _advance \
6847 }
6848
6849 /* Table of co-processors that can be used to pass arguments in
6850    registers.  Ideally no argument should be a candidate for more than
6851 one co-processor table entry, but the table is processed in order
6852 and stops after the first match. If that entry then fails to put
6853 the argument into a co-processor register, the argument will go on
6854 the stack. */
6855 static struct
6856 {
6857 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6858 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6859
6860 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6861 BLKmode) is a candidate for this co-processor's registers; this
6862 function should ignore any position-dependent state in
6863 CUMULATIVE_ARGS and only use call-type dependent information. */
6864 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6865
6866 /* Return true if the argument does get a co-processor register; it
6867 should set aapcs_reg to an RTX of the register allocated as is
6868 required for a return from FUNCTION_ARG. */
6869 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6870
6871 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6872 be returned in this co-processor's registers. */
6873 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6874
6875 /* Allocate and return an RTX element to hold the return type of a call. This
6876 routine must not fail and will only be called if is_return_candidate
6877 returned true with the same parameters. */
6878 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6879
6880 /* Finish processing this argument and prepare to start processing
6881 the next one. */
6882 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6883 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6884 {
6885 AAPCS_CP(vfp)
6886 };
6887
6888 #undef AAPCS_CP
6889
6890 static int
6891 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6892 const_tree type)
6893 {
6894 int i;
6895
6896 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6897 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6898 return i;
6899
6900 return -1;
6901 }
6902
6903 static int
6904 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6905 {
6906 /* We aren't passed a decl, so we can't check that a call is local.
6907 However, it isn't clear that that would be a win anyway, since it
6908 might limit some tail-calling opportunities. */
6909 enum arm_pcs pcs_variant;
6910
6911 if (fntype)
6912 {
6913 const_tree fndecl = NULL_TREE;
6914
6915 if (TREE_CODE (fntype) == FUNCTION_DECL)
6916 {
6917 fndecl = fntype;
6918 fntype = TREE_TYPE (fntype);
6919 }
6920
6921 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6922 }
6923 else
6924 pcs_variant = arm_pcs_default;
6925
6926 if (pcs_variant != ARM_PCS_AAPCS)
6927 {
6928 int i;
6929
6930 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6931 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6932 TYPE_MODE (type),
6933 type))
6934 return i;
6935 }
6936 return -1;
6937 }
6938
6939 static rtx
6940 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6941 const_tree fntype)
6942 {
6943 /* We aren't passed a decl, so we can't check that a call is local.
6944 However, it isn't clear that that would be a win anyway, since it
6945 might limit some tail-calling opportunities. */
6946 enum arm_pcs pcs_variant;
6947 int unsignedp ATTRIBUTE_UNUSED;
6948
6949 if (fntype)
6950 {
6951 const_tree fndecl = NULL_TREE;
6952
6953 if (TREE_CODE (fntype) == FUNCTION_DECL)
6954 {
6955 fndecl = fntype;
6956 fntype = TREE_TYPE (fntype);
6957 }
6958
6959 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6960 }
6961 else
6962 pcs_variant = arm_pcs_default;
6963
6964 /* Promote integer types. */
6965 if (type && INTEGRAL_TYPE_P (type))
6966 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6967
6968 if (pcs_variant != ARM_PCS_AAPCS)
6969 {
6970 int i;
6971
6972 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6973 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6974 type))
6975 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6976 mode, type);
6977 }
6978
6979 /* Promotes small structs returned in a register to full-word size
6980 for big-endian AAPCS. */
6981 if (type && arm_return_in_msb (type))
6982 {
6983 HOST_WIDE_INT size = int_size_in_bytes (type);
6984 if (size % UNITS_PER_WORD != 0)
6985 {
6986 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6987 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6988 }
6989 }
6990
6991 return gen_rtx_REG (mode, R0_REGNUM);
6992 }
6993
6994 static rtx
6995 aapcs_libcall_value (machine_mode mode)
6996 {
6997 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6998 && GET_MODE_SIZE (mode) <= 4)
6999 mode = SImode;
7000
7001 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7002 }
7003
7004 /* Lay out a function argument using the AAPCS rules. The rule
7005 numbers referred to here are those in the AAPCS. */
7006 static void
7007 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7008 const_tree type, bool named)
7009 {
7010 int nregs, nregs2;
7011 int ncrn;
7012
7013 /* We only need to do this once per argument. */
7014 if (pcum->aapcs_arg_processed)
7015 return;
7016
7017 pcum->aapcs_arg_processed = true;
7018
7019 /* Special case: if named is false then we are handling an incoming
7020 anonymous argument which is on the stack. */
7021 if (!named)
7022 return;
7023
7024 /* Is this a potential co-processor register candidate? */
7025 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7026 {
7027 int slot = aapcs_select_call_coproc (pcum, mode, type);
7028 pcum->aapcs_cprc_slot = slot;
7029
7030 /* We don't have to apply any of the rules from part B of the
7031 preparation phase, these are handled elsewhere in the
7032 compiler. */
7033
7034 if (slot >= 0)
7035 {
7036 /* A Co-processor register candidate goes either in its own
7037 class of registers or on the stack. */
7038 if (!pcum->aapcs_cprc_failed[slot])
7039 {
7040 /* C1.cp - Try to allocate the argument to co-processor
7041 registers. */
7042 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7043 return;
7044
7045 /* C2.cp - Put the argument on the stack and note that we
7046 can't assign any more candidates in this slot. We also
7047 need to note that we have allocated stack space, so that
7048 we won't later try to split a non-cprc candidate between
7049 core registers and the stack. */
7050 pcum->aapcs_cprc_failed[slot] = true;
7051 pcum->can_split = false;
7052 }
7053
7054 /* We didn't get a register, so this argument goes on the
7055 stack. */
7056 gcc_assert (pcum->can_split == false);
7057 return;
7058 }
7059 }
7060
7061 /* C3 - For double-word aligned arguments, round the NCRN up to the
7062 next even number. */
7063 ncrn = pcum->aapcs_ncrn;
7064 if (ncrn & 1)
7065 {
7066 int res = arm_needs_doubleword_align (mode, type);
7067 /* Only warn during RTL expansion of call stmts, otherwise we would
7068 warn e.g. during gimplification even on functions that will be
7069 always inlined, and we'd warn multiple times. Don't warn when
7070 called in expand_function_start either, as we warn instead in
7071 arm_function_arg_boundary in that case. */
7072 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7073 inform (input_location, "parameter passing for argument of type "
7074 "%qT changed in GCC 7.1", type);
7075 else if (res > 0)
7076 ncrn++;
7077 }
7078
7079   nregs = ARM_NUM_REGS2 (mode, type);
7080
7081 /* Sigh, this test should really assert that nregs > 0, but a GCC
7082      extension allows empty structs and then gives them zero size; it
7083 then allows such a structure to be passed by value. For some of
7084 the code below we have to pretend that such an argument has
7085 non-zero size so that we 'locate' it correctly either in
7086 registers or on the stack. */
7087 gcc_assert (nregs >= 0);
7088
7089 nregs2 = nregs ? nregs : 1;
7090
7091 /* C4 - Argument fits entirely in core registers. */
7092 if (ncrn + nregs2 <= NUM_ARG_REGS)
7093 {
7094 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7095 pcum->aapcs_next_ncrn = ncrn + nregs;
7096 return;
7097 }
7098
7099 /* C5 - Some core registers left and there are no arguments already
7100 on the stack: split this argument between the remaining core
7101 registers and the stack. */
7102 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7103 {
7104 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7105 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7106 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7107 return;
7108 }
7109
7110 /* C6 - NCRN is set to 4. */
7111 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7112
7113   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
7114 return;
7115 }
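
/* A rough example of rules C3-C5 above, under the AAPCS base variant (so a
   "double" is not a VFP candidate): assume r0 already holds an "int"
   argument (NCRN == 1) and the next argument is a "double".
   arm_needs_doubleword_align returns 1, so C3 rounds NCRN up to 2 and C4
   places the double in r2/r3.  If instead NCRN were 3, C5 would split the
   double between r3 and the stack (aapcs_partial == 4), provided no earlier
   argument had already been placed on the stack.  */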
7116
7117 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7118 for a call to a function whose data type is FNTYPE.
7119 For a library call, FNTYPE is NULL. */
7120 void
7121 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7122 rtx libname,
7123 tree fndecl ATTRIBUTE_UNUSED)
7124 {
7125 /* Long call handling. */
7126 if (fntype)
7127 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7128 else
7129 pcum->pcs_variant = arm_pcs_default;
7130
7131 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7132 {
7133 if (arm_libcall_uses_aapcs_base (libname))
7134 pcum->pcs_variant = ARM_PCS_AAPCS;
7135
7136 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7137 pcum->aapcs_reg = NULL_RTX;
7138 pcum->aapcs_partial = 0;
7139 pcum->aapcs_arg_processed = false;
7140 pcum->aapcs_cprc_slot = -1;
7141 pcum->can_split = true;
7142
7143 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7144 {
7145 int i;
7146
7147 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7148 {
7149 pcum->aapcs_cprc_failed[i] = false;
7150 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7151 }
7152 }
7153 return;
7154 }
7155
7156 /* Legacy ABIs */
7157
7158 /* On the ARM, the offset starts at 0. */
7159 pcum->nregs = 0;
7160 pcum->iwmmxt_nregs = 0;
7161 pcum->can_split = true;
7162
7163 /* Varargs vectors are treated the same as long long.
7164      named_count avoids having to change the way arm handles 'named'.  */
7165 pcum->named_count = 0;
7166 pcum->nargs = 0;
7167
7168 if (TARGET_REALLY_IWMMXT && fntype)
7169 {
7170 tree fn_arg;
7171
7172 for (fn_arg = TYPE_ARG_TYPES (fntype);
7173 fn_arg;
7174 fn_arg = TREE_CHAIN (fn_arg))
7175 pcum->named_count += 1;
7176
7177 if (! pcum->named_count)
7178 pcum->named_count = INT_MAX;
7179 }
7180 }
7181
7182 /* Return 2 if double word alignment is required for argument passing,
7183 but wasn't required before the fix for PR88469.
7184 Return 1 if double word alignment is required for argument passing.
7185 Return -1 if double word alignment used to be required for argument
7186 passing before PR77728 ABI fix, but is not required anymore.
7187    Return 0 if double word alignment is not required and wasn't required
7188 before either. */
7189 static int
7190 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7191 {
7192 if (!type)
7193 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7194
7195 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7196 if (!AGGREGATE_TYPE_P (type))
7197 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7198
7199 /* Array types: Use member alignment of element type. */
7200 if (TREE_CODE (type) == ARRAY_TYPE)
7201 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7202
7203 int ret = 0;
7204 int ret2 = 0;
7205 /* Record/aggregate types: Use greatest member alignment of any member.
7206
7207 Note that we explicitly consider zero-sized fields here, even though
7208 they don't map to AAPCS machine types. For example, in:
7209
7210 struct __attribute__((aligned(8))) empty {};
7211
7212 struct s {
7213 [[no_unique_address]] empty e;
7214 int x;
7215 };
7216
7217 "s" contains only one Fundamental Data Type (the int field)
7218 but gains 8-byte alignment and size thanks to "e". */
7219 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7220 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7221 {
7222 if (TREE_CODE (field) == FIELD_DECL)
7223 return 1;
7224 else
7225 /* Before PR77728 fix, we were incorrectly considering also
7226 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7227 Make sure we can warn about that with -Wpsabi. */
7228 ret = -1;
7229 }
7230 else if (TREE_CODE (field) == FIELD_DECL
7231 && DECL_BIT_FIELD_TYPE (field)
7232 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7233 ret2 = 1;
7234
7235 if (ret2)
7236 return 2;
7237
7238 return ret;
7239 }
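
/* Some concrete examples of the above: "long long" and "double" (and any
   struct containing such a member as a FIELD_DECL) have 8-byte alignment
   and so yield 1, while plain "int" yields 0.  A struct whose only
   over-aligned member is a bit-field with an 8-byte aligned declared type
   yields 2, so the caller can emit the GCC 9.1 -Wpsabi note; a struct
   whose over-alignment comes only from a non-FIELD_DECL (the pre-PR77728
   behaviour) yields -1.  */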
7240
7241
7242 /* Determine where to put an argument to a function.
7243 Value is zero to push the argument on the stack,
7244 or a hard register in which to store the argument.
7245
7246 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7247 the preceding args and about the function being called.
7248 ARG is a description of the argument.
7249
7250 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7251 other arguments are passed on the stack. If (NAMED == 0) (which happens
7252 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7253 defined), say it is passed in the stack (function_prologue will
7254 indeed make it pass in the stack if necessary). */
7255
7256 static rtx
7257 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7258 {
7259 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7260 int nregs;
7261
7262 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7263 a call insn (op3 of a call_value insn). */
7264 if (arg.end_marker_p ())
7265 return const0_rtx;
7266
7267 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7268 {
7269 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7270 return pcum->aapcs_reg;
7271 }
7272
7273 /* Varargs vectors are treated the same as long long.
7274      named_count avoids having to change the way arm handles 'named'.  */
7275 if (TARGET_IWMMXT_ABI
7276 && arm_vector_mode_supported_p (arg.mode)
7277 && pcum->named_count > pcum->nargs + 1)
7278 {
7279 if (pcum->iwmmxt_nregs <= 9)
7280 return gen_rtx_REG (arg.mode,
7281 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7282 else
7283 {
7284 pcum->can_split = false;
7285 return NULL_RTX;
7286 }
7287 }
7288
7289 /* Put doubleword aligned quantities in even register pairs. */
7290 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7291 {
7292 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7293 if (res < 0 && warn_psabi)
7294 inform (input_location, "parameter passing for argument of type "
7295 "%qT changed in GCC 7.1", arg.type);
7296 else if (res > 0)
7297 {
7298 pcum->nregs++;
7299 if (res > 1 && warn_psabi)
7300 inform (input_location, "parameter passing for argument of type "
7301 "%qT changed in GCC 9.1", arg.type);
7302 }
7303 }
7304
7305 /* Only allow splitting an arg between regs and memory if all preceding
7306 args were allocated to regs. For args passed by reference we only count
7307 the reference pointer. */
7308 if (pcum->can_split)
7309 nregs = 1;
7310 else
7311 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7312
7313 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7314 return NULL_RTX;
7315
7316 return gen_rtx_REG (arg.mode, pcum->nregs);
7317 }
7318
7319 static unsigned int
7320 arm_function_arg_boundary (machine_mode mode, const_tree type)
7321 {
7322 if (!ARM_DOUBLEWORD_ALIGN)
7323 return PARM_BOUNDARY;
7324
7325 int res = arm_needs_doubleword_align (mode, type);
7326 if (res < 0 && warn_psabi)
7327 inform (input_location, "parameter passing for argument of type %qT "
7328 "changed in GCC 7.1", type);
7329 if (res > 1 && warn_psabi)
7330 inform (input_location, "parameter passing for argument of type "
7331 "%qT changed in GCC 9.1", type);
7332
7333 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7334 }
7335
7336 static int
7337 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7338 {
7339 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7340 int nregs = pcum->nregs;
7341
7342 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7343 {
7344 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7345 return pcum->aapcs_partial;
7346 }
7347
7348 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7349 return 0;
7350
7351 if (NUM_ARG_REGS > nregs
7352 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7353 && pcum->can_split)
7354 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7355
7356 return 0;
7357 }
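
/* For example, in the legacy (non-AAPCS) path above: if three core argument
   registers are already in use (nregs == 3) and the next argument needs two
   registers, NUM_ARG_REGS (4) is greater than 3 but less than 3 + 2, so
   4 bytes of the argument go in r3 and the remainder is passed on the
   stack.  */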
7358
7359 /* Update the data in PCUM to advance over argument ARG. */
7360
7361 static void
7362 arm_function_arg_advance (cumulative_args_t pcum_v,
7363 const function_arg_info &arg)
7364 {
7365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7366
7367 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7368 {
7369 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7370
7371 if (pcum->aapcs_cprc_slot >= 0)
7372 {
7373 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7374 arg.type);
7375 pcum->aapcs_cprc_slot = -1;
7376 }
7377
7378 /* Generic stuff. */
7379 pcum->aapcs_arg_processed = false;
7380 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7381 pcum->aapcs_reg = NULL_RTX;
7382 pcum->aapcs_partial = 0;
7383 }
7384 else
7385 {
7386 pcum->nargs += 1;
7387 if (arm_vector_mode_supported_p (arg.mode)
7388 && pcum->named_count > pcum->nargs
7389 && TARGET_IWMMXT_ABI)
7390 pcum->iwmmxt_nregs += 1;
7391 else
7392 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7393 }
7394 }
7395
7396 /* Variable sized types are passed by reference. This is a GCC
7397 extension to the ARM ABI. */
7398
7399 static bool
7400 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7401 {
7402 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7403 }
7404 \f
7405 /* Encode the current state of the #pragma [no_]long_calls. */
7406 typedef enum
7407 {
7408 OFF, /* No #pragma [no_]long_calls is in effect. */
7409 LONG, /* #pragma long_calls is in effect. */
7410 SHORT /* #pragma no_long_calls is in effect. */
7411 } arm_pragma_enum;
7412
7413 static arm_pragma_enum arm_pragma_long_calls = OFF;
7414
7415 void
7416 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7417 {
7418 arm_pragma_long_calls = LONG;
7419 }
7420
7421 void
7422 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7423 {
7424 arm_pragma_long_calls = SHORT;
7425 }
7426
7427 void
7428 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7429 {
7430 arm_pragma_long_calls = OFF;
7431 }
7432 \f
7433 /* Handle an attribute requiring a FUNCTION_DECL;
7434 arguments as in struct attribute_spec.handler. */
7435 static tree
7436 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7437 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7438 {
7439 if (TREE_CODE (*node) != FUNCTION_DECL)
7440 {
7441 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7442 name);
7443 *no_add_attrs = true;
7444 }
7445
7446 return NULL_TREE;
7447 }
7448
7449 /* Handle an "interrupt" or "isr" attribute;
7450 arguments as in struct attribute_spec.handler. */
7451 static tree
7452 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7453 bool *no_add_attrs)
7454 {
7455 if (DECL_P (*node))
7456 {
7457 if (TREE_CODE (*node) != FUNCTION_DECL)
7458 {
7459 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7460 name);
7461 *no_add_attrs = true;
7462 }
7463 else if (TARGET_VFP_BASE)
7464 {
7465 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7466 name);
7467 }
7468 /* FIXME: the argument if any is checked for type attributes;
7469 should it be checked for decl ones? */
7470 }
7471 else
7472 {
7473 if (FUNC_OR_METHOD_TYPE_P (*node))
7474 {
7475 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7476 {
7477 warning (OPT_Wattributes, "%qE attribute ignored",
7478 name);
7479 *no_add_attrs = true;
7480 }
7481 }
7482 else if (TREE_CODE (*node) == POINTER_TYPE
7483 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7484 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7485 {
7486 *node = build_variant_type_copy (*node);
7487 TREE_TYPE (*node) = build_type_attribute_variant
7488 (TREE_TYPE (*node),
7489 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7490 *no_add_attrs = true;
7491 }
7492 else
7493 {
7494 /* Possibly pass this attribute on from the type to a decl. */
7495 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7496 | (int) ATTR_FLAG_FUNCTION_NEXT
7497 | (int) ATTR_FLAG_ARRAY_NEXT))
7498 {
7499 *no_add_attrs = true;
7500 return tree_cons (name, args, NULL_TREE);
7501 }
7502 else
7503 {
7504 warning (OPT_Wattributes, "%qE attribute ignored",
7505 name);
7506 }
7507 }
7508 }
7509
7510 return NULL_TREE;
7511 }
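
/* Typical uses of the attribute handled above look roughly like (the
   function names are only examples):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   An unrecognised argument makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is ignored with a warning.  */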
7512
7513 /* Handle a "pcs" attribute; arguments as in struct
7514 attribute_spec.handler. */
7515 static tree
7516 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7517 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7518 {
7519 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7520 {
7521 warning (OPT_Wattributes, "%qE attribute ignored", name);
7522 *no_add_attrs = true;
7523 }
7524 return NULL_TREE;
7525 }
7526
7527 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7528 /* Handle the "notshared" attribute. This attribute is another way of
7529 requesting hidden visibility. ARM's compiler supports
7530 "__declspec(notshared)"; we support the same thing via an
7531 attribute. */
7532
7533 static tree
7534 arm_handle_notshared_attribute (tree *node,
7535 tree name ATTRIBUTE_UNUSED,
7536 tree args ATTRIBUTE_UNUSED,
7537 int flags ATTRIBUTE_UNUSED,
7538 bool *no_add_attrs)
7539 {
7540 tree decl = TYPE_NAME (*node);
7541
7542 if (decl)
7543 {
7544 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7545 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7546 *no_add_attrs = false;
7547 }
7548 return NULL_TREE;
7549 }
7550 #endif
7551
7552 /* This function returns true if a function with declaration FNDECL and type
7553    FNTYPE uses the stack to pass arguments or return values and false
7554 otherwise. This is used for functions with the attributes
7555 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7556 diagnostic messages if the stack is used. NAME is the name of the attribute
7557 used. */
7558
7559 static bool
7560 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7561 {
7562 function_args_iterator args_iter;
7563 CUMULATIVE_ARGS args_so_far_v;
7564 cumulative_args_t args_so_far;
7565 bool first_param = true;
7566 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7567
7568 /* Error out if any argument is passed on the stack. */
7569 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7570 args_so_far = pack_cumulative_args (&args_so_far_v);
7571 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7572 {
7573 rtx arg_rtx;
7574
7575 prev_arg_type = arg_type;
7576 if (VOID_TYPE_P (arg_type))
7577 continue;
7578
7579 function_arg_info arg (arg_type, /*named=*/true);
7580 if (!first_param)
7581 /* ??? We should advance after processing the argument and pass
7582 the argument we're advancing past. */
7583 arm_function_arg_advance (args_so_far, arg);
7584 arg_rtx = arm_function_arg (args_so_far, arg);
7585 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7586 {
7587 error ("%qE attribute not available to functions with arguments "
7588 "passed on the stack", name);
7589 return true;
7590 }
7591 first_param = false;
7592 }
7593
7594 /* Error out for variadic functions since we cannot control how many
7595      arguments will be passed and thus the stack could be used.  stdarg_p () is not
7596      used for this check to avoid walking the argument list twice.  */
7597 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7598 {
7599 error ("%qE attribute not available to functions with variable number "
7600 "of arguments", name);
7601 return true;
7602 }
7603
7604 /* Error out if return value is passed on the stack. */
7605 ret_type = TREE_TYPE (fntype);
7606 if (arm_return_in_memory (ret_type, fntype))
7607 {
7608 error ("%qE attribute not available to functions that return value on "
7609 "the stack", name);
7610 return true;
7611 }
7612 return false;
7613 }
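
/* For instance, with -mcmse a declaration along the lines of

     struct many { int a[6]; };
     void __attribute__ ((cmse_nonsecure_entry)) f (struct many m);

   is rejected by the check above, because a 24-byte aggregate cannot be
   passed entirely in r0-r3 and part of it would spill to the stack.  */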
7614
7615 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7616 function will check whether the attribute is allowed here and will add the
7617 attribute to the function declaration tree or otherwise issue a warning. */
7618
7619 static tree
7620 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7621 tree /* args */,
7622 int /* flags */,
7623 bool *no_add_attrs)
7624 {
7625 tree fndecl;
7626
7627 if (!use_cmse)
7628 {
7629 *no_add_attrs = true;
7630 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7631 "option", name);
7632 return NULL_TREE;
7633 }
7634
7635 /* Ignore attribute for function types. */
7636 if (TREE_CODE (*node) != FUNCTION_DECL)
7637 {
7638 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7639 name);
7640 *no_add_attrs = true;
7641 return NULL_TREE;
7642 }
7643
7644 fndecl = *node;
7645
7646 /* Warn for static linkage functions. */
7647 if (!TREE_PUBLIC (fndecl))
7648 {
7649 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7650 "with static linkage", name);
7651 *no_add_attrs = true;
7652 return NULL_TREE;
7653 }
7654
7655 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7656 TREE_TYPE (fndecl));
7657 return NULL_TREE;
7658 }
7659
7660
7661 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7662 function will check whether the attribute is allowed here and will add the
7663 attribute to the function type tree or otherwise issue a diagnostic. The
7664 reason we check this at declaration time is to only allow the use of the
7665 attribute with declarations of function pointers and not function
7666 declarations. This function checks NODE is of the expected type and issues
7667 diagnostics otherwise using NAME. If it is not of the expected type
7668 *NO_ADD_ATTRS will be set to true. */
7669
7670 static tree
7671 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7672 tree /* args */,
7673 int /* flags */,
7674 bool *no_add_attrs)
7675 {
7676 tree decl = NULL_TREE;
7677 tree fntype, type;
7678
7679 if (!use_cmse)
7680 {
7681 *no_add_attrs = true;
7682 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7683 "option", name);
7684 return NULL_TREE;
7685 }
7686
7687 if (DECL_P (*node))
7688 {
7689 fntype = TREE_TYPE (*node);
7690
7691 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7692 decl = *node;
7693 }
7694 else
7695 fntype = *node;
7696
7697 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7698 fntype = TREE_TYPE (fntype);
7699
7700 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7701 {
7702 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7703 "function pointer", name);
7704 *no_add_attrs = true;
7705 return NULL_TREE;
7706 }
7707
7708 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7709
7710 if (*no_add_attrs)
7711 return NULL_TREE;
7712
7713 /* Prevent trees being shared among function types with and without
7714 cmse_nonsecure_call attribute. */
7715 if (decl)
7716 {
7717 type = build_distinct_type_copy (TREE_TYPE (decl));
7718 TREE_TYPE (decl) = type;
7719 }
7720 else
7721 {
7722 type = build_distinct_type_copy (*node);
7723 *node = type;
7724 }
7725
7726 fntype = type;
7727
7728 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7729 {
7730 type = fntype;
7731 fntype = TREE_TYPE (fntype);
7732 fntype = build_distinct_type_copy (fntype);
7733 TREE_TYPE (type) = fntype;
7734 }
7735
7736 /* Construct a type attribute and add it to the function type. */
7737 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7738 TYPE_ATTRIBUTES (fntype));
7739 TYPE_ATTRIBUTES (fntype) = attrs;
7740 return NULL_TREE;
7741 }
7742
7743 /* Return 0 if the attributes for two types are incompatible, 1 if they
7744 are compatible, and 2 if they are nearly compatible (which causes a
7745 warning to be generated). */
7746 static int
7747 arm_comp_type_attributes (const_tree type1, const_tree type2)
7748 {
7749 int l1, l2, s1, s2;
7750
7751 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7752 TYPE_ATTRIBUTES (type1));
7753 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7754 TYPE_ATTRIBUTES (type2));
7755 if (bool (attrs1) != bool (attrs2))
7756 return 0;
7757 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7758 return 0;
7759
7760 /* Check for mismatch of non-default calling convention. */
7761 if (TREE_CODE (type1) != FUNCTION_TYPE)
7762 return 1;
7763
7764 /* Check for mismatched call attributes. */
7765 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7766 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7767 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7768 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7769
7770 /* Only bother to check if an attribute is defined. */
7771 if (l1 | l2 | s1 | s2)
7772 {
7773 /* If one type has an attribute, the other must have the same attribute. */
7774 if ((l1 != l2) || (s1 != s2))
7775 return 0;
7776
7777 /* Disallow mixed attributes. */
7778 if ((l1 & s2) || (l2 & s1))
7779 return 0;
7780 }
7781
7782 /* Check for mismatched ISR attribute. */
7783 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7784 if (! l1)
7785 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7786 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7787 if (! l2)
7788     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7789 if (l1 != l2)
7790 return 0;
7791
7792 l1 = lookup_attribute ("cmse_nonsecure_call",
7793 TYPE_ATTRIBUTES (type1)) != NULL;
7794 l2 = lookup_attribute ("cmse_nonsecure_call",
7795 TYPE_ATTRIBUTES (type2)) != NULL;
7796
7797 if (l1 != l2)
7798 return 0;
7799
7800 return 1;
7801 }
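
/* As an example of the checks above: a function type declared with
   __attribute__ ((long_call)) is not compatible (result 0) with one
   declared short_call, nor with one carrying cmse_nonsecure_call when the
   other lacks it, while two function types that agree on all of these
   attributes compare as compatible (result 1).  */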
7802
7803 /* Assigns default attributes to newly defined type. This is used to
7804 set short_call/long_call attributes for function types of
7805 functions defined inside corresponding #pragma scopes. */
7806 static void
7807 arm_set_default_type_attributes (tree type)
7808 {
7809 /* Add __attribute__ ((long_call)) to all functions, when
7810 inside #pragma long_calls or __attribute__ ((short_call)),
7811 when inside #pragma no_long_calls. */
7812 if (FUNC_OR_METHOD_TYPE_P (type))
7813 {
7814 tree type_attr_list, attr_name;
7815 type_attr_list = TYPE_ATTRIBUTES (type);
7816
7817 if (arm_pragma_long_calls == LONG)
7818 attr_name = get_identifier ("long_call");
7819 else if (arm_pragma_long_calls == SHORT)
7820 attr_name = get_identifier ("short_call");
7821 else
7822 return;
7823
7824 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7825 TYPE_ATTRIBUTES (type) = type_attr_list;
7826 }
7827 }
7828 \f
7829 /* Return true if DECL is known to be linked into section SECTION. */
7830
7831 static bool
7832 arm_function_in_section_p (tree decl, section *section)
7833 {
7834 /* We can only be certain about the prevailing symbol definition. */
7835 if (!decl_binds_to_current_def_p (decl))
7836 return false;
7837
7838 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7839 if (!DECL_SECTION_NAME (decl))
7840 {
7841 /* Make sure that we will not create a unique section for DECL. */
7842 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7843 return false;
7844 }
7845
7846 return function_section (decl) == section;
7847 }
7848
7849 /* Return nonzero if a 32-bit "long_call" should be generated for
7850 a call from the current function to DECL. We generate a long_call
7851 if the function:
7852
7853      a.  has an __attribute__((long_call))
7854 or b. is within the scope of a #pragma long_calls
7855 or c. the -mlong-calls command line switch has been specified
7856
7857 However we do not generate a long call if the function:
7858
7859 d. has an __attribute__ ((short_call))
7860 or e. is inside the scope of a #pragma no_long_calls
7861 or f. is defined in the same section as the current function. */
7862
7863 bool
7864 arm_is_long_call_p (tree decl)
7865 {
7866 tree attrs;
7867
7868 if (!decl)
7869 return TARGET_LONG_CALLS;
7870
7871 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7872 if (lookup_attribute ("short_call", attrs))
7873 return false;
7874
7875 /* For "f", be conservative, and only cater for cases in which the
7876 whole of the current function is placed in the same section. */
7877 if (!flag_reorder_blocks_and_partition
7878 && TREE_CODE (decl) == FUNCTION_DECL
7879 && arm_function_in_section_p (decl, current_function_section ()))
7880 return false;
7881
7882 if (lookup_attribute ("long_call", attrs))
7883 return true;
7884
7885 return TARGET_LONG_CALLS;
7886 }
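
/* In source code the per-function overrides handled above are written as
   (example declarations):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   so that e.g. near_func is still called with a plain BL even when
   -mlong-calls is in effect.  */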
7887
7888 /* Return nonzero if it is ok to make a tail-call to DECL. */
7889 static bool
7890 arm_function_ok_for_sibcall (tree decl, tree exp)
7891 {
7892 unsigned long func_type;
7893
7894 if (cfun->machine->sibcall_blocked)
7895 return false;
7896
7897 if (TARGET_FDPIC)
7898 {
7899 /* In FDPIC, never tailcall something for which we have no decl:
7900 the target function could be in a different module, requiring
7901 a different FDPIC register value. */
7902 if (decl == NULL)
7903 return false;
7904 }
7905
7906 /* Never tailcall something if we are generating code for Thumb-1. */
7907 if (TARGET_THUMB1)
7908 return false;
7909
7910 /* The PIC register is live on entry to VxWorks PLT entries, so we
7911 must make the call before restoring the PIC register. */
7912 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7913 return false;
7914
7915 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7916 may be used both as target of the call and base register for restoring
7917      the VFP registers.  */
7918 if (TARGET_APCS_FRAME && TARGET_ARM
7919 && TARGET_HARD_FLOAT
7920 && decl && arm_is_long_call_p (decl))
7921 return false;
7922
7923 /* If we are interworking and the function is not declared static
7924 then we can't tail-call it unless we know that it exists in this
7925 compilation unit (since it might be a Thumb routine). */
7926 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7927 && !TREE_ASM_WRITTEN (decl))
7928 return false;
7929
7930 func_type = arm_current_func_type ();
7931 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7932 if (IS_INTERRUPT (func_type))
7933 return false;
7934
7935 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7936 generated for entry functions themselves. */
7937 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7938 return false;
7939
7940 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7941 this would complicate matters for later code generation. */
7942 if (TREE_CODE (exp) == CALL_EXPR)
7943 {
7944 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7945 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7946 return false;
7947 }
7948
7949 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7950 {
7951 /* Check that the return value locations are the same. For
7952 example that we aren't returning a value from the sibling in
7953 a VFP register but then need to transfer it to a core
7954 register. */
7955 rtx a, b;
7956 tree decl_or_type = decl;
7957
7958 /* If it is an indirect function pointer, get the function type. */
7959 if (!decl)
7960 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7961
7962 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7963 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7964 cfun->decl, false);
7965 if (!rtx_equal_p (a, b))
7966 return false;
7967 }
7968
7969 /* Never tailcall if function may be called with a misaligned SP. */
7970 if (IS_STACKALIGN (func_type))
7971 return false;
7972
7973 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7974 references should become a NOP. Don't convert such calls into
7975 sibling calls. */
7976 if (TARGET_AAPCS_BASED
7977 && arm_abi == ARM_ABI_AAPCS
7978 && decl
7979 && DECL_WEAK (decl))
7980 return false;
7981
7982 /* We cannot do a tailcall for an indirect call by descriptor if all the
7983 argument registers are used because the only register left to load the
7984 address is IP and it will already contain the static chain. */
7985 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7986 {
7987 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7988 CUMULATIVE_ARGS cum;
7989 cumulative_args_t cum_v;
7990
7991 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7992 cum_v = pack_cumulative_args (&cum);
7993
7994 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7995 {
7996 tree type = TREE_VALUE (t);
7997 if (!VOID_TYPE_P (type))
7998 {
7999 function_arg_info arg (type, /*named=*/true);
8000 arm_function_arg_advance (cum_v, arg);
8001 }
8002 }
8003
8004 function_arg_info arg (integer_type_node, /*named=*/true);
8005 if (!arm_function_arg (cum_v, arg))
8006 return false;
8007 }
8008
8009 /* Everything else is ok. */
8010 return true;
8011 }
8012
8013 \f
8014 /* Addressing mode support functions. */
8015
8016 /* Return nonzero if X is a legitimate immediate operand when compiling
8017 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8018 int
8019 legitimate_pic_operand_p (rtx x)
8020 {
8021 if (SYMBOL_REF_P (x)
8022 || (GET_CODE (x) == CONST
8023 && GET_CODE (XEXP (x, 0)) == PLUS
8024 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8025 return 0;
8026
8027 return 1;
8028 }
8029
8030 /* Record that the current function needs a PIC register. If PIC_REG is null,
8031 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8032    both cases cfun->machine->pic_reg is initialized if we have not already done
8033    so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
8034    the PIC register is reloaded in the current position of the instruction stream
8035    regardless of whether it was loaded before.  Otherwise, it is only loaded
8036 if not already done so (crtl->uses_pic_offset_table is null). Note that
8037 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8038 is only supported iff COMPUTE_NOW is false. */
8039
8040 static void
8041 require_pic_register (rtx pic_reg, bool compute_now)
8042 {
8043 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8044
8045 /* A lot of the logic here is made obscure by the fact that this
8046 routine gets called as part of the rtx cost estimation process.
8047 We don't want those calls to affect any assumptions about the real
8048 function; and further, we can't call entry_of_function() until we
8049 start the real expansion process. */
8050 if (!crtl->uses_pic_offset_table || compute_now)
8051 {
8052 gcc_assert (can_create_pseudo_p ()
8053 || (pic_reg != NULL_RTX
8054 && REG_P (pic_reg)
8055 && GET_MODE (pic_reg) == Pmode));
8056 if (arm_pic_register != INVALID_REGNUM
8057 && !compute_now
8058 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8059 {
8060 if (!cfun->machine->pic_reg)
8061 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8062
8063 /* Play games to avoid marking the function as needing pic
8064 if we are being called as part of the cost-estimation
8065 process. */
8066 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8067 crtl->uses_pic_offset_table = 1;
8068 }
8069 else
8070 {
8071 rtx_insn *seq, *insn;
8072
8073 if (pic_reg == NULL_RTX)
8074 pic_reg = gen_reg_rtx (Pmode);
8075 if (!cfun->machine->pic_reg)
8076 cfun->machine->pic_reg = pic_reg;
8077
8078 /* Play games to avoid marking the function as needing pic
8079 if we are being called as part of the cost-estimation
8080 process. */
8081 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8082 {
8083 crtl->uses_pic_offset_table = 1;
8084 start_sequence ();
8085
8086 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8087 && arm_pic_register > LAST_LO_REGNUM
8088 && !compute_now)
8089 emit_move_insn (cfun->machine->pic_reg,
8090 gen_rtx_REG (Pmode, arm_pic_register));
8091 else
8092 arm_load_pic_register (0UL, pic_reg);
8093
8094 seq = get_insns ();
8095 end_sequence ();
8096
8097 for (insn = seq; insn; insn = NEXT_INSN (insn))
8098 if (INSN_P (insn))
8099 INSN_LOCATION (insn) = prologue_location;
8100
8101 /* We can be called during expansion of PHI nodes, where
8102 we can't yet emit instructions directly in the final
8103 insn stream. Queue the insns on the entry edge, they will
8104 be committed after everything else is expanded. */
8105 if (currently_expanding_to_rtl)
8106 insert_insn_on_edge (seq,
8107 single_succ_edge
8108 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8109 else
8110 emit_insn (seq);
8111 }
8112 }
8113 }
8114 }
8115
8116 /* Generate insns to calculate the address of ORIG in pic mode. */
8117 static rtx_insn *
8118 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8119 {
8120 rtx pat;
8121 rtx mem;
8122
8123 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8124
8125 /* Make the MEM as close to a constant as possible. */
8126 mem = SET_SRC (pat);
8127 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8128 MEM_READONLY_P (mem) = 1;
8129 MEM_NOTRAP_P (mem) = 1;
8130
8131 return emit_insn (pat);
8132 }
8133
8134 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8135 created to hold the result of the load. If not NULL, PIC_REG indicates
8136 which register to use as PIC register, otherwise it is decided by register
8137 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8138    location in the instruction stream, regardless of whether it was loaded
8139 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8140 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8141
8142 Returns the register REG into which the PIC load is performed. */
8143
8144 rtx
8145 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8146 bool compute_now)
8147 {
8148 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8149
8150 if (SYMBOL_REF_P (orig)
8151 || LABEL_REF_P (orig))
8152 {
8153 if (reg == 0)
8154 {
8155 gcc_assert (can_create_pseudo_p ());
8156 reg = gen_reg_rtx (Pmode);
8157 }
8158
8159 /* VxWorks does not impose a fixed gap between segments; the run-time
8160 gap can be different from the object-file gap. We therefore can't
8161 use GOTOFF unless we are absolutely sure that the symbol is in the
8162 same segment as the GOT. Unfortunately, the flexibility of linker
8163 scripts means that we can't be sure of that in general, so assume
8164 that GOTOFF is never valid on VxWorks. */
8165 /* References to weak symbols cannot be resolved locally: they
8166 may be overridden by a non-weak definition at link time. */
8167 rtx_insn *insn;
8168 if ((LABEL_REF_P (orig)
8169 || (SYMBOL_REF_P (orig)
8170 && SYMBOL_REF_LOCAL_P (orig)
8171 && (SYMBOL_REF_DECL (orig)
8172 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8173 && (!SYMBOL_REF_FUNCTION_P (orig)
8174 || arm_fdpic_local_funcdesc_p (orig))))
8175 && NEED_GOT_RELOC
8176 && arm_pic_data_is_text_relative)
8177 insn = arm_pic_static_addr (orig, reg);
8178 else
8179 {
8180 /* If this function doesn't have a pic register, create one now. */
8181 require_pic_register (pic_reg, compute_now);
8182
8183 if (pic_reg == NULL_RTX)
8184 pic_reg = cfun->machine->pic_reg;
8185
8186 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8187 }
8188
8189 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8190 by loop. */
8191 set_unique_reg_note (insn, REG_EQUAL, orig);
8192
8193 return reg;
8194 }
8195 else if (GET_CODE (orig) == CONST)
8196 {
8197 rtx base, offset;
8198
8199 if (GET_CODE (XEXP (orig, 0)) == PLUS
8200 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8201 return orig;
8202
8203 /* Handle the case where we have: const (UNSPEC_TLS). */
8204 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8205 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8206 return orig;
8207
8208 /* Handle the case where we have:
8209 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8210 CONST_INT. */
8211 if (GET_CODE (XEXP (orig, 0)) == PLUS
8212 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8213 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8214 {
8215 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8216 return orig;
8217 }
8218
8219 if (reg == 0)
8220 {
8221 gcc_assert (can_create_pseudo_p ());
8222 reg = gen_reg_rtx (Pmode);
8223 }
8224
8225 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8226
8227 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8228 pic_reg, compute_now);
8229 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8230 base == reg ? 0 : reg, pic_reg,
8231 compute_now);
8232
8233 if (CONST_INT_P (offset))
8234 {
8235 /* The base register doesn't really matter, we only want to
8236 test the index for the appropriate mode. */
8237 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8238 {
8239 gcc_assert (can_create_pseudo_p ());
8240 offset = force_reg (Pmode, offset);
8241 }
8242
8243 if (CONST_INT_P (offset))
8244 return plus_constant (Pmode, base, INTVAL (offset));
8245 }
8246
8247 if (GET_MODE_SIZE (mode) > 4
8248 && (GET_MODE_CLASS (mode) == MODE_INT
8249 || TARGET_SOFT_FLOAT))
8250 {
8251 emit_insn (gen_addsi3 (reg, base, offset));
8252 return reg;
8253 }
8254
8255 return gen_rtx_PLUS (Pmode, base, offset);
8256 }
8257
8258 return orig;
8259 }
8260
8261
8262 /* Generate insns that produce the address of the stack canary */
8263 rtx
8264 arm_stack_protect_tls_canary_mem (bool reload)
8265 {
8266 rtx tp = gen_reg_rtx (SImode);
8267 if (reload)
8268 emit_insn (gen_reload_tp_hard (tp));
8269 else
8270 emit_insn (gen_load_tp_hard (tp));
8271
8272 rtx reg = gen_reg_rtx (SImode);
8273 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8274 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8275 return gen_rtx_MEM (SImode, reg);
8276 }
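
/* This builds the -mstack-protector-guard=tls canary address, i.e. roughly
   the equivalent of

     canary_addr = (char *) __builtin_thread_pointer ()
		   + arm_stack_protector_guard_offset;

   where the offset comes from -mstack-protector-guard-offset=.  */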
8277
8278
8279 /* Whether a register is callee saved or not. This is necessary because high
8280 registers are marked as caller saved when optimizing for size on Thumb-1
8281    targets, despite being callee saved, in order to avoid using them.  */
8282 #define callee_saved_reg_p(reg) \
8283 (!call_used_or_fixed_reg_p (reg) \
8284 || (TARGET_THUMB1 && optimize_size \
8285 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8286
8287 /* Return a mask for the call-clobbered low registers that are unused
8288 at the end of the prologue. */
8289 static unsigned long
8290 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8291 {
8292 unsigned long mask = 0;
8293 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8294
8295 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8296 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8297 mask |= 1 << (reg - FIRST_LO_REGNUM);
8298 return mask;
8299 }
8300
8301 /* Similarly for the start of the epilogue. */
8302 static unsigned long
8303 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8304 {
8305 unsigned long mask = 0;
8306 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8307
8308 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8309 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8310 mask |= 1 << (reg - FIRST_LO_REGNUM);
8311 return mask;
8312 }
8313
8314 /* Find a spare register to use during the prolog of a function. */
8315
8316 static int
8317 thumb_find_work_register (unsigned long pushed_regs_mask)
8318 {
8319 int reg;
8320
8321 unsigned long unused_regs
8322 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8323
8324 /* Check the argument registers first as these are call-used. The
8325 register allocation order means that sometimes r3 might be used
8326 but earlier argument registers might not, so check them all. */
8327 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8328 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8329 return reg;
8330
8331 /* Otherwise look for a call-saved register that is going to be pushed. */
8332 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8333 if (pushed_regs_mask & (1 << reg))
8334 return reg;
8335
8336 if (TARGET_THUMB2)
8337 {
8338 /* Thumb-2 can use high regs. */
8339 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8340 if (pushed_regs_mask & (1 << reg))
8341 return reg;
8342 }
8343 /* Something went wrong - thumb_compute_save_reg_mask()
8344 should have arranged for a suitable register to be pushed. */
8345 gcc_unreachable ();
8346 }
8347
8348 static GTY(()) int pic_labelno;
8349
8350 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8351 low register. */
8352
8353 void
8354 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8355 {
8356 rtx l1, labelno, pic_tmp, pic_rtx;
8357
8358 if (crtl->uses_pic_offset_table == 0
8359 || TARGET_SINGLE_PIC_BASE
8360 || TARGET_FDPIC)
8361 return;
8362
8363 gcc_assert (flag_pic);
8364
8365 if (pic_reg == NULL_RTX)
8366 pic_reg = cfun->machine->pic_reg;
8367 if (TARGET_VXWORKS_RTP)
8368 {
8369 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8370 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8371 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8372
8373 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8374
8375 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8376 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8377 }
8378 else
8379 {
8380 /* We use an UNSPEC rather than a LABEL_REF because this label
8381 never appears in the code stream. */
8382
8383 labelno = GEN_INT (pic_labelno++);
8384 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8385 l1 = gen_rtx_CONST (VOIDmode, l1);
8386
8387 /* On the ARM the PC register contains 'dot + 8' at the time of the
8388 addition, on the Thumb it is 'dot + 4'. */
8389 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8390 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8391 UNSPEC_GOTSYM_OFF);
8392 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8393
8394 if (TARGET_32BIT)
8395 {
8396 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8397 }
8398 else /* TARGET_THUMB1 */
8399 {
8400 if (arm_pic_register != INVALID_REGNUM
8401 && REGNO (pic_reg) > LAST_LO_REGNUM)
8402 {
8403 /* We will have pushed the pic register, so we should always be
8404 able to find a work register. */
8405 pic_tmp = gen_rtx_REG (SImode,
8406 thumb_find_work_register (saved_regs));
8407 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8408 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8409 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8410 }
8411 else if (arm_pic_register != INVALID_REGNUM
8412 && arm_pic_register > LAST_LO_REGNUM
8413 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8414 {
8415 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8416 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8417 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8418 }
8419 else
8420 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8421 }
8422 }
8423
8424 /* Need to emit this whether or not we obey regdecls,
8425 since setjmp/longjmp can cause life info to screw up. */
8426 emit_use (pic_reg);
8427 }
8428
8429 /* Try to determine whether an object, referenced via ORIG, will be
8430 placed in the text or data segment. This is used in FDPIC mode, to
8431 decide which relocations to use when accessing ORIG. *IS_READONLY
8432 is set to true if ORIG is a read-only location, false otherwise.
8433 Return true if we could determine the location of ORIG, false
8434 otherwise. *IS_READONLY is valid only when we return true. */
8435 static bool
8436 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8437 {
8438 *is_readonly = false;
8439
8440 if (LABEL_REF_P (orig))
8441 {
8442 *is_readonly = true;
8443 return true;
8444 }
8445
8446 if (SYMBOL_REF_P (orig))
8447 {
8448 if (CONSTANT_POOL_ADDRESS_P (orig))
8449 {
8450 *is_readonly = true;
8451 return true;
8452 }
8453 if (SYMBOL_REF_LOCAL_P (orig)
8454 && !SYMBOL_REF_EXTERNAL_P (orig)
8455 && SYMBOL_REF_DECL (orig)
8456 && (!DECL_P (SYMBOL_REF_DECL (orig))
8457 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8458 {
8459 tree decl = SYMBOL_REF_DECL (orig);
8460 tree init = VAR_P (decl)
8461 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8462 ? decl : 0;
8463 int reloc = 0;
8464 bool named_section, readonly;
8465
8466 if (init && init != error_mark_node)
8467 reloc = compute_reloc_for_constant (init);
8468
8469 named_section = VAR_P (decl)
8470 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8471 readonly = decl_readonly_section (decl, reloc);
8472
8473 /* We don't know where the link script will put a named
8474 section, so return false in such a case. */
8475 if (named_section)
8476 return false;
8477
8478 *is_readonly = readonly;
8479 return true;
8480 }
8481
8482 /* We don't know. */
8483 return false;
8484 }
8485
8486 gcc_unreachable ();
8487 }
8488
8489 /* Generate code to load the address of a static var when flag_pic is set. */
8490 static rtx_insn *
8491 arm_pic_static_addr (rtx orig, rtx reg)
8492 {
8493 rtx l1, labelno, offset_rtx;
8494 rtx_insn *insn;
8495
8496 gcc_assert (flag_pic);
8497
8498 bool is_readonly = false;
8499 bool info_known = false;
8500
8501 if (TARGET_FDPIC
8502 && SYMBOL_REF_P (orig)
8503 && !SYMBOL_REF_FUNCTION_P (orig))
8504 info_known = arm_is_segment_info_known (orig, &is_readonly);
8505
8506 if (TARGET_FDPIC
8507 && SYMBOL_REF_P (orig)
8508 && !SYMBOL_REF_FUNCTION_P (orig)
8509 && !info_known)
8510 {
8511       /* We don't know where orig is stored, so we have to be
8512 pessimistic and use a GOT relocation. */
8513 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8514
8515 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8516 }
8517 else if (TARGET_FDPIC
8518 && SYMBOL_REF_P (orig)
8519 && (SYMBOL_REF_FUNCTION_P (orig)
8520 || !is_readonly))
8521 {
8522 /* We use the GOTOFF relocation. */
8523 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8524
8525 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8526 emit_insn (gen_movsi (reg, l1));
8527 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8528 }
8529 else
8530 {
8531 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8532 PC-relative access. */
8533 /* We use an UNSPEC rather than a LABEL_REF because this label
8534 never appears in the code stream. */
8535 labelno = GEN_INT (pic_labelno++);
8536 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8537 l1 = gen_rtx_CONST (VOIDmode, l1);
8538
8539 /* On the ARM the PC register contains 'dot + 8' at the time of the
8540 addition, on the Thumb it is 'dot + 4'. */
8541 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8542 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8543 UNSPEC_SYMBOL_OFFSET);
8544 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8545
8546 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8547 labelno));
8548 }
8549
8550 return insn;
8551 }
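/* Illustrative note on the 'dot + 8' / 'dot + 4' adjustment above (an
   informal sketch, not a statement about the generated pattern): in ARM
   state a read of the PC yields the address of the current instruction
   plus 8, and in Thumb state plus 4, so the constant folded into
   OFFSET_RTX must compensate for that bias when the loaded value is
   later added to the PC.  */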
8552
8553 /* Return nonzero if X is valid as an ARM state addressing register. */
8554 static int
8555 arm_address_register_rtx_p (rtx x, int strict_p)
8556 {
8557 int regno;
8558
8559 if (!REG_P (x))
8560 return 0;
8561
8562 regno = REGNO (x);
8563
8564 if (strict_p)
8565 return ARM_REGNO_OK_FOR_BASE_P (regno);
8566
8567 return (regno <= LAST_ARM_REGNUM
8568 || regno >= FIRST_PSEUDO_REGISTER
8569 || regno == FRAME_POINTER_REGNUM
8570 || regno == ARG_POINTER_REGNUM);
8571 }
8572
8573 /* Return TRUE if this rtx is the difference of a symbol and a label,
8574 and will reduce to a PC-relative relocation in the object file.
8575 Expressions like this can be left alone when generating PIC, rather
8576 than forced through the GOT. */
8577 static int
8578 pcrel_constant_p (rtx x)
8579 {
8580 if (GET_CODE (x) == MINUS)
8581 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8582
8583 return FALSE;
8584 }
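/* For example (illustrative only), an expression such as
     (minus (symbol_ref "foo") (label_ref 23))
   satisfies pcrel_constant_p: the linker can resolve it to a fixed
   PC-relative offset, so no GOT entry is needed.  */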
8585
8586 /* Return true if X will surely end up in an index register after the next
8587 splitting pass. */
8588 static bool
8589 will_be_in_index_register (const_rtx x)
8590 {
8591 /* arm.md: calculate_pic_address will split this into a register. */
8592 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8593 }
8594
8595 /* Return nonzero if X is a valid ARM state address operand. */
8596 int
8597 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8598 int strict_p)
8599 {
8600 bool use_ldrd;
8601 enum rtx_code code = GET_CODE (x);
8602
8603 if (arm_address_register_rtx_p (x, strict_p))
8604 return 1;
8605
8606 use_ldrd = (TARGET_LDRD
8607 && (mode == DImode || mode == DFmode));
8608
8609 if (code == POST_INC || code == PRE_DEC
8610 || ((code == PRE_INC || code == POST_DEC)
8611 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8612 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8613
8614 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8615 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8616 && GET_CODE (XEXP (x, 1)) == PLUS
8617 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8618 {
8619 rtx addend = XEXP (XEXP (x, 1), 1);
8620
8621 /* Don't allow ldrd post-increment by register because it's hard
8622 to fix up invalid register choices. */
8623 if (use_ldrd
8624 && GET_CODE (x) == POST_MODIFY
8625 && REG_P (addend))
8626 return 0;
8627
8628 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8629 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8630 }
8631
8632 /* After reload constants split into minipools will have addresses
8633 from a LABEL_REF. */
8634 else if (reload_completed
8635 && (code == LABEL_REF
8636 || (code == CONST
8637 && GET_CODE (XEXP (x, 0)) == PLUS
8638 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8639 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8640 return 1;
8641
8642 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8643 return 0;
8644
8645 else if (code == PLUS)
8646 {
8647 rtx xop0 = XEXP (x, 0);
8648 rtx xop1 = XEXP (x, 1);
8649
8650 return ((arm_address_register_rtx_p (xop0, strict_p)
8651 && ((CONST_INT_P (xop1)
8652 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8653 || (!strict_p && will_be_in_index_register (xop1))))
8654 || (arm_address_register_rtx_p (xop1, strict_p)
8655 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8656 }
8657
8658 #if 0
8659 /* Reload currently can't handle MINUS, so disable this for now */
8660 else if (GET_CODE (x) == MINUS)
8661 {
8662 rtx xop0 = XEXP (x, 0);
8663 rtx xop1 = XEXP (x, 1);
8664
8665 return (arm_address_register_rtx_p (xop0, strict_p)
8666 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8667 }
8668 #endif
8669
8670 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8671 && code == SYMBOL_REF
8672 && CONSTANT_POOL_ADDRESS_P (x)
8673 && ! (flag_pic
8674 && symbol_mentioned_p (get_pool_constant (x))
8675 && ! pcrel_constant_p (get_pool_constant (x))))
8676 return 1;
8677
8678 return 0;
8679 }
8680
8681 /* Return true if we can avoid creating a constant pool entry for x. */
8682 static bool
8683 can_avoid_literal_pool_for_label_p (rtx x)
8684 {
8685 /* Normally we can assign constant values to target registers without
8686 the help of the constant pool. But there are cases where we have to use
8687 the constant pool, such as:
8688 1) assigning a label to a register.
8689 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8690
8691 A constant pool access of the form:
8692 (set (reg r0) (mem (symbol_ref (".LC0"))))
8693 will cause the use of the literal pool (later on, in arm_reorg).
8694 So here we mark such a form as invalid, and the compiler will then
8695 adjust it into:
8696 (set (reg r0) (symbol_ref (".LC0")))
8697 (set (reg r0) (mem (reg r0))).
8698 No extra register is required, and (mem (reg r0)) won't cause the use
8699 of literal pools. */
8700 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8701 && CONSTANT_POOL_ADDRESS_P (x))
8702 return 1;
8703 return 0;
8704 }
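/* A rough, hedged illustration (assuming a target with MOVW/MOVT and
   arm_disable_literal_pool set by -mpure-code or -mslow-flash-data):
   the rejected form above typically ends up rendered along the lines of
     movw  r0, #:lower16:.LC0
     movt  r0, #:upper16:.LC0
     ldr   r0, [r0]
   rather than a PC-relative literal load, keeping constant data out of
   the code section.  */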
8705
8706
8707 /* Return nonzero if X is a valid Thumb-2 address operand. */
8708 static int
8709 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8710 {
8711 bool use_ldrd;
8712 enum rtx_code code = GET_CODE (x);
8713
8714 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8715 we can store and load it like any other 16-bit value. */
8716 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8717 mode = HImode;
8718
8719 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8720 return mve_vector_mem_operand (mode, x, strict_p);
8721
8722 if (arm_address_register_rtx_p (x, strict_p))
8723 return 1;
8724
8725 use_ldrd = (TARGET_LDRD
8726 && (mode == DImode || mode == DFmode));
8727
8728 if (code == POST_INC || code == PRE_DEC
8729 || ((code == PRE_INC || code == POST_DEC)
8730 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8731 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8732
8733 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8734 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8735 && GET_CODE (XEXP (x, 1)) == PLUS
8736 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8737 {
8738 /* Thumb-2 only has autoincrement by constant. */
8739 rtx addend = XEXP (XEXP (x, 1), 1);
8740 HOST_WIDE_INT offset;
8741
8742 if (!CONST_INT_P (addend))
8743 return 0;
8744
8745 offset = INTVAL (addend);
8746 if (GET_MODE_SIZE (mode) <= 4)
8747 return (offset > -256 && offset < 256);
8748
8749 return (use_ldrd && offset > -1024 && offset < 1024
8750 && (offset & 3) == 0);
8751 }
8752
8753 /* After reload constants split into minipools will have addresses
8754 from a LABEL_REF. */
8755 else if (reload_completed
8756 && (code == LABEL_REF
8757 || (code == CONST
8758 && GET_CODE (XEXP (x, 0)) == PLUS
8759 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8760 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8761 return 1;
8762
8763 else if (mode == TImode
8764 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8765 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8766 return 0;
8767
8768 else if (code == PLUS)
8769 {
8770 rtx xop0 = XEXP (x, 0);
8771 rtx xop1 = XEXP (x, 1);
8772
8773 return ((arm_address_register_rtx_p (xop0, strict_p)
8774 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8775 || (!strict_p && will_be_in_index_register (xop1))))
8776 || (arm_address_register_rtx_p (xop1, strict_p)
8777 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8778 }
8779
8780 else if (can_avoid_literal_pool_for_label_p (x))
8781 return 0;
8782
8783 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8784 && code == SYMBOL_REF
8785 && CONSTANT_POOL_ADDRESS_P (x)
8786 && ! (flag_pic
8787 && symbol_mentioned_p (get_pool_constant (x))
8788 && ! pcrel_constant_p (get_pool_constant (x))))
8789 return 1;
8790
8791 return 0;
8792 }
8793
8794 /* Return nonzero if INDEX is valid for an address index operand in
8795 ARM state. */
8796 static int
8797 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8798 int strict_p)
8799 {
8800 HOST_WIDE_INT range;
8801 enum rtx_code code = GET_CODE (index);
8802
8803 /* Standard coprocessor addressing modes. */
8804 if (TARGET_HARD_FLOAT
8805 && (mode == SFmode || mode == DFmode))
8806 return (code == CONST_INT && INTVAL (index) < 1024
8807 && INTVAL (index) > -1024
8808 && (INTVAL (index) & 3) == 0);
8809
8810 /* For quad modes, we restrict the constant offset to be slightly less
8811 than what the instruction format permits. We do this because for
8812 quad mode moves, we will actually decompose them into two separate
8813 double-mode reads or writes. INDEX must therefore be a valid
8814 (double-mode) offset and so should INDEX+8. */
8815 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8816 return (code == CONST_INT
8817 && INTVAL (index) < 1016
8818 && INTVAL (index) > -1024
8819 && (INTVAL (index) & 3) == 0);
8820
8821 /* We have no such constraint on double mode offsets, so we permit the
8822 full range of the instruction format. */
8823 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8824 return (code == CONST_INT
8825 && INTVAL (index) < 1024
8826 && INTVAL (index) > -1024
8827 && (INTVAL (index) & 3) == 0);
8828
8829 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8830 return (code == CONST_INT
8831 && INTVAL (index) < 1024
8832 && INTVAL (index) > -1024
8833 && (INTVAL (index) & 3) == 0);
8834
8835 if (arm_address_register_rtx_p (index, strict_p)
8836 && (GET_MODE_SIZE (mode) <= 4))
8837 return 1;
8838
8839 if (mode == DImode || mode == DFmode)
8840 {
8841 if (code == CONST_INT)
8842 {
8843 HOST_WIDE_INT val = INTVAL (index);
8844
8845 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8846 If vldr is selected it uses arm_coproc_mem_operand. */
8847 if (TARGET_LDRD)
8848 return val > -256 && val < 256;
8849 else
8850 return val > -4096 && val < 4092;
8851 }
8852
8853 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8854 }
8855
8856 if (GET_MODE_SIZE (mode) <= 4
8857 && ! (arm_arch4
8858 && (mode == HImode
8859 || mode == HFmode
8860 || (mode == QImode && outer == SIGN_EXTEND))))
8861 {
8862 if (code == MULT)
8863 {
8864 rtx xiop0 = XEXP (index, 0);
8865 rtx xiop1 = XEXP (index, 1);
8866
8867 return ((arm_address_register_rtx_p (xiop0, strict_p)
8868 && power_of_two_operand (xiop1, SImode))
8869 || (arm_address_register_rtx_p (xiop1, strict_p)
8870 && power_of_two_operand (xiop0, SImode)));
8871 }
8872 else if (code == LSHIFTRT || code == ASHIFTRT
8873 || code == ASHIFT || code == ROTATERT)
8874 {
8875 rtx op = XEXP (index, 1);
8876
8877 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8878 && CONST_INT_P (op)
8879 && INTVAL (op) > 0
8880 && INTVAL (op) <= 31);
8881 }
8882 }
8883
8884 /* For ARM v4 we may be doing a sign-extend operation during the
8885 load. */
8886 if (arm_arch4)
8887 {
8888 if (mode == HImode
8889 || mode == HFmode
8890 || (outer == SIGN_EXTEND && mode == QImode))
8891 range = 256;
8892 else
8893 range = 4096;
8894 }
8895 else
8896 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8897
8898 return (code == CONST_INT
8899 && INTVAL (index) < range
8900 && INTVAL (index) > -range);
8901 }
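/* Worked example of the range checks above (illustrative): with arm_arch4
   and mode == HImode the accepted immediate range is (-256, 256), so
   (plus (reg r4) (const_int 250)) is a legitimate HImode address while
   (plus (reg r4) (const_int 300)) is not; for SImode the range widens to
   (-4096, 4096).  */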
8902
8903 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8904 index operand. i.e. 1, 2, 4 or 8. */
8905 static bool
8906 thumb2_index_mul_operand (rtx op)
8907 {
8908 HOST_WIDE_INT val;
8909
8910 if (!CONST_INT_P (op))
8911 return false;
8912
8913 val = INTVAL (op);
8914 return (val == 1 || val == 2 || val == 4 || val == 8);
8915 }
8916
8917 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8918 static int
8919 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8920 {
8921 enum rtx_code code = GET_CODE (index);
8922
8923 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8924 /* Standard coprocessor addressing modes. */
8925 if (TARGET_VFP_BASE
8926 && (mode == SFmode || mode == DFmode))
8927 return (code == CONST_INT && INTVAL (index) < 1024
8928 /* Thumb-2 allows only a > -256 index range for its core register
8929 load/stores. Since we allow SF/DF in core registers, we have
8930 to use the intersection between -256~4096 (core) and -1024~1024
8931 (coprocessor). */
8932 && INTVAL (index) > -256
8933 && (INTVAL (index) & 3) == 0);
8934
8935 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8936 {
8937 /* For DImode assume values will usually live in core regs
8938 and only allow LDRD addressing modes. */
8939 if (!TARGET_LDRD || mode != DImode)
8940 return (code == CONST_INT
8941 && INTVAL (index) < 1024
8942 && INTVAL (index) > -1024
8943 && (INTVAL (index) & 3) == 0);
8944 }
8945
8946 /* For quad modes, we restrict the constant offset to be slightly less
8947 than what the instruction format permits. We do this because for
8948 quad mode moves, we will actually decompose them into two separate
8949 double-mode reads or writes. INDEX must therefore be a valid
8950 (double-mode) offset and so should INDEX+8. */
8951 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8952 return (code == CONST_INT
8953 && INTVAL (index) < 1016
8954 && INTVAL (index) > -1024
8955 && (INTVAL (index) & 3) == 0);
8956
8957 /* We have no such constraint on double mode offsets, so we permit the
8958 full range of the instruction format. */
8959 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8960 return (code == CONST_INT
8961 && INTVAL (index) < 1024
8962 && INTVAL (index) > -1024
8963 && (INTVAL (index) & 3) == 0);
8964
8965 if (arm_address_register_rtx_p (index, strict_p)
8966 && (GET_MODE_SIZE (mode) <= 4))
8967 return 1;
8968
8969 if (mode == DImode || mode == DFmode)
8970 {
8971 if (code == CONST_INT)
8972 {
8973 HOST_WIDE_INT val = INTVAL (index);
8974 /* Thumb-2 ldrd only has reg+const addressing modes.
8975 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8976 If vldr is selected it uses arm_coproc_mem_operand. */
8977 if (TARGET_LDRD)
8978 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8979 else
8980 return IN_RANGE (val, -255, 4095 - 4);
8981 }
8982 else
8983 return 0;
8984 }
8985
8986 if (code == MULT)
8987 {
8988 rtx xiop0 = XEXP (index, 0);
8989 rtx xiop1 = XEXP (index, 1);
8990
8991 return ((arm_address_register_rtx_p (xiop0, strict_p)
8992 && thumb2_index_mul_operand (xiop1))
8993 || (arm_address_register_rtx_p (xiop1, strict_p)
8994 && thumb2_index_mul_operand (xiop0)));
8995 }
8996 else if (code == ASHIFT)
8997 {
8998 rtx op = XEXP (index, 1);
8999
9000 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9001 && CONST_INT_P (op)
9002 && INTVAL (op) > 0
9003 && INTVAL (op) <= 3);
9004 }
9005
9006 return (code == CONST_INT
9007 && INTVAL (index) < 4096
9008 && INTVAL (index) > -256);
9009 }
9010
9011 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9012 static int
9013 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9014 {
9015 int regno;
9016
9017 if (!REG_P (x))
9018 return 0;
9019
9020 regno = REGNO (x);
9021
9022 if (strict_p)
9023 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9024
9025 return (regno <= LAST_LO_REGNUM
9026 || regno > LAST_VIRTUAL_REGISTER
9027 || regno == FRAME_POINTER_REGNUM
9028 || (GET_MODE_SIZE (mode) >= 4
9029 && (regno == STACK_POINTER_REGNUM
9030 || regno >= FIRST_PSEUDO_REGISTER
9031 || x == hard_frame_pointer_rtx
9032 || x == arg_pointer_rtx)));
9033 }
9034
9035 /* Return nonzero if x is a legitimate index register. This is the case
9036 for any base register that can access a QImode object. */
9037 inline static int
9038 thumb1_index_register_rtx_p (rtx x, int strict_p)
9039 {
9040 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9041 }
9042
9043 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9044
9045 The AP may be eliminated to either the SP or the FP, so we use the
9046 least common denominator, e.g. SImode, and offsets from 0 to 64.
9047
9048 ??? Verify whether the above is the right approach.
9049
9050 ??? Also, the FP may be eliminated to the SP, so perhaps that
9051 needs special handling also.
9052
9053 ??? Look at how the mips16 port solves this problem. It probably uses
9054 better ways to solve some of these problems.
9055
9056 Although it is not incorrect, we don't accept QImode and HImode
9057 addresses based on the frame pointer or arg pointer until the
9058 reload pass starts. This is so that eliminating such addresses
9059 into stack based ones won't produce impossible code. */
9060 int
9061 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9062 {
9063 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9064 return 0;
9065
9066 /* ??? Not clear if this is right. Experiment. */
9067 if (GET_MODE_SIZE (mode) < 4
9068 && !(reload_in_progress || reload_completed)
9069 && (reg_mentioned_p (frame_pointer_rtx, x)
9070 || reg_mentioned_p (arg_pointer_rtx, x)
9071 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9072 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9073 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9074 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9075 return 0;
9076
9077 /* Accept any base register. SP only in SImode or larger. */
9078 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9079 return 1;
9080
9081 /* This is PC relative data before arm_reorg runs. */
9082 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9083 && SYMBOL_REF_P (x)
9084 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9085 && !arm_disable_literal_pool)
9086 return 1;
9087
9088 /* This is PC relative data after arm_reorg runs. */
9089 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9090 && reload_completed
9091 && (LABEL_REF_P (x)
9092 || (GET_CODE (x) == CONST
9093 && GET_CODE (XEXP (x, 0)) == PLUS
9094 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9095 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9096 return 1;
9097
9098 /* Post-inc indexing only supported for SImode and larger. */
9099 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9100 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9101 return 1;
9102
9103 else if (GET_CODE (x) == PLUS)
9104 {
9105 /* REG+REG address can be any two index registers. */
9106 /* We disallow FRAME+REG addressing since we know that FRAME
9107 will be replaced with STACK, and SP relative addressing only
9108 permits SP+OFFSET. */
9109 if (GET_MODE_SIZE (mode) <= 4
9110 && XEXP (x, 0) != frame_pointer_rtx
9111 && XEXP (x, 1) != frame_pointer_rtx
9112 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9113 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9114 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9115 return 1;
9116
9117 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
9118 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9119 || XEXP (x, 0) == arg_pointer_rtx)
9120 && CONST_INT_P (XEXP (x, 1))
9121 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9122 return 1;
9123
9124 /* REG+const has a 10-bit offset for SP, but only SImode and
9125 larger are supported. */
9126 /* ??? Should probably check for DI/DFmode overflow here
9127 just like GO_IF_LEGITIMATE_OFFSET does. */
9128 else if (REG_P (XEXP (x, 0))
9129 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9130 && GET_MODE_SIZE (mode) >= 4
9131 && CONST_INT_P (XEXP (x, 1))
9132 && INTVAL (XEXP (x, 1)) >= 0
9133 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9134 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9135 return 1;
9136
9137 else if (REG_P (XEXP (x, 0))
9138 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9139 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9140 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9141 && GET_MODE_SIZE (mode) >= 4
9142 && CONST_INT_P (XEXP (x, 1))
9143 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9144 return 1;
9145 }
9146
9147 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9148 && GET_MODE_SIZE (mode) == 4
9149 && SYMBOL_REF_P (x)
9150 && CONSTANT_POOL_ADDRESS_P (x)
9151 && !arm_disable_literal_pool
9152 && ! (flag_pic
9153 && symbol_mentioned_p (get_pool_constant (x))
9154 && ! pcrel_constant_p (get_pool_constant (x))))
9155 return 1;
9156
9157 return 0;
9158 }
9159
9160 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9161 instruction of mode MODE. */
9162 int
9163 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9164 {
9165 switch (GET_MODE_SIZE (mode))
9166 {
9167 case 1:
9168 return val >= 0 && val < 32;
9169
9170 case 2:
9171 return val >= 0 && val < 64 && (val & 1) == 0;
9172
9173 default:
9174 return (val >= 0
9175 && (val + GET_MODE_SIZE (mode)) <= 128
9176 && (val & 3) == 0);
9177 }
9178 }
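/* For illustration, the checks above amount to: QImode offsets 0-31,
   HImode offsets 0-62 (even), and SImode offsets 0-124 in multiples of
   four; larger modes are constrained so that VAL plus the mode size
   stays within 128 bytes.  These match the Thumb-1 ldrb/ldrh/ldr
   immediate forms.  */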
9179
9180 bool
9181 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9182 {
9183 if (TARGET_ARM)
9184 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9185 else if (TARGET_THUMB2)
9186 return thumb2_legitimate_address_p (mode, x, strict_p);
9187 else /* if (TARGET_THUMB1) */
9188 return thumb1_legitimate_address_p (mode, x, strict_p);
9189 }
9190
9191 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9192
9193 Given an rtx X being reloaded into a reg required to be
9194 in class CLASS, return the class of reg to actually use.
9195 In general this is just CLASS, but for the Thumb core registers and
9196 immediate constants we prefer a LO_REGS class or a subset. */
9197
9198 static reg_class_t
9199 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9200 {
9201 if (TARGET_32BIT)
9202 return rclass;
9203 else
9204 {
9205 if (rclass == GENERAL_REGS)
9206 return LO_REGS;
9207 else
9208 return rclass;
9209 }
9210 }
9211
9212 /* Build the SYMBOL_REF for __tls_get_addr. */
9213
9214 static GTY(()) rtx tls_get_addr_libfunc;
9215
9216 static rtx
9217 get_tls_get_addr (void)
9218 {
9219 if (!tls_get_addr_libfunc)
9220 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9221 return tls_get_addr_libfunc;
9222 }
9223
9224 rtx
9225 arm_load_tp (rtx target)
9226 {
9227 if (!target)
9228 target = gen_reg_rtx (SImode);
9229
9230 if (TARGET_HARD_TP)
9231 {
9232 /* Can return in any reg. */
9233 emit_insn (gen_load_tp_hard (target));
9234 }
9235 else
9236 {
9237 /* Always returned in r0. Immediately copy the result into a pseudo,
9238 otherwise other uses of r0 (e.g. setting up function arguments) may
9239 clobber the value. */
9240
9241 rtx tmp;
9242
9243 if (TARGET_FDPIC)
9244 {
9245 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9246 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9247
9248 emit_insn (gen_load_tp_soft_fdpic ());
9249
9250 /* Restore r9. */
9251 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9252 }
9253 else
9254 emit_insn (gen_load_tp_soft ());
9255
9256 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9257 emit_move_insn (target, tmp);
9258 }
9259 return target;
9260 }
9261
9262 static rtx
9263 load_tls_operand (rtx x, rtx reg)
9264 {
9265 rtx tmp;
9266
9267 if (reg == NULL_RTX)
9268 reg = gen_reg_rtx (SImode);
9269
9270 tmp = gen_rtx_CONST (SImode, x);
9271
9272 emit_move_insn (reg, tmp);
9273
9274 return reg;
9275 }
9276
9277 static rtx_insn *
9278 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9279 {
9280 rtx label, labelno = NULL_RTX, sum;
9281
9282 gcc_assert (reloc != TLS_DESCSEQ);
9283 start_sequence ();
9284
9285 if (TARGET_FDPIC)
9286 {
9287 sum = gen_rtx_UNSPEC (Pmode,
9288 gen_rtvec (2, x, GEN_INT (reloc)),
9289 UNSPEC_TLS);
9290 }
9291 else
9292 {
9293 labelno = GEN_INT (pic_labelno++);
9294 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9295 label = gen_rtx_CONST (VOIDmode, label);
9296
9297 sum = gen_rtx_UNSPEC (Pmode,
9298 gen_rtvec (4, x, GEN_INT (reloc), label,
9299 GEN_INT (TARGET_ARM ? 8 : 4)),
9300 UNSPEC_TLS);
9301 }
9302 reg = load_tls_operand (sum, reg);
9303
9304 if (TARGET_FDPIC)
9305 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9306 else if (TARGET_ARM)
9307 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9308 else
9309 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9310
9311 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9312 LCT_PURE, /* LCT_CONST? */
9313 Pmode, reg, Pmode);
9314
9315 rtx_insn *insns = get_insns ();
9316 end_sequence ();
9317
9318 return insns;
9319 }
9320
9321 static rtx
9322 arm_tls_descseq_addr (rtx x, rtx reg)
9323 {
9324 rtx labelno = GEN_INT (pic_labelno++);
9325 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9326 rtx sum = gen_rtx_UNSPEC (Pmode,
9327 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9328 gen_rtx_CONST (VOIDmode, label),
9329 GEN_INT (!TARGET_ARM)),
9330 UNSPEC_TLS);
9331 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9332
9333 emit_insn (gen_tlscall (x, labelno));
9334 if (!reg)
9335 reg = gen_reg_rtx (SImode);
9336 else
9337 gcc_assert (REGNO (reg) != R0_REGNUM);
9338
9339 emit_move_insn (reg, reg0);
9340
9341 return reg;
9342 }
9343
9344
9345 rtx
9346 legitimize_tls_address (rtx x, rtx reg)
9347 {
9348 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9349 rtx_insn *insns;
9350 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9351
9352 switch (model)
9353 {
9354 case TLS_MODEL_GLOBAL_DYNAMIC:
9355 if (TARGET_GNU2_TLS)
9356 {
9357 gcc_assert (!TARGET_FDPIC);
9358
9359 reg = arm_tls_descseq_addr (x, reg);
9360
9361 tp = arm_load_tp (NULL_RTX);
9362
9363 dest = gen_rtx_PLUS (Pmode, tp, reg);
9364 }
9365 else
9366 {
9367 /* Original scheme */
9368 if (TARGET_FDPIC)
9369 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9370 else
9371 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9372 dest = gen_reg_rtx (Pmode);
9373 emit_libcall_block (insns, dest, ret, x);
9374 }
9375 return dest;
9376
9377 case TLS_MODEL_LOCAL_DYNAMIC:
9378 if (TARGET_GNU2_TLS)
9379 {
9380 gcc_assert (!TARGET_FDPIC);
9381
9382 reg = arm_tls_descseq_addr (x, reg);
9383
9384 tp = arm_load_tp (NULL_RTX);
9385
9386 dest = gen_rtx_PLUS (Pmode, tp, reg);
9387 }
9388 else
9389 {
9390 if (TARGET_FDPIC)
9391 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9392 else
9393 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9394
9395 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9396 share the LDM result with other LD model accesses. */
9397 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9398 UNSPEC_TLS);
9399 dest = gen_reg_rtx (Pmode);
9400 emit_libcall_block (insns, dest, ret, eqv);
9401
9402 /* Load the addend. */
9403 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9404 GEN_INT (TLS_LDO32)),
9405 UNSPEC_TLS);
9406 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9407 dest = gen_rtx_PLUS (Pmode, dest, addend);
9408 }
9409 return dest;
9410
9411 case TLS_MODEL_INITIAL_EXEC:
9412 if (TARGET_FDPIC)
9413 {
9414 sum = gen_rtx_UNSPEC (Pmode,
9415 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9416 UNSPEC_TLS);
9417 reg = load_tls_operand (sum, reg);
9418 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9419 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9420 }
9421 else
9422 {
9423 labelno = GEN_INT (pic_labelno++);
9424 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9425 label = gen_rtx_CONST (VOIDmode, label);
9426 sum = gen_rtx_UNSPEC (Pmode,
9427 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9428 GEN_INT (TARGET_ARM ? 8 : 4)),
9429 UNSPEC_TLS);
9430 reg = load_tls_operand (sum, reg);
9431
9432 if (TARGET_ARM)
9433 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9434 else if (TARGET_THUMB2)
9435 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9436 else
9437 {
9438 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9439 emit_move_insn (reg, gen_const_mem (SImode, reg));
9440 }
9441 }
9442
9443 tp = arm_load_tp (NULL_RTX);
9444
9445 return gen_rtx_PLUS (Pmode, tp, reg);
9446
9447 case TLS_MODEL_LOCAL_EXEC:
9448 tp = arm_load_tp (NULL_RTX);
9449
9450 reg = gen_rtx_UNSPEC (Pmode,
9451 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9452 UNSPEC_TLS);
9453 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9454
9455 return gen_rtx_PLUS (Pmode, tp, reg);
9456
9457 default:
9458 abort ();
9459 }
9460 }
9461
9462 /* Try machine-dependent ways of modifying an illegitimate address
9463 to be legitimate. If we find one, return the new, valid address. */
9464 rtx
9465 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9466 {
9467 if (arm_tls_referenced_p (x))
9468 {
9469 rtx addend = NULL;
9470
9471 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9472 {
9473 addend = XEXP (XEXP (x, 0), 1);
9474 x = XEXP (XEXP (x, 0), 0);
9475 }
9476
9477 if (!SYMBOL_REF_P (x))
9478 return x;
9479
9480 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9481
9482 x = legitimize_tls_address (x, NULL_RTX);
9483
9484 if (addend)
9485 {
9486 x = gen_rtx_PLUS (SImode, x, addend);
9487 orig_x = x;
9488 }
9489 else
9490 return x;
9491 }
9492
9493 if (TARGET_THUMB1)
9494 return thumb_legitimize_address (x, orig_x, mode);
9495
9496 if (GET_CODE (x) == PLUS)
9497 {
9498 rtx xop0 = XEXP (x, 0);
9499 rtx xop1 = XEXP (x, 1);
9500
9501 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9502 xop0 = force_reg (SImode, xop0);
9503
9504 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9505 && !symbol_mentioned_p (xop1))
9506 xop1 = force_reg (SImode, xop1);
9507
9508 if (ARM_BASE_REGISTER_RTX_P (xop0)
9509 && CONST_INT_P (xop1))
9510 {
9511 HOST_WIDE_INT n, low_n;
9512 rtx base_reg, val;
9513 n = INTVAL (xop1);
9514
9515 /* VFP addressing modes actually allow greater offsets, but for
9516 now we just stick with the lowest common denominator. */
9517 if (mode == DImode || mode == DFmode)
9518 {
9519 low_n = n & 0x0f;
9520 n &= ~0x0f;
9521 if (low_n > 4)
9522 {
9523 n += 16;
9524 low_n -= 16;
9525 }
9526 }
9527 else
9528 {
9529 low_n = ((mode) == TImode ? 0
9530 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9531 n -= low_n;
9532 }
9533
9534 base_reg = gen_reg_rtx (SImode);
9535 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9536 emit_move_insn (base_reg, val);
9537 x = plus_constant (Pmode, base_reg, low_n);
9538 }
9539 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9540 x = gen_rtx_PLUS (SImode, xop0, xop1);
9541 }
9542
9543 /* XXX We don't allow MINUS any more -- see comment in
9544 arm_legitimate_address_outer_p (). */
9545 else if (GET_CODE (x) == MINUS)
9546 {
9547 rtx xop0 = XEXP (x, 0);
9548 rtx xop1 = XEXP (x, 1);
9549
9550 if (CONSTANT_P (xop0))
9551 xop0 = force_reg (SImode, xop0);
9552
9553 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9554 xop1 = force_reg (SImode, xop1);
9555
9556 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9557 x = gen_rtx_MINUS (SImode, xop0, xop1);
9558 }
9559
9560 /* Make sure to take full advantage of the pre-indexed addressing mode
9561 with absolute addresses, which often allows the base register to
9562 be factorized for multiple adjacent memory references, and it might
9563 even allow the minipool to be avoided entirely. */
9564 else if (CONST_INT_P (x) && optimize > 0)
9565 {
9566 unsigned int bits;
9567 HOST_WIDE_INT mask, base, index;
9568 rtx base_reg;
9569
9570 /* LDR and LDRB can use a 12-bit index; LDRSB and the rest can
9571 only use an 8-bit index. So let's use a 12-bit index for
9572 SImode only and hope that arm_gen_constant will enable LDRB
9573 to use more bits. */
9574 bits = (mode == SImode) ? 12 : 8;
9575 mask = (1 << bits) - 1;
9576 base = INTVAL (x) & ~mask;
9577 index = INTVAL (x) & mask;
9578 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9579 {
9580 /* It'll most probably be more efficient to generate the
9581 base with more bits set and use a negative index instead.
9582 Don't do this for Thumb as negative offsets are much more
9583 limited. */
9584 base |= mask;
9585 index -= mask;
9586 }
9587 base_reg = force_reg (SImode, GEN_INT (base));
9588 x = plus_constant (Pmode, base_reg, index);
9589 }
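/* A worked example of the split above (illustrative): legitimizing the
   SImode address (const_int 0x12345) uses a 12-bit mask, giving
   base = 0x12000 and index = 0x345, so neighbouring accesses around
   0x12000 can share the same base register.  */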
9590
9591 if (flag_pic)
9592 {
9593 /* We need to find and carefully transform any SYMBOL and LABEL
9594 references; so go back to the original address expression. */
9595 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9596 false /*compute_now*/);
9597
9598 if (new_x != orig_x)
9599 x = new_x;
9600 }
9601
9602 return x;
9603 }
9604
9605
9606 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9607 to be legitimate. If we find one, return the new, valid address. */
9608 rtx
9609 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9610 {
9611 if (GET_CODE (x) == PLUS
9612 && CONST_INT_P (XEXP (x, 1))
9613 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9614 || INTVAL (XEXP (x, 1)) < 0))
9615 {
9616 rtx xop0 = XEXP (x, 0);
9617 rtx xop1 = XEXP (x, 1);
9618 HOST_WIDE_INT offset = INTVAL (xop1);
9619
9620 /* Try and fold the offset into a biasing of the base register and
9621 then offsetting that. Don't do this when optimizing for space
9622 since it can cause too many CSEs. */
9623 if (optimize_size && offset >= 0
9624 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9625 {
9626 HOST_WIDE_INT delta;
9627
9628 if (offset >= 256)
9629 delta = offset - (256 - GET_MODE_SIZE (mode));
9630 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9631 delta = 31 * GET_MODE_SIZE (mode);
9632 else
9633 delta = offset & (~31 * GET_MODE_SIZE (mode));
9634
9635 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9636 NULL_RTX);
9637 x = plus_constant (Pmode, xop0, delta);
9638 }
9639 else if (offset < 0 && offset > -256)
9640 /* Small negative offsets are best done with a subtract before the
9641 dereference; forcing these into a register normally takes two
9642 instructions. */
9643 x = force_operand (x, NULL_RTX);
9644 else
9645 {
9646 /* For the remaining cases, force the constant into a register. */
9647 xop1 = force_reg (SImode, xop1);
9648 x = gen_rtx_PLUS (SImode, xop0, xop1);
9649 }
9650 }
9651 else if (GET_CODE (x) == PLUS
9652 && s_register_operand (XEXP (x, 1), SImode)
9653 && !s_register_operand (XEXP (x, 0), SImode))
9654 {
9655 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9656
9657 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9658 }
9659
9660 if (flag_pic)
9661 {
9662 /* We need to find and carefully transform any SYMBOL and LABEL
9663 references; so go back to the original address expression. */
9664 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9665 false /*compute_now*/);
9666
9667 if (new_x != orig_x)
9668 x = new_x;
9669 }
9670
9671 return x;
9672 }
9673
9674 /* Return TRUE if X contains any TLS symbol references. */
9675
9676 bool
9677 arm_tls_referenced_p (rtx x)
9678 {
9679 if (! TARGET_HAVE_TLS)
9680 return false;
9681
9682 subrtx_iterator::array_type array;
9683 FOR_EACH_SUBRTX (iter, array, x, ALL)
9684 {
9685 const_rtx x = *iter;
9686 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9687 {
9688 /* ARM currently does not provide relocations that encode TLS variables
9689 into AArch32 instructions (only into data), so there is currently
9690 no way to implement these if the literal pool is disabled. */
9691 if (arm_disable_literal_pool)
9692 sorry ("accessing thread-local storage is not currently supported "
9693 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9694
9695 return true;
9696 }
9697
9698 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9699 TLS offsets, not real symbol references. */
9700 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9701 iter.skip_subrtxes ();
9702 }
9703 return false;
9704 }
9705
9706 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9707
9708 On the ARM, allow any integer (invalid ones are removed later by insn
9709 patterns), nice doubles and symbol_refs which refer to the function's
9710 constant pool XXX.
9711
9712 When generating pic allow anything. */
9713
9714 static bool
9715 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9716 {
9717 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9718 return false;
9719
9720 return flag_pic || !label_mentioned_p (x);
9721 }
9722
9723 static bool
9724 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9725 {
9726 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9727 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
9728 for ARMv8-M Baseline or later the result is valid. */
9729 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9730 x = XEXP (x, 0);
9731
9732 return (CONST_INT_P (x)
9733 || CONST_DOUBLE_P (x)
9734 || CONSTANT_ADDRESS_P (x)
9735 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9736 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9737 we build the symbol address with upper/lower
9738 relocations. */
9739 || (TARGET_THUMB1
9740 && !label_mentioned_p (x)
9741 && arm_valid_symbolic_address_p (x)
9742 && arm_disable_literal_pool)
9743 || flag_pic);
9744 }
9745
9746 static bool
9747 arm_legitimate_constant_p (machine_mode mode, rtx x)
9748 {
9749 return (!arm_cannot_force_const_mem (mode, x)
9750 && (TARGET_32BIT
9751 ? arm_legitimate_constant_p_1 (mode, x)
9752 : thumb_legitimate_constant_p (mode, x)));
9753 }
9754
9755 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9756
9757 static bool
9758 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9759 {
9760 rtx base, offset;
9761 split_const (x, &base, &offset);
9762
9763 if (SYMBOL_REF_P (base))
9764 {
9765 /* Function symbols cannot have an offset due to the Thumb bit. */
9766 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9767 && INTVAL (offset) != 0)
9768 return true;
9769
9770 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9771 && !offset_within_block_p (base, INTVAL (offset)))
9772 return true;
9773 }
9774 return arm_tls_referenced_p (x);
9775 }
9776 \f
9777 #define REG_OR_SUBREG_REG(X) \
9778 (REG_P (X) \
9779 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9780
9781 #define REG_OR_SUBREG_RTX(X) \
9782 (REG_P (X) ? (X) : SUBREG_REG (X))
9783
9784 static inline int
9785 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9786 {
9787 machine_mode mode = GET_MODE (x);
9788 int total, words;
9789
9790 switch (code)
9791 {
9792 case ASHIFT:
9793 case ASHIFTRT:
9794 case LSHIFTRT:
9795 case ROTATERT:
9796 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9797
9798 case PLUS:
9799 case MINUS:
9800 case COMPARE:
9801 case NEG:
9802 case NOT:
9803 return COSTS_N_INSNS (1);
9804
9805 case MULT:
9806 if (arm_arch6m && arm_m_profile_small_mul)
9807 return COSTS_N_INSNS (32);
9808
9809 if (CONST_INT_P (XEXP (x, 1)))
9810 {
9811 int cycles = 0;
9812 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9813
9814 while (i)
9815 {
9816 i >>= 2;
9817 cycles++;
9818 }
9819 return COSTS_N_INSNS (2) + cycles;
9820 }
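/* For the constant-multiplier loop above, e.g. INTVAL == 100 takes four
   iterations (100 -> 25 -> 6 -> 1 -> 0 when shifting right by two bits
   each time), giving COSTS_N_INSNS (2) + 4 -- a rough, illustrative
   proxy for an iterative multiply.  */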
9821 return COSTS_N_INSNS (1) + 16;
9822
9823 case SET:
9824 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9825 the mode. */
9826 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9827 return (COSTS_N_INSNS (words)
9828 + 4 * ((MEM_P (SET_SRC (x)))
9829 + MEM_P (SET_DEST (x))));
9830
9831 case CONST_INT:
9832 if (outer == SET)
9833 {
9834 if (UINTVAL (x) < 256
9835 /* 16-bit constant. */
9836 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9837 return 0;
9838 if (thumb_shiftable_const (INTVAL (x)))
9839 return COSTS_N_INSNS (2);
9840 return arm_disable_literal_pool
9841 ? COSTS_N_INSNS (8)
9842 : COSTS_N_INSNS (3);
9843 }
9844 else if ((outer == PLUS || outer == COMPARE)
9845 && INTVAL (x) < 256 && INTVAL (x) > -256)
9846 return 0;
9847 else if ((outer == IOR || outer == XOR || outer == AND)
9848 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9849 return COSTS_N_INSNS (1);
9850 else if (outer == AND)
9851 {
9852 int i;
9853 /* This duplicates the tests in the andsi3 expander. */
9854 for (i = 9; i <= 31; i++)
9855 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9856 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9857 return COSTS_N_INSNS (2);
9858 }
9859 else if (outer == ASHIFT || outer == ASHIFTRT
9860 || outer == LSHIFTRT)
9861 return 0;
9862 return COSTS_N_INSNS (2);
9863
9864 case CONST:
9865 case CONST_DOUBLE:
9866 case LABEL_REF:
9867 case SYMBOL_REF:
9868 return COSTS_N_INSNS (3);
9869
9870 case UDIV:
9871 case UMOD:
9872 case DIV:
9873 case MOD:
9874 return 100;
9875
9876 case TRUNCATE:
9877 return 99;
9878
9879 case AND:
9880 case XOR:
9881 case IOR:
9882 /* XXX guess. */
9883 return 8;
9884
9885 case MEM:
9886 /* XXX another guess. */
9887 /* Memory costs quite a lot for the first word, but subsequent words
9888 load at the equivalent of a single insn each. */
9889 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9890 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9891 ? 4 : 0));
9892
9893 case IF_THEN_ELSE:
9894 /* XXX a guess. */
9895 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9896 return 14;
9897 return 2;
9898
9899 case SIGN_EXTEND:
9900 case ZERO_EXTEND:
9901 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9902 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9903
9904 if (mode == SImode)
9905 return total;
9906
9907 if (arm_arch6)
9908 return total + COSTS_N_INSNS (1);
9909
9910 /* Assume a two-shift sequence. Increase the cost slightly so
9911 we prefer actual shifts over an extend operation. */
9912 return total + 1 + COSTS_N_INSNS (2);
9913
9914 default:
9915 return 99;
9916 }
9917 }
9918
9919 /* Estimates the size cost of Thumb-1 instructions.
9920 For now most of the code is copied from thumb1_rtx_costs. We need more
9921 fine-grained tuning when we have more related test cases. */
9922 static inline int
9923 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9924 {
9925 machine_mode mode = GET_MODE (x);
9926 int words, cost;
9927
9928 switch (code)
9929 {
9930 case ASHIFT:
9931 case ASHIFTRT:
9932 case LSHIFTRT:
9933 case ROTATERT:
9934 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9935
9936 case PLUS:
9937 case MINUS:
9938 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9939 steps used by RTL expansion, especially when expanding a
9940 multiplication. */
9941 if ((GET_CODE (XEXP (x, 0)) == MULT
9942 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9943 || (GET_CODE (XEXP (x, 1)) == MULT
9944 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9945 return COSTS_N_INSNS (2);
9946 /* Fall through. */
9947 case COMPARE:
9948 case NEG:
9949 case NOT:
9950 return COSTS_N_INSNS (1);
9951
9952 case MULT:
9953 if (CONST_INT_P (XEXP (x, 1)))
9954 {
9955 /* The Thumb-1 mul instruction can't operate on a constant; we must
9956 load it into a register first. */
9957 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9958 /* For the targets which have a very small and high-latency multiply
9959 unit, we prefer to synthesize the mult with up to 5 instructions,
9960 giving a good balance between size and performance. */
9961 if (arm_arch6m && arm_m_profile_small_mul)
9962 return COSTS_N_INSNS (5);
9963 else
9964 return COSTS_N_INSNS (1) + const_size;
9965 }
9966 return COSTS_N_INSNS (1);
9967
9968 case SET:
9969 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9970 the mode. */
9971 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9972 cost = COSTS_N_INSNS (words);
9973 if (satisfies_constraint_J (SET_SRC (x))
9974 || satisfies_constraint_K (SET_SRC (x))
9975 /* Too big an immediate for a 2-byte mov, using MOVT. */
9976 || (CONST_INT_P (SET_SRC (x))
9977 && UINTVAL (SET_SRC (x)) >= 256
9978 && TARGET_HAVE_MOVT
9979 && satisfies_constraint_j (SET_SRC (x)))
9980 /* thumb1_movdi_insn. */
9981 || ((words > 1) && MEM_P (SET_SRC (x))))
9982 cost += COSTS_N_INSNS (1);
9983 return cost;
9984
9985 case CONST_INT:
9986 if (outer == SET)
9987 {
9988 if (UINTVAL (x) < 256)
9989 return COSTS_N_INSNS (1);
9990 /* movw is 4 bytes long. */
9991 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9992 return COSTS_N_INSNS (2);
9993 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9994 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9995 return COSTS_N_INSNS (2);
9996 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9997 if (thumb_shiftable_const (INTVAL (x)))
9998 return COSTS_N_INSNS (2);
9999 return arm_disable_literal_pool
10000 ? COSTS_N_INSNS (8)
10001 : COSTS_N_INSNS (3);
10002 }
10003 else if ((outer == PLUS || outer == COMPARE)
10004 && INTVAL (x) < 256 && INTVAL (x) > -256)
10005 return 0;
10006 else if ((outer == IOR || outer == XOR || outer == AND)
10007 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10008 return COSTS_N_INSNS (1);
10009 else if (outer == AND)
10010 {
10011 int i;
10012 /* This duplicates the tests in the andsi3 expander. */
10013 for (i = 9; i <= 31; i++)
10014 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10015 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10016 return COSTS_N_INSNS (2);
10017 }
10018 else if (outer == ASHIFT || outer == ASHIFTRT
10019 || outer == LSHIFTRT)
10020 return 0;
10021 return COSTS_N_INSNS (2);
10022
10023 case CONST:
10024 case CONST_DOUBLE:
10025 case LABEL_REF:
10026 case SYMBOL_REF:
10027 return COSTS_N_INSNS (3);
10028
10029 case UDIV:
10030 case UMOD:
10031 case DIV:
10032 case MOD:
10033 return 100;
10034
10035 case TRUNCATE:
10036 return 99;
10037
10038 case AND:
10039 case XOR:
10040 case IOR:
10041 return COSTS_N_INSNS (1);
10042
10043 case MEM:
10044 return (COSTS_N_INSNS (1)
10045 + COSTS_N_INSNS (1)
10046 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10047 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10048 ? COSTS_N_INSNS (1) : 0));
10049
10050 case IF_THEN_ELSE:
10051 /* XXX a guess. */
10052 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10053 return 14;
10054 return 2;
10055
10056 case ZERO_EXTEND:
10057 /* XXX still guessing. */
10058 switch (GET_MODE (XEXP (x, 0)))
10059 {
10060 case E_QImode:
10061 return (1 + (mode == DImode ? 4 : 0)
10062 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10063
10064 case E_HImode:
10065 return (4 + (mode == DImode ? 4 : 0)
10066 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10067
10068 case E_SImode:
10069 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10070
10071 default:
10072 return 99;
10073 }
10074
10075 default:
10076 return 99;
10077 }
10078 }
10079
10080 /* Helper function for arm_rtx_costs. If one operand of OP (a PLUS)
10081 adds the carry flag, then return the other operand. If neither
10082 operand is a carry, return OP unchanged. */
10083 static rtx
10084 strip_carry_operation (rtx op)
10085 {
10086 gcc_assert (GET_CODE (op) == PLUS);
10087 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10088 return XEXP (op, 1);
10089 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10090 return XEXP (op, 0);
10091 return op;
10092 }
10093
10094 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10095 operand, then return the operand that is being shifted. If the shift
10096 is not by a constant, then set SHIFT_REG to point to the shift-amount
10097 operand. Return NULL if OP is not a shifter operand. */
10098 static rtx
10099 shifter_op_p (rtx op, rtx *shift_reg)
10100 {
10101 enum rtx_code code = GET_CODE (op);
10102
10103 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10104 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10105 return XEXP (op, 0);
10106 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10107 return XEXP (op, 0);
10108 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10109 || code == ASHIFTRT)
10110 {
10111 if (!CONST_INT_P (XEXP (op, 1)))
10112 *shift_reg = XEXP (op, 1);
10113 return XEXP (op, 0);
10114 }
10115
10116 return NULL;
10117 }
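/* Illustrative examples for shifter_op_p (not exhaustive):
     (ashift (reg r1) (const_int 2))  returns (reg r1);
     (mult (reg r1) (const_int 4))    also returns (reg r1), a multiply
                                      by a power of two being a left
                                      shift in disguise;
     (ashift (reg r1) (reg r2))       returns (reg r1) and sets
                                      *SHIFT_REG to (reg r2).  */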
10118
10119 static bool
10120 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10121 {
10122 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10123 rtx_code code = GET_CODE (x);
10124 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10125
10126 switch (XINT (x, 1))
10127 {
10128 case UNSPEC_UNALIGNED_LOAD:
10129 /* We can only do unaligned loads into the integer unit, and we can't
10130 use LDM or LDRD. */
10131 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10132 if (speed_p)
10133 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10134 + extra_cost->ldst.load_unaligned);
10135
10136 #ifdef NOT_YET
10137 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10138 ADDR_SPACE_GENERIC, speed_p);
10139 #endif
10140 return true;
10141
10142 case UNSPEC_UNALIGNED_STORE:
10143 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10144 if (speed_p)
10145 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10146 + extra_cost->ldst.store_unaligned);
10147
10148 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10149 #ifdef NOT_YET
10150 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10151 ADDR_SPACE_GENERIC, speed_p);
10152 #endif
10153 return true;
10154
10155 case UNSPEC_VRINTZ:
10156 case UNSPEC_VRINTP:
10157 case UNSPEC_VRINTM:
10158 case UNSPEC_VRINTR:
10159 case UNSPEC_VRINTX:
10160 case UNSPEC_VRINTA:
10161 if (speed_p)
10162 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10163
10164 return true;
10165 default:
10166 *cost = COSTS_N_INSNS (2);
10167 break;
10168 }
10169 return true;
10170 }
10171
10172 /* Cost of a libcall. We assume one insn per argument, an amount for the
10173 call (one insn for -Os) and then one for processing the result. */
10174 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
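/* E.g. LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) when optimizing for size.  */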
10175
10176 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10177 do \
10178 { \
10179 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10180 if (shift_op != NULL \
10181 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10182 { \
10183 if (shift_reg) \
10184 { \
10185 if (speed_p) \
10186 *cost += extra_cost->alu.arith_shift_reg; \
10187 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10188 ASHIFT, 1, speed_p); \
10189 } \
10190 else if (speed_p) \
10191 *cost += extra_cost->alu.arith_shift; \
10192 \
10193 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10194 ASHIFT, 0, speed_p) \
10195 + rtx_cost (XEXP (x, 1 - IDX), \
10196 GET_MODE (shift_op), \
10197 OP, 1, speed_p)); \
10198 return true; \
10199 } \
10200 } \
10201 while (0)
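/* The macro above is intended to be invoked from the narrow-mode
   PLUS/MINUS cost cases, roughly as in this hypothetical usage sketch:
     HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
     HANDLE_NARROW_SHIFT_ARITH (PLUS, 1);
   i.e. once per operand position, returning early when a
   shift-and-arithmetic operand is recognised.  */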
10202
10203 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10204 considering the costs of the addressing mode and memory access
10205 separately. */
10206 static bool
10207 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10208 int *cost, bool speed_p)
10209 {
10210 machine_mode mode = GET_MODE (x);
10211
10212 *cost = COSTS_N_INSNS (1);
10213
10214 if (flag_pic
10215 && GET_CODE (XEXP (x, 0)) == PLUS
10216 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10217 /* This will be split into two instructions. Add the cost of the
10218 additional instruction here. The cost of the memory access is computed
10219 below. See arm.md:calculate_pic_address. */
10220 *cost += COSTS_N_INSNS (1);
10221
10222 /* Calculate cost of the addressing mode. */
10223 if (speed_p)
10224 {
10225 arm_addr_mode_op op_type;
10226 switch (GET_CODE (XEXP (x, 0)))
10227 {
10228 default:
10229 case REG:
10230 op_type = AMO_DEFAULT;
10231 break;
10232 case MINUS:
10233 /* MINUS does not appear in RTL, but the architecture supports it,
10234 so handle this case defensively. */
10235 /* fall through */
10236 case PLUS:
10237 op_type = AMO_NO_WB;
10238 break;
10239 case PRE_INC:
10240 case PRE_DEC:
10241 case POST_INC:
10242 case POST_DEC:
10243 case PRE_MODIFY:
10244 case POST_MODIFY:
10245 op_type = AMO_WB;
10246 break;
10247 }
10248
10249 if (VECTOR_MODE_P (mode))
10250 *cost += current_tune->addr_mode_costs->vector[op_type];
10251 else if (FLOAT_MODE_P (mode))
10252 *cost += current_tune->addr_mode_costs->fp[op_type];
10253 else
10254 *cost += current_tune->addr_mode_costs->integer[op_type];
10255 }
10256
10257 /* Calculate cost of memory access. */
10258 if (speed_p)
10259 {
10260 if (FLOAT_MODE_P (mode))
10261 {
10262 if (GET_MODE_SIZE (mode) == 8)
10263 *cost += extra_cost->ldst.loadd;
10264 else
10265 *cost += extra_cost->ldst.loadf;
10266 }
10267 else if (VECTOR_MODE_P (mode))
10268 *cost += extra_cost->ldst.loadv;
10269 else
10270 {
10271 /* Integer modes */
10272 if (GET_MODE_SIZE (mode) == 8)
10273 *cost += extra_cost->ldst.ldrd;
10274 else
10275 *cost += extra_cost->ldst.load;
10276 }
10277 }
10278
10279 return true;
10280 }
10281
10282 /* Helper for arm_bfi_p. */
10283 static bool
10284 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10285 {
10286 unsigned HOST_WIDE_INT const1;
10287 unsigned HOST_WIDE_INT const2 = 0;
10288
10289 if (!CONST_INT_P (XEXP (op0, 1)))
10290 return false;
10291
10292 const1 = UINTVAL (XEXP (op0, 1));
10293 if (!CONST_INT_P (XEXP (op1, 1))
10294 || ~UINTVAL (XEXP (op1, 1)) != const1)
10295 return false;
10296
10297 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10298 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10299 {
10300 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10301 *sub0 = XEXP (XEXP (op0, 0), 0);
10302 }
10303 else
10304 *sub0 = XEXP (op0, 0);
10305
10306 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10307 return false;
10308
10309 *sub1 = XEXP (op1, 0);
10310 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10311 }
10312
10313 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10314 format looks something like:
10315
10316 (IOR (AND (reg1) (~const1))
10317 (AND (ASHIFT (reg2) (const2))
10318 (const1)))
10319
10320 where const1 is a consecutive sequence of 1-bits with the
10321 least-significant non-zero bit starting at bit position const2. If
10322 const2 is zero, then the shift will not appear at all, due to
10323 canonicalization. The two arms of the IOR expression may be
10324 flipped. */
10325 static bool
10326 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10327 {
10328 if (GET_CODE (x) != IOR)
10329 return false;
10330 if (GET_CODE (XEXP (x, 0)) != AND
10331 || GET_CODE (XEXP (x, 1)) != AND)
10332 return false;
10333 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10334 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10335 }
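/* A concrete instance of the idiom above (illustrative): inserting bits
   of r2 into bits 4..7 of r1 looks like
     (ior (and (reg r1) (const_int -241))           ; ~0xf0
          (and (ashift (reg r2) (const_int 4))
               (const_int 240)))                    ;  0xf0
   Here const1 == 0xf0, const2 == 4, and
   exact_log2 (0xf0 + (1 << 4)) == 8, so arm_bfi_p accepts it.  */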
10336
10337 /* RTX costs. Make an estimate of the cost of executing the operation
10338 X, which is contained within an operation with code OUTER_CODE.
10339 SPEED_P indicates whether the cost desired is the performance cost,
10340 or the size cost. The estimate is stored in COST and the return
10341 value is TRUE if the cost calculation is final, or FALSE if the
10342 caller should recurse through the operands of X to add additional
10343 costs.
10344
10345 We currently make no attempt to model the size savings of Thumb-2
10346 16-bit instructions. At the normal points in compilation where
10347 this code is called we have no measure of whether the condition
10348 flags are live or not, and thus no realistic way to determine what
10349 the size will eventually be. */
10350 static bool
10351 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10352 const struct cpu_cost_table *extra_cost,
10353 int *cost, bool speed_p)
10354 {
10355 machine_mode mode = GET_MODE (x);
10356
10357 *cost = COSTS_N_INSNS (1);
10358
10359 if (TARGET_THUMB1)
10360 {
10361 if (speed_p)
10362 *cost = thumb1_rtx_costs (x, code, outer_code);
10363 else
10364 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10365 return true;
10366 }
10367
10368 switch (code)
10369 {
10370 case SET:
10371 *cost = 0;
10372 /* SET RTXs don't have a mode so we get it from the destination. */
10373 mode = GET_MODE (SET_DEST (x));
10374
10375 if (REG_P (SET_SRC (x))
10376 && REG_P (SET_DEST (x)))
10377 {
10378 /* Assume that most copies can be done with a single insn,
10379 unless we don't have HW FP, in which case everything
10380 larger than word mode will require two insns. */
10381 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10382 && GET_MODE_SIZE (mode) > 4)
10383 || mode == DImode)
10384 ? 2 : 1);
10385 /* Conditional register moves can be encoded
10386 in 16 bits in Thumb mode. */
10387 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10388 *cost >>= 1;
10389
10390 return true;
10391 }
10392
10393 if (CONST_INT_P (SET_SRC (x)))
10394 {
10395 /* Handle CONST_INT here, since the value doesn't have a mode
10396 and we would otherwise be unable to work out the true cost. */
10397 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10398 0, speed_p);
10399 outer_code = SET;
10400 /* Slightly lower the cost of setting a core reg to a constant.
10401 This helps break up chains and allows for better scheduling. */
10402 if (REG_P (SET_DEST (x))
10403 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10404 *cost -= 1;
10405 x = SET_SRC (x);
10406 /* Immediate moves with an immediate in the range [0, 255] can be
10407 encoded in 16 bits in Thumb mode. */
10408 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10409 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10410 *cost >>= 1;
10411 goto const_int_cost;
10412 }
10413
10414 return false;
10415
10416 case MEM:
10417 return arm_mem_costs (x, extra_cost, cost, speed_p);
10418
10419 case PARALLEL:
10420 {
10421 /* Calculations of LDM costs are complex. We assume an initial cost
10422 (ldm_1st) which will load the number of registers mentioned in
10423 ldm_regs_per_insn_1st registers; then each additional
10424 ldm_regs_per_insn_subsequent registers cost one more insn. The
10425 formula for N regs is thus:
10426
10427 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10428 + ldm_regs_per_insn_subsequent - 1)
10429 / ldm_regs_per_insn_subsequent).
10430
10431 Additional costs may also be added for addressing. A similar
10432 formula is used for STM. */
10433
10434 bool is_ldm = load_multiple_operation (x, SImode);
10435 bool is_stm = store_multiple_operation (x, SImode);
10436
10437 if (is_ldm || is_stm)
10438 {
10439 if (speed_p)
10440 {
10441 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10442 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10443 ? extra_cost->ldst.ldm_regs_per_insn_1st
10444 : extra_cost->ldst.stm_regs_per_insn_1st;
10445 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10446 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10447 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10448
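/* Worked example of the formula above, with hypothetical tuning values:
   for nregs == 5, regs_per_insn_1st == 3 and regs_per_insn_sub == 2 the
   second term is COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2), i.e. one
   extra insn.  */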
10449 *cost += regs_per_insn_1st
10450 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10451 + regs_per_insn_sub - 1)
10452 / regs_per_insn_sub);
10453 return true;
10454 }
10455
10456 }
10457 return false;
10458 }
10459 case DIV:
10460 case UDIV:
10461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10462 && (mode == SFmode || !TARGET_VFP_SINGLE))
10463 *cost += COSTS_N_INSNS (speed_p
10464 ? extra_cost->fp[mode != SFmode].div : 0);
10465 else if (mode == SImode && TARGET_IDIV)
10466 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10467 else
10468 *cost = LIBCALL_COST (2);
10469
10470 /* Make sdiv more expensive so that when both sdiv and udiv are
10471 possible, udiv is preferred. */
10472 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10473 return false; /* All arguments must be in registers. */
10474
10475 case MOD:
10476 /* MOD by a power of 2 can be expanded as:
10477 rsbs r1, r0, #0
10478 and r0, r0, #(n - 1)
10479 and r1, r1, #(n - 1)
10480 rsbpl r0, r1, #0. */
10481 if (CONST_INT_P (XEXP (x, 1))
10482 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10483 && mode == SImode)
10484 {
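/* The expansion above is four insns; one is already covered by the base
   cost, so add three more.  */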
10485 *cost += COSTS_N_INSNS (3);
10486
10487 if (speed_p)
10488 *cost += 2 * extra_cost->alu.logical
10489 + extra_cost->alu.arith;
10490 return true;
10491 }
10492
10493 /* Fall-through. */
10494 case UMOD:
10495 /* Make sdiv more expensive so that when both sdiv and udiv are
10496 possible, udiv is preferred. */
10497 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10498 return false; /* All arguments must be in registers. */
10499
10500 case ROTATE:
10501 if (mode == SImode && REG_P (XEXP (x, 1)))
10502 {
10503 *cost += (COSTS_N_INSNS (1)
10504 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10505 if (speed_p)
10506 *cost += extra_cost->alu.shift_reg;
10507 return true;
10508 }
10509 /* Fall through */
10510 case ROTATERT:
10511 case ASHIFT:
10512 case LSHIFTRT:
10513 case ASHIFTRT:
10514 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10515 {
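/* A DImode shift by a constant is costed as three insns in total: the
   base insn plus the two added here.  */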
10516 *cost += (COSTS_N_INSNS (2)
10517 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10518 if (speed_p)
10519 *cost += 2 * extra_cost->alu.shift;
10520 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10521 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10522 *cost += 1;
10523 return true;
10524 }
10525 else if (mode == SImode)
10526 {
10527 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10528 /* Slightly disparage register shifts at -Os, but not by much. */
10529 if (!CONST_INT_P (XEXP (x, 1)))
10530 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10531 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10532 return true;
10533 }
10534 else if (GET_MODE_CLASS (mode) == MODE_INT
10535 && GET_MODE_SIZE (mode) < 4)
10536 {
10537 if (code == ASHIFT)
10538 {
10539 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10540 /* Slightly disparage register shifts at -Os, but not by
10541 much. */
10542 if (!CONST_INT_P (XEXP (x, 1)))
10543 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10544 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10545 }
10546 else if (code == LSHIFTRT || code == ASHIFTRT)
10547 {
10548 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10549 {
10550 /* Can use SBFX/UBFX. */
10551 if (speed_p)
10552 *cost += extra_cost->alu.bfx;
10553 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10554 }
10555 else
10556 {
10557 *cost += COSTS_N_INSNS (1);
10558 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10559 if (speed_p)
10560 {
10561 if (CONST_INT_P (XEXP (x, 1)))
10562 *cost += 2 * extra_cost->alu.shift;
10563 else
10564 *cost += (extra_cost->alu.shift
10565 + extra_cost->alu.shift_reg);
10566 }
10567 else
10568 /* Slightly disparage register shifts. */
10569 *cost += !CONST_INT_P (XEXP (x, 1));
10570 }
10571 }
10572 else /* Rotates. */
10573 {
10574 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10575 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10576 if (speed_p)
10577 {
10578 if (CONST_INT_P (XEXP (x, 1)))
10579 *cost += (2 * extra_cost->alu.shift
10580 + extra_cost->alu.log_shift);
10581 else
10582 *cost += (extra_cost->alu.shift
10583 + extra_cost->alu.shift_reg
10584 + extra_cost->alu.log_shift_reg);
10585 }
10586 }
10587 return true;
10588 }
10589
10590 *cost = LIBCALL_COST (2);
10591 return false;
10592
10593 case BSWAP:
10594 if (arm_arch6)
10595 {
10596 if (mode == SImode)
10597 {
10598 if (speed_p)
10599 *cost += extra_cost->alu.rev;
10600
10601 return false;
10602 }
10603 }
10604 else
10605 {
10606 /* No rev instruction available. Look at arm_legacy_rev
10607 and thumb_legacy_rev for the form of RTL used then. */
10608 if (TARGET_THUMB)
10609 {
10610 *cost += COSTS_N_INSNS (9);
10611
10612 if (speed_p)
10613 {
10614 *cost += 6 * extra_cost->alu.shift;
10615 *cost += 3 * extra_cost->alu.logical;
10616 }
10617 }
10618 else
10619 {
10620 *cost += COSTS_N_INSNS (4);
10621
10622 if (speed_p)
10623 {
10624 *cost += 2 * extra_cost->alu.shift;
10625 *cost += extra_cost->alu.arith_shift;
10626 *cost += 2 * extra_cost->alu.logical;
10627 }
10628 }
10629 return true;
10630 }
10631 return false;
10632
10633 case MINUS:
10634 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10635 && (mode == SFmode || !TARGET_VFP_SINGLE))
10636 {
10637 if (GET_CODE (XEXP (x, 0)) == MULT
10638 || GET_CODE (XEXP (x, 1)) == MULT)
10639 {
10640 rtx mul_op0, mul_op1, sub_op;
10641
10642 if (speed_p)
10643 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10644
10645 if (GET_CODE (XEXP (x, 0)) == MULT)
10646 {
10647 mul_op0 = XEXP (XEXP (x, 0), 0);
10648 mul_op1 = XEXP (XEXP (x, 0), 1);
10649 sub_op = XEXP (x, 1);
10650 }
10651 else
10652 {
10653 mul_op0 = XEXP (XEXP (x, 1), 0);
10654 mul_op1 = XEXP (XEXP (x, 1), 1);
10655 sub_op = XEXP (x, 0);
10656 }
10657
10658 /* The first operand of the multiply may be optionally
10659 negated. */
10660 if (GET_CODE (mul_op0) == NEG)
10661 mul_op0 = XEXP (mul_op0, 0);
10662
10663 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10664 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10665 + rtx_cost (sub_op, mode, code, 0, speed_p));
10666
10667 return true;
10668 }
10669
10670 if (speed_p)
10671 *cost += extra_cost->fp[mode != SFmode].addsub;
10672 return false;
10673 }
10674
10675 if (mode == SImode)
10676 {
10677 rtx shift_by_reg = NULL;
10678 rtx shift_op;
10679 rtx non_shift_op;
10680 rtx op0 = XEXP (x, 0);
10681 rtx op1 = XEXP (x, 1);
10682
10683 /* Factor out any borrow operation. There's more than one way
10684 of expressing this; try to recognize them all. */
10685 if (GET_CODE (op0) == MINUS)
10686 {
10687 if (arm_borrow_operation (op1, SImode))
10688 {
10689 op1 = XEXP (op0, 1);
10690 op0 = XEXP (op0, 0);
10691 }
10692 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10693 op0 = XEXP (op0, 0);
10694 }
10695 else if (GET_CODE (op1) == PLUS
10696 && arm_borrow_operation (XEXP (op1, 0), SImode))
10697 op1 = XEXP (op1, 0);
10698 else if (GET_CODE (op0) == NEG
10699 && arm_borrow_operation (op1, SImode))
10700 {
10701 /* Negate with carry-in. For Thumb2 this is done with
10702 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10703 RSC instruction that exists in Arm mode. */
10704 if (speed_p)
10705 *cost += (TARGET_THUMB2
10706 ? extra_cost->alu.arith_shift
10707 : extra_cost->alu.arith);
10708 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10709 return true;
10710 }
10711 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10712 Note we do mean ~borrow here. */
10713 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10714 {
10715 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10716 return true;
10717 }
10718
10719 shift_op = shifter_op_p (op0, &shift_by_reg);
10720 if (shift_op == NULL)
10721 {
10722 shift_op = shifter_op_p (op1, &shift_by_reg);
10723 non_shift_op = op0;
10724 }
10725 else
10726 non_shift_op = op1;
10727
10728 if (shift_op != NULL)
10729 {
10730 if (shift_by_reg != NULL)
10731 {
10732 if (speed_p)
10733 *cost += extra_cost->alu.arith_shift_reg;
10734 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10735 }
10736 else if (speed_p)
10737 *cost += extra_cost->alu.arith_shift;
10738
10739 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10740 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10741 return true;
10742 }
10743
10744 if (arm_arch_thumb2
10745 && GET_CODE (XEXP (x, 1)) == MULT)
10746 {
10747 /* MLS. */
10748 if (speed_p)
10749 *cost += extra_cost->mult[0].add;
10750 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10751 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10752 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10753 return true;
10754 }
10755
10756 if (CONST_INT_P (op0))
10757 {
10758 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10759 INTVAL (op0), NULL_RTX,
10760 NULL_RTX, 1, 0);
10761 *cost = COSTS_N_INSNS (insns);
10762 if (speed_p)
10763 *cost += insns * extra_cost->alu.arith;
10764 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10765 return true;
10766 }
10767 else if (speed_p)
10768 *cost += extra_cost->alu.arith;
10769
10770 /* Don't recurse as we don't want to cost any borrow that
10771 we've stripped. */
10772 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10773 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10774 return true;
10775 }
10776
10777 if (GET_MODE_CLASS (mode) == MODE_INT
10778 && GET_MODE_SIZE (mode) < 4)
10779 {
10780 rtx shift_op, shift_reg;
10781 shift_reg = NULL;
10782
10783 /* We check both sides of the MINUS for shifter operands since,
10784 unlike PLUS, it's not commutative. */
10785
10786 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10787 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10788
10789 /* Slightly disparage, as we might need to widen the result. */
10790 *cost += 1;
10791 if (speed_p)
10792 *cost += extra_cost->alu.arith;
10793
10794 if (CONST_INT_P (XEXP (x, 0)))
10795 {
10796 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10797 return true;
10798 }
10799
10800 return false;
10801 }
10802
10803 if (mode == DImode)
10804 {
10805 *cost += COSTS_N_INSNS (1);
10806
10807 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10808 {
10809 rtx op1 = XEXP (x, 1);
10810
10811 if (speed_p)
10812 *cost += 2 * extra_cost->alu.arith;
10813
10814 if (GET_CODE (op1) == ZERO_EXTEND)
10815 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10816 0, speed_p);
10817 else
10818 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10819 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10820 0, speed_p);
10821 return true;
10822 }
10823 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10824 {
10825 if (speed_p)
10826 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10827 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10828 0, speed_p)
10829 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10830 return true;
10831 }
10832 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10833 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10834 {
10835 if (speed_p)
10836 *cost += (extra_cost->alu.arith
10837 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10838 ? extra_cost->alu.arith
10839 : extra_cost->alu.arith_shift));
10840 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10841 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10842 GET_CODE (XEXP (x, 1)), 0, speed_p));
10843 return true;
10844 }
10845
10846 if (speed_p)
10847 *cost += 2 * extra_cost->alu.arith;
10848 return false;
10849 }
10850
10851 /* Vector mode? */
10852
10853 *cost = LIBCALL_COST (2);
10854 return false;
10855
10856 case PLUS:
10857 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10858 && (mode == SFmode || !TARGET_VFP_SINGLE))
10859 {
10860 if (GET_CODE (XEXP (x, 0)) == MULT)
10861 {
10862 rtx mul_op0, mul_op1, add_op;
10863
10864 if (speed_p)
10865 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10866
10867 mul_op0 = XEXP (XEXP (x, 0), 0);
10868 mul_op1 = XEXP (XEXP (x, 0), 1);
10869 add_op = XEXP (x, 1);
10870
10871 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10872 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10873 + rtx_cost (add_op, mode, code, 0, speed_p));
10874
10875 return true;
10876 }
10877
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode != SFmode].addsub;
10880 return false;
10881 }
10882 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10883 {
10884 *cost = LIBCALL_COST (2);
10885 return false;
10886 }
10887
10888 /* Narrow modes can be synthesized in SImode, but the range
10889 of useful sub-operations is limited. Check for shift operations
10890 on one of the operands. Only left shifts can be used in the
10891 narrow modes. */
10892 if (GET_MODE_CLASS (mode) == MODE_INT
10893 && GET_MODE_SIZE (mode) < 4)
10894 {
10895 rtx shift_op, shift_reg;
10896 shift_reg = NULL;
10897
10898 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10899
10900 if (CONST_INT_P (XEXP (x, 1)))
10901 {
10902 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10903 INTVAL (XEXP (x, 1)), NULL_RTX,
10904 NULL_RTX, 1, 0);
10905 *cost = COSTS_N_INSNS (insns);
10906 if (speed_p)
10907 *cost += insns * extra_cost->alu.arith;
10908 /* Slightly penalize a narrow operation as the result may
10909 need widening. */
10910 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10911 return true;
10912 }
10913
10914 /* Slightly penalize a narrow operation as the result may
10915 need widening. */
10916 *cost += 1;
10917 if (speed_p)
10918 *cost += extra_cost->alu.arith;
10919
10920 return false;
10921 }
10922
10923 if (mode == SImode)
10924 {
10925 rtx shift_op, shift_reg;
10926
10927 if (TARGET_INT_SIMD
10928 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10929 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10930 {
10931 /* UXTA[BH] or SXTA[BH]. */
10932 if (speed_p)
10933 *cost += extra_cost->alu.extend_arith;
10934 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10935 0, speed_p)
10936 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10937 return true;
10938 }
10939
10940 rtx op0 = XEXP (x, 0);
10941 rtx op1 = XEXP (x, 1);
10942
10943 /* Handle a side effect of adding in the carry to an addition. */
10944 if (GET_CODE (op0) == PLUS
10945 && arm_carry_operation (op1, mode))
10946 {
10947 op1 = XEXP (op0, 1);
10948 op0 = XEXP (op0, 0);
10949 }
10950 else if (GET_CODE (op1) == PLUS
10951 && arm_carry_operation (op0, mode))
10952 {
10953 op0 = XEXP (op1, 0);
10954 op1 = XEXP (op1, 1);
10955 }
10956 else if (GET_CODE (op0) == PLUS)
10957 {
10958 op0 = strip_carry_operation (op0);
10959 if (swap_commutative_operands_p (op0, op1))
10960 std::swap (op0, op1);
10961 }
10962
10963 if (arm_carry_operation (op0, mode))
10964 {
10965 /* Adding the carry to a register is a canonicalization of
10966 adding 0 to the register plus the carry. */
10967 if (speed_p)
10968 *cost += extra_cost->alu.arith;
10969 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10970 return true;
10971 }
10972
10973 shift_reg = NULL;
10974 shift_op = shifter_op_p (op0, &shift_reg);
10975 if (shift_op != NULL)
10976 {
10977 if (shift_reg)
10978 {
10979 if (speed_p)
10980 *cost += extra_cost->alu.arith_shift_reg;
10981 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10982 }
10983 else if (speed_p)
10984 *cost += extra_cost->alu.arith_shift;
10985
10986 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10987 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10988 return true;
10989 }
10990
10991 if (GET_CODE (op0) == MULT)
10992 {
10993 rtx mul_op = op0;
10994
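/* For SMLA[BT][BT] each multiply operand must be a 16-bit value, either
   sign-extended from HImode or the top half selected by an arithmetic
   shift right of 16.  */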
10995 if (TARGET_DSP_MULTIPLY
10996 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10997 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10998 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10999 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11000 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11001 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11002 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11003 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11004 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11005 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11006 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11007 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11008 == 16))))))
11009 {
11010 /* SMLA[BT][BT]. */
11011 if (speed_p)
11012 *cost += extra_cost->mult[0].extend_add;
11013 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11014 SIGN_EXTEND, 0, speed_p)
11015 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11016 SIGN_EXTEND, 0, speed_p)
11017 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11018 return true;
11019 }
11020
11021 if (speed_p)
11022 *cost += extra_cost->mult[0].add;
11023 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11024 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11025 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11026 return true;
11027 }
11028
11029 if (CONST_INT_P (op1))
11030 {
11031 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11032 INTVAL (op1), NULL_RTX,
11033 NULL_RTX, 1, 0);
11034 *cost = COSTS_N_INSNS (insns);
11035 if (speed_p)
11036 *cost += insns * extra_cost->alu.arith;
11037 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11038 return true;
11039 }
11040
11041 if (speed_p)
11042 *cost += extra_cost->alu.arith;
11043
11044 /* Don't recurse here because we want to test the operands
11045 without any carry operation. */
11046 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11047 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11048 return true;
11049 }
11050
11051 if (mode == DImode)
11052 {
11053 if (GET_CODE (XEXP (x, 0)) == MULT
11054 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11055 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11056 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11058 {
11059 if (speed_p)
11060 *cost += extra_cost->mult[1].extend_add;
11061 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11062 ZERO_EXTEND, 0, speed_p)
11063 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11064 ZERO_EXTEND, 0, speed_p)
11065 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11066 return true;
11067 }
11068
11069 *cost += COSTS_N_INSNS (1);
11070
11071 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11072 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11073 {
11074 if (speed_p)
11075 *cost += (extra_cost->alu.arith
11076 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11077 ? extra_cost->alu.arith
11078 : extra_cost->alu.arith_shift));
11079
11080 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11081 0, speed_p)
11082 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11083 return true;
11084 }
11085
11086 if (speed_p)
11087 *cost += 2 * extra_cost->alu.arith;
11088 return false;
11089 }
11090
11091 /* Vector mode? */
11092 *cost = LIBCALL_COST (2);
11093 return false;
11094 case IOR:
11095 {
11096 rtx sub0, sub1;
11097 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11098 {
11099 if (speed_p)
11100 *cost += extra_cost->alu.rev;
11101
11102 return true;
11103 }
11104 else if (mode == SImode && arm_arch_thumb2
11105 && arm_bfi_p (x, &sub0, &sub1))
11106 {
11107 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11108 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11109 if (speed_p)
11110 *cost += extra_cost->alu.bfi;
11111
11112 return true;
11113 }
11114 }
11115
11116 /* Fall through. */
11117 case AND: case XOR:
11118 if (mode == SImode)
11119 {
11120 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11121 rtx op0 = XEXP (x, 0);
11122 rtx shift_op, shift_reg;
11123
11124 if (subcode == NOT
11125 && (code == AND
11126 || (code == IOR && TARGET_THUMB2)))
11127 op0 = XEXP (op0, 0);
11128
11129 shift_reg = NULL;
11130 shift_op = shifter_op_p (op0, &shift_reg);
11131 if (shift_op != NULL)
11132 {
11133 if (shift_reg)
11134 {
11135 if (speed_p)
11136 *cost += extra_cost->alu.log_shift_reg;
11137 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11138 }
11139 else if (speed_p)
11140 *cost += extra_cost->alu.log_shift;
11141
11142 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11143 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11144 return true;
11145 }
11146
11147 if (CONST_INT_P (XEXP (x, 1)))
11148 {
11149 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11150 INTVAL (XEXP (x, 1)), NULL_RTX,
11151 NULL_RTX, 1, 0);
11152
11153 *cost = COSTS_N_INSNS (insns);
11154 if (speed_p)
11155 *cost += insns * extra_cost->alu.logical;
11156 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11157 return true;
11158 }
11159
11160 if (speed_p)
11161 *cost += extra_cost->alu.logical;
11162 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11163 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11164 return true;
11165 }
11166
11167 if (mode == DImode)
11168 {
11169 rtx op0 = XEXP (x, 0);
11170 enum rtx_code subcode = GET_CODE (op0);
11171
11172 *cost += COSTS_N_INSNS (1);
11173
11174 if (subcode == NOT
11175 && (code == AND
11176 || (code == IOR && TARGET_THUMB2)))
11177 op0 = XEXP (op0, 0);
11178
11179 if (GET_CODE (op0) == ZERO_EXTEND)
11180 {
11181 if (speed_p)
11182 *cost += 2 * extra_cost->alu.logical;
11183
11184 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11185 0, speed_p)
11186 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11187 return true;
11188 }
11189 else if (GET_CODE (op0) == SIGN_EXTEND)
11190 {
11191 if (speed_p)
11192 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11193
11194 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11195 0, speed_p)
11196 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11197 return true;
11198 }
11199
11200 if (speed_p)
11201 *cost += 2 * extra_cost->alu.logical;
11202
11203 return true;
11204 }
11205 /* Vector mode? */
11206
11207 *cost = LIBCALL_COST (2);
11208 return false;
11209
11210 case MULT:
11211 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11212 && (mode == SFmode || !TARGET_VFP_SINGLE))
11213 {
11214 rtx op0 = XEXP (x, 0);
11215
11216 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11217 op0 = XEXP (op0, 0);
11218
11219 if (speed_p)
11220 *cost += extra_cost->fp[mode != SFmode].mult;
11221
11222 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11223 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11224 return true;
11225 }
11226 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11227 {
11228 *cost = LIBCALL_COST (2);
11229 return false;
11230 }
11231
11232 if (mode == SImode)
11233 {
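/* As for SMLA above, SMUL[TB][TB] needs both multiply operands to be
   16-bit values, via sign-extension or an arithmetic shift right of 16.  */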
11234 if (TARGET_DSP_MULTIPLY
11235 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11236 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11237 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11238 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11239 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11240 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11241 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11242 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11243 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11244 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11245 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11246 && (INTVAL (XEXP (XEXP (x, 1), 1))
11247 == 16))))))
11248 {
11249 /* SMUL[TB][TB]. */
11250 if (speed_p)
11251 *cost += extra_cost->mult[0].extend;
11252 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11253 SIGN_EXTEND, 0, speed_p);
11254 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11255 SIGN_EXTEND, 1, speed_p);
11256 return true;
11257 }
11258 if (speed_p)
11259 *cost += extra_cost->mult[0].simple;
11260 return false;
11261 }
11262
11263 if (mode == DImode)
11264 {
11265 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11266 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11267 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11268 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11269 {
11270 if (speed_p)
11271 *cost += extra_cost->mult[1].extend;
11272 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11273 ZERO_EXTEND, 0, speed_p)
11274 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11275 ZERO_EXTEND, 0, speed_p));
11276 return true;
11277 }
11278
11279 *cost = LIBCALL_COST (2);
11280 return false;
11281 }
11282
11283 /* Vector mode? */
11284 *cost = LIBCALL_COST (2);
11285 return false;
11286
11287 case NEG:
11288 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11289 && (mode == SFmode || !TARGET_VFP_SINGLE))
11290 {
11291 if (GET_CODE (XEXP (x, 0)) == MULT)
11292 {
11293 /* VNMUL. */
11294 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11295 return true;
11296 }
11297
11298 if (speed_p)
11299 *cost += extra_cost->fp[mode != SFmode].neg;
11300
11301 return false;
11302 }
11303 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11304 {
11305 *cost = LIBCALL_COST (1);
11306 return false;
11307 }
11308
11309 if (mode == SImode)
11310 {
11311 if (GET_CODE (XEXP (x, 0)) == ABS)
11312 {
11313 *cost += COSTS_N_INSNS (1);
11314 /* Assume the non-flag-changing variant. */
11315 if (speed_p)
11316 *cost += (extra_cost->alu.log_shift
11317 + extra_cost->alu.arith_shift);
11318 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11319 return true;
11320 }
11321
11322 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11323 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11324 {
11325 *cost += COSTS_N_INSNS (1);
11326 /* No extra cost for MOV imm and MVN imm. */
11327 /* If the comparison op is using the flags, there's no further
11328 cost, otherwise we need to add the cost of the comparison. */
11329 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11330 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11331 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11332 {
11333 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11334 *cost += (COSTS_N_INSNS (1)
11335 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11336 0, speed_p)
11337 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11338 1, speed_p));
11339 if (speed_p)
11340 *cost += extra_cost->alu.arith;
11341 }
11342 return true;
11343 }
11344
11345 if (speed_p)
11346 *cost += extra_cost->alu.arith;
11347 return false;
11348 }
11349
11350 if (GET_MODE_CLASS (mode) == MODE_INT
11351 && GET_MODE_SIZE (mode) < 4)
11352 {
11353 /* Slightly disparage, as we might need an extend operation. */
11354 *cost += 1;
11355 if (speed_p)
11356 *cost += extra_cost->alu.arith;
11357 return false;
11358 }
11359
11360 if (mode == DImode)
11361 {
11362 *cost += COSTS_N_INSNS (1);
11363 if (speed_p)
11364 *cost += 2 * extra_cost->alu.arith;
11365 return false;
11366 }
11367
11368 /* Vector mode? */
11369 *cost = LIBCALL_COST (1);
11370 return false;
11371
11372 case NOT:
11373 if (mode == SImode)
11374 {
11375 rtx shift_op;
11376 rtx shift_reg = NULL;
11377
11378 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11379
11380 if (shift_op)
11381 {
11382 if (shift_reg != NULL)
11383 {
11384 if (speed_p)
11385 *cost += extra_cost->alu.log_shift_reg;
11386 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11387 }
11388 else if (speed_p)
11389 *cost += extra_cost->alu.log_shift;
11390 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11391 return true;
11392 }
11393
11394 if (speed_p)
11395 *cost += extra_cost->alu.logical;
11396 return false;
11397 }
11398 if (mode == DImode)
11399 {
11400 *cost += COSTS_N_INSNS (1);
11401 return false;
11402 }
11403
11404 /* Vector mode? */
11405
11406 *cost += LIBCALL_COST (1);
11407 return false;
11408
11409 case IF_THEN_ELSE:
11410 {
11411 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11412 {
11413 *cost += COSTS_N_INSNS (3);
11414 return true;
11415 }
11416 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11417 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11418
11419 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11420 /* Assume that if one arm of the if_then_else is a register,
10421 it will be tied to the result and the conditional insn
10422 eliminated. */
11423 if (REG_P (XEXP (x, 1)))
11424 *cost += op2cost;
11425 else if (REG_P (XEXP (x, 2)))
11426 *cost += op1cost;
11427 else
11428 {
11429 if (speed_p)
11430 {
11431 if (extra_cost->alu.non_exec_costs_exec)
11432 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11433 else
11434 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11435 }
11436 else
11437 *cost += op1cost + op2cost;
11438 }
11439 }
11440 return true;
11441
11442 case COMPARE:
11443 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11444 *cost = 0;
11445 else
11446 {
11447 machine_mode op0mode;
11448 /* We'll mostly assume that the cost of a compare is the cost of the
11449 LHS. However, there are some notable exceptions. */
11450
11451 /* Floating point compares are never done as side-effects. */
11452 op0mode = GET_MODE (XEXP (x, 0));
11453 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11454 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11455 {
11456 if (speed_p)
11457 *cost += extra_cost->fp[op0mode != SFmode].compare;
11458
11459 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11460 {
11461 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11462 return true;
11463 }
11464
11465 return false;
11466 }
11467 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11468 {
11469 *cost = LIBCALL_COST (2);
11470 return false;
11471 }
11472
11473 /* DImode compares normally take two insns. */
11474 if (op0mode == DImode)
11475 {
11476 *cost += COSTS_N_INSNS (1);
11477 if (speed_p)
11478 *cost += 2 * extra_cost->alu.arith;
11479 return false;
11480 }
11481
11482 if (op0mode == SImode)
11483 {
11484 rtx shift_op;
11485 rtx shift_reg;
11486
11487 if (XEXP (x, 1) == const0_rtx
11488 && !(REG_P (XEXP (x, 0))
11489 || (GET_CODE (XEXP (x, 0)) == SUBREG
11490 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11491 {
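/* Comparison of a non-trivial operation against zero: assume the
   operation can set the flags itself, so charge only its own cost.  */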
11492 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11493
11494 /* Multiply operations that set the flags are often
11495 significantly more expensive. */
11496 if (speed_p
11497 && GET_CODE (XEXP (x, 0)) == MULT
11498 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11499 *cost += extra_cost->mult[0].flag_setting;
11500
11501 if (speed_p
11502 && GET_CODE (XEXP (x, 0)) == PLUS
11503 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11504 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11505 0), 1), mode))
11506 *cost += extra_cost->mult[0].flag_setting;
11507 return true;
11508 }
11509
11510 shift_reg = NULL;
11511 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11512 if (shift_op != NULL)
11513 {
11514 if (shift_reg != NULL)
11515 {
11516 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11517 1, speed_p);
11518 if (speed_p)
11519 *cost += extra_cost->alu.arith_shift_reg;
11520 }
11521 else if (speed_p)
11522 *cost += extra_cost->alu.arith_shift;
11523 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11524 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11525 return true;
11526 }
11527
11528 if (speed_p)
11529 *cost += extra_cost->alu.arith;
11530 if (CONST_INT_P (XEXP (x, 1))
11531 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11532 {
11533 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11534 return true;
11535 }
11536 return false;
11537 }
11538
11539 /* Vector mode? */
11540
11541 *cost = LIBCALL_COST (2);
11542 return false;
11543 }
11544 return true;
11545
11546 case EQ:
11547 case GE:
11548 case GT:
11549 case LE:
11550 case LT:
11551 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11552 vcle and vclt). */
11553 if (TARGET_NEON
11554 && TARGET_HARD_FLOAT
11555 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11556 && (XEXP (x, 1) == CONST0_RTX (mode)))
11557 {
11558 *cost = 0;
11559 return true;
11560 }
11561
11562 /* Fall through. */
11563 case NE:
11564 case LTU:
11565 case LEU:
11566 case GEU:
11567 case GTU:
11568 case ORDERED:
11569 case UNORDERED:
11570 case UNEQ:
11571 case UNLE:
11572 case UNLT:
11573 case UNGE:
11574 case UNGT:
11575 case LTGT:
11576 if (outer_code == SET)
11577 {
11578 /* Is it a store-flag operation? */
11579 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11580 && XEXP (x, 1) == const0_rtx)
11581 {
11582 /* Thumb also needs an IT insn. */
11583 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11584 return true;
11585 }
11586 if (XEXP (x, 1) == const0_rtx)
11587 {
11588 switch (code)
11589 {
11590 case LT:
11591 /* LSR Rd, Rn, #31. */
11592 if (speed_p)
11593 *cost += extra_cost->alu.shift;
11594 break;
11595
11596 case EQ:
11597 /* RSBS T1, Rn, #0
11598 ADC Rd, Rn, T1. */
11599
11600 case NE:
11601 /* SUBS T1, Rn, #1
11602 SBC Rd, Rn, T1. */
11603 *cost += COSTS_N_INSNS (1);
11604 break;
11605
11606 case LE:
11607 /* RSBS T1, Rn, Rn, LSR #31
11608 ADC Rd, Rn, T1. */
11609 *cost += COSTS_N_INSNS (1);
11610 if (speed_p)
11611 *cost += extra_cost->alu.arith_shift;
11612 break;
11613
11614 case GT:
11615 /* RSB Rd, Rn, Rn, ASR #1
11616 LSR Rd, Rd, #31. */
11617 *cost += COSTS_N_INSNS (1);
11618 if (speed_p)
11619 *cost += (extra_cost->alu.arith_shift
11620 + extra_cost->alu.shift);
11621 break;
11622
11623 case GE:
11624 /* ASR Rd, Rn, #31
11625 ADD Rd, Rn, #1. */
11626 *cost += COSTS_N_INSNS (1);
11627 if (speed_p)
11628 *cost += extra_cost->alu.shift;
11629 break;
11630
11631 default:
11632 /* Remaining cases are either meaningless or would take
11633 three insns anyway. */
11634 *cost = COSTS_N_INSNS (3);
11635 break;
11636 }
11637 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11638 return true;
11639 }
11640 else
11641 {
11642 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11643 if (CONST_INT_P (XEXP (x, 1))
11644 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11645 {
11646 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11647 return true;
11648 }
11649
11650 return false;
11651 }
11652 }
11653 /* Not directly inside a set. If it involves the condition code
11654 register it must be the condition for a branch, cond_exec or
11655 I_T_E operation. Since the comparison is performed elsewhere
11656 this is just the control part which has no additional
11657 cost. */
11658 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11659 && XEXP (x, 1) == const0_rtx)
11660 {
11661 *cost = 0;
11662 return true;
11663 }
11664 return false;
11665
11666 case ABS:
11667 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11668 && (mode == SFmode || !TARGET_VFP_SINGLE))
11669 {
11670 if (speed_p)
11671 *cost += extra_cost->fp[mode != SFmode].neg;
11672
11673 return false;
11674 }
11675 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11676 {
11677 *cost = LIBCALL_COST (1);
11678 return false;
11679 }
11680
11681 if (mode == SImode)
11682 {
11683 if (speed_p)
11684 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11685 return false;
11686 }
11687 /* Vector mode? */
11688 *cost = LIBCALL_COST (1);
11689 return false;
11690
11691 case SIGN_EXTEND:
11692 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11693 && MEM_P (XEXP (x, 0)))
11694 {
11695 if (mode == DImode)
11696 *cost += COSTS_N_INSNS (1);
11697
11698 if (!speed_p)
11699 return true;
11700
11701 if (GET_MODE (XEXP (x, 0)) == SImode)
11702 *cost += extra_cost->ldst.load;
11703 else
11704 *cost += extra_cost->ldst.load_sign_extend;
11705
11706 if (mode == DImode)
11707 *cost += extra_cost->alu.shift;
11708
11709 return true;
11710 }
11711
11712 /* Widening from less than 32 bits requires an extend operation. */
11713 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11714 {
11715 /* We have SXTB/SXTH. */
11716 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11717 if (speed_p)
11718 *cost += extra_cost->alu.extend;
11719 }
11720 else if (GET_MODE (XEXP (x, 0)) != SImode)
11721 {
11722 /* Needs two shifts. */
11723 *cost += COSTS_N_INSNS (1);
11724 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11725 if (speed_p)
11726 *cost += 2 * extra_cost->alu.shift;
11727 }
11728
11729 /* Widening beyond 32 bits requires one more insn. */
11730 if (mode == DImode)
11731 {
11732 *cost += COSTS_N_INSNS (1);
11733 if (speed_p)
11734 *cost += extra_cost->alu.shift;
11735 }
11736
11737 return true;
11738
11739 case ZERO_EXTEND:
11740 if ((arm_arch4
11741 || GET_MODE (XEXP (x, 0)) == SImode
11742 || GET_MODE (XEXP (x, 0)) == QImode)
11743 && MEM_P (XEXP (x, 0)))
11744 {
11745 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11746
11747 if (mode == DImode)
11748 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11749
11750 return true;
11751 }
11752
11753 /* Widening from less than 32 bits requires an extend operation. */
11754 if (GET_MODE (XEXP (x, 0)) == QImode)
11755 {
11756 /* UXTB can be a shorter instruction in Thumb2, but it might
11757 be slower than the AND Rd, Rn, #255 alternative. When
11758 optimizing for speed it should never be slower to use
11759 AND, and we don't really model 16-bit vs 32-bit insns
11760 here. */
11761 if (speed_p)
11762 *cost += extra_cost->alu.logical;
11763 }
11764 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11765 {
11766 /* We have UXTB/UXTH. */
11767 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11768 if (speed_p)
11769 *cost += extra_cost->alu.extend;
11770 }
11771 else if (GET_MODE (XEXP (x, 0)) != SImode)
11772 {
11773 /* Needs two shifts. It's marginally preferable to use
11774 shifts rather than two BIC instructions as the second
11775 shift may merge with a subsequent insn as a shifter
11776 op. */
11777 *cost = COSTS_N_INSNS (2);
11778 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11779 if (speed_p)
11780 *cost += 2 * extra_cost->alu.shift;
11781 }
11782
11783 /* Widening beyond 32 bits requires one more insn. */
11784 if (mode == DImode)
11785 {
11786 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11787 }
11788
11789 return true;
11790
11791 case CONST_INT:
11792 *cost = 0;
11793 /* CONST_INT has no mode, so we cannot tell for sure how many
11794 insns are really going to be needed. The best we can do is
11795 look at the value passed. If it fits in SImode, then assume
11796 that's the mode it will be used for. Otherwise assume it
11797 will be used in DImode. */
11798 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11799 mode = SImode;
11800 else
11801 mode = DImode;
11802
11803 /* Avoid blowing up in arm_gen_constant (). */
11804 if (!(outer_code == PLUS
11805 || outer_code == AND
11806 || outer_code == IOR
11807 || outer_code == XOR
11808 || outer_code == MINUS))
11809 outer_code = SET;
11810
11811 const_int_cost:
11812 if (mode == SImode)
11813 {
11814 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11815 INTVAL (x), NULL, NULL,
11816 0, 0));
11817 /* Extra costs? */
11818 }
11819 else
11820 {
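/* Cost each 32-bit half of the constant separately.  */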
11821 *cost += COSTS_N_INSNS (arm_gen_constant
11822 (outer_code, SImode, NULL,
11823 trunc_int_for_mode (INTVAL (x), SImode),
11824 NULL, NULL, 0, 0)
11825 + arm_gen_constant (outer_code, SImode, NULL,
11826 INTVAL (x) >> 32, NULL,
11827 NULL, 0, 0));
11828 /* Extra costs? */
11829 }
11830
11831 return true;
11832
11833 case CONST:
11834 case LABEL_REF:
11835 case SYMBOL_REF:
11836 if (speed_p)
11837 {
11838 if (arm_arch_thumb2 && !flag_pic)
11839 *cost += COSTS_N_INSNS (1);
11840 else
11841 *cost += extra_cost->ldst.load;
11842 }
11843 else
11844 *cost += COSTS_N_INSNS (1);
11845
11846 if (flag_pic)
11847 {
11848 *cost += COSTS_N_INSNS (1);
11849 if (speed_p)
11850 *cost += extra_cost->alu.arith;
11851 }
11852
11853 return true;
11854
11855 case CONST_FIXED:
11856 *cost = COSTS_N_INSNS (4);
11857 /* Fixme. */
11858 return true;
11859
11860 case CONST_DOUBLE:
11861 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11862 && (mode == SFmode || !TARGET_VFP_SINGLE))
11863 {
11864 if (vfp3_const_double_rtx (x))
11865 {
11866 if (speed_p)
11867 *cost += extra_cost->fp[mode == DFmode].fpconst;
11868 return true;
11869 }
11870
11871 if (speed_p)
11872 {
11873 if (mode == DFmode)
11874 *cost += extra_cost->ldst.loadd;
11875 else
11876 *cost += extra_cost->ldst.loadf;
11877 }
11878 else
11879 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11880
11881 return true;
11882 }
11883 *cost = COSTS_N_INSNS (4);
11884 return true;
11885
11886 case CONST_VECTOR:
11887 /* Fixme. */
11888 if (((TARGET_NEON && TARGET_HARD_FLOAT
11889 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11890 || TARGET_HAVE_MVE)
11891 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11892 *cost = COSTS_N_INSNS (1);
11893 else
11894 *cost = COSTS_N_INSNS (4);
11895 return true;
11896
11897 case HIGH:
11898 case LO_SUM:
11899 /* When optimizing for size, we prefer constant pool entries to
11900 MOVW/MOVT pairs, so bump the cost of these slightly. */
11901 if (!speed_p)
11902 *cost += 1;
11903 return true;
11904
11905 case CLZ:
11906 if (speed_p)
11907 *cost += extra_cost->alu.clz;
11908 return false;
11909
11910 case SMIN:
11911 if (XEXP (x, 1) == const0_rtx)
11912 {
11913 if (speed_p)
11914 *cost += extra_cost->alu.log_shift;
11915 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11916 return true;
11917 }
11918 /* Fall through. */
11919 case SMAX:
11920 case UMIN:
11921 case UMAX:
11922 *cost += COSTS_N_INSNS (1);
11923 return false;
11924
11925 case TRUNCATE:
11926 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11927 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11928 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11929 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11930 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11931 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11932 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11933 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11934 == ZERO_EXTEND))))
11935 {
11936 if (speed_p)
11937 *cost += extra_cost->mult[1].extend;
11938 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11939 ZERO_EXTEND, 0, speed_p)
11940 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11941 ZERO_EXTEND, 0, speed_p));
11942 return true;
11943 }
11944 *cost = LIBCALL_COST (1);
11945 return false;
11946
11947 case UNSPEC_VOLATILE:
11948 case UNSPEC:
11949 return arm_unspec_cost (x, outer_code, speed_p, cost);
11950
11951 case PC:
11952 /* Reading the PC is like reading any other register. Writing it
11953 is more expensive, but we take that into account elsewhere. */
11954 *cost = 0;
11955 return true;
11956
11957 case ZERO_EXTRACT:
11958 /* TODO: Simple zero_extract of bottom bits using AND. */
11959 /* Fall through. */
11960 case SIGN_EXTRACT:
11961 if (arm_arch6
11962 && mode == SImode
11963 && CONST_INT_P (XEXP (x, 1))
11964 && CONST_INT_P (XEXP (x, 2)))
11965 {
11966 if (speed_p)
11967 *cost += extra_cost->alu.bfx;
11968 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11969 return true;
11970 }
11971 /* Without UBFX/SBFX, need to resort to shift operations. */
11972 *cost += COSTS_N_INSNS (1);
11973 if (speed_p)
11974 *cost += 2 * extra_cost->alu.shift;
11975 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11976 return true;
11977
11978 case FLOAT_EXTEND:
11979 if (TARGET_HARD_FLOAT)
11980 {
11981 if (speed_p)
11982 *cost += extra_cost->fp[mode == DFmode].widen;
11983 if (!TARGET_VFP5
11984 && GET_MODE (XEXP (x, 0)) == HFmode)
11985 {
11986 /* Pre v8, widening HF->DF is a two-step process, first
11987 widening to SFmode. */
11988 *cost += COSTS_N_INSNS (1);
11989 if (speed_p)
11990 *cost += extra_cost->fp[0].widen;
11991 }
11992 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11993 return true;
11994 }
11995
11996 *cost = LIBCALL_COST (1);
11997 return false;
11998
11999 case FLOAT_TRUNCATE:
12000 if (TARGET_HARD_FLOAT)
12001 {
12002 if (speed_p)
12003 *cost += extra_cost->fp[mode == DFmode].narrow;
12004 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12005 return true;
12006 /* Vector modes? */
12007 }
12008 *cost = LIBCALL_COST (1);
12009 return false;
12010
12011 case FMA:
12012 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12013 {
12014 rtx op0 = XEXP (x, 0);
12015 rtx op1 = XEXP (x, 1);
12016 rtx op2 = XEXP (x, 2);
12017
12018
12019 /* vfms or vfnma. */
12020 if (GET_CODE (op0) == NEG)
12021 op0 = XEXP (op0, 0);
12022
12023 /* vfnms or vfnma. */
12024 if (GET_CODE (op2) == NEG)
12025 op2 = XEXP (op2, 0);
12026
12027 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12028 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12029 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12030
12031 if (speed_p)
12032 *cost += extra_cost->fp[mode == DFmode].fma;
12033
12034 return true;
12035 }
12036
12037 *cost = LIBCALL_COST (3);
12038 return false;
12039
12040 case FIX:
12041 case UNSIGNED_FIX:
12042 if (TARGET_HARD_FLOAT)
12043 {
12044 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12045 a vcvt fixed-point conversion. */
12046 if (code == FIX && mode == SImode
12047 && GET_CODE (XEXP (x, 0)) == FIX
12048 && GET_MODE (XEXP (x, 0)) == SFmode
12049 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12050 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12051 > 0)
12052 {
12053 if (speed_p)
12054 *cost += extra_cost->fp[0].toint;
12055
12056 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12057 code, 0, speed_p);
12058 return true;
12059 }
12060
12061 if (GET_MODE_CLASS (mode) == MODE_INT)
12062 {
12063 mode = GET_MODE (XEXP (x, 0));
12064 if (speed_p)
12065 *cost += extra_cost->fp[mode == DFmode].toint;
12066 /* Strip off the 'cost' of rounding towards zero. */
12067 if (GET_CODE (XEXP (x, 0)) == FIX)
12068 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12069 0, speed_p);
12070 else
12071 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12072 /* ??? Increase the cost to deal with transferring from
12073 FP -> CORE registers? */
12074 return true;
12075 }
12076 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12077 && TARGET_VFP5)
12078 {
12079 if (speed_p)
12080 *cost += extra_cost->fp[mode == DFmode].roundint;
12081 return false;
12082 }
12083 /* Vector costs? */
12084 }
12085 *cost = LIBCALL_COST (1);
12086 return false;
12087
12088 case FLOAT:
12089 case UNSIGNED_FLOAT:
12090 if (TARGET_HARD_FLOAT)
12091 {
12092 /* ??? Increase the cost to deal with transferring from CORE
12093 -> FP registers? */
12094 if (speed_p)
12095 *cost += extra_cost->fp[mode == DFmode].fromint;
12096 return false;
12097 }
12098 *cost = LIBCALL_COST (1);
12099 return false;
12100
12101 case CALL:
12102 return true;
12103
12104 case ASM_OPERANDS:
12105 {
12106 /* Just a guess: the number of instructions in the asm template
12107 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12108 though (see PR60663). */
12109 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12110 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12111
12112 *cost = COSTS_N_INSNS (asm_length + num_operands);
12113 return true;
12114 }
12115 default:
12116 if (mode != VOIDmode)
12117 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12118 else
12119 *cost = COSTS_N_INSNS (4); /* Who knows? */
12120 return false;
12121 }
12122 }
12123
12124 #undef HANDLE_NARROW_SHIFT_ARITH
12125
12126 /* RTX costs entry point. */
12127
12128 static bool
12129 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12130 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12131 {
12132 bool result;
12133 int code = GET_CODE (x);
12134 gcc_assert (current_tune->insn_extra_cost);
12135
12136 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12137 (enum rtx_code) outer_code,
12138 current_tune->insn_extra_cost,
12139 total, speed);
12140
12141 if (dump_file && arm_verbose_cost)
12142 {
12143 print_rtl_single (dump_file, x);
12144 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12145 *total, result ? "final" : "partial");
12146 }
12147 return result;
12148 }
12149
12150 static int
12151 arm_insn_cost (rtx_insn *insn, bool speed)
12152 {
12153 int cost;
12154
12155 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12156 will likely disappear during register allocation. */
12157 if (!reload_completed
12158 && GET_CODE (PATTERN (insn)) == SET
12159 && REG_P (SET_DEST (PATTERN (insn)))
12160 && REG_P (SET_SRC (PATTERN (insn))))
12161 return 2;
12162 cost = pattern_cost (PATTERN (insn), speed);
12163 /* If the cost is zero, then it's likely a complex insn. We don't want the
12164 cost of these to be less than something we know about. */
12165 return cost ? cost : COSTS_N_INSNS (2);
12166 }
12167
12168 /* All the address computations we can do are free, but rtx_cost
12169 returns the same value for practically all of them. So we weight the
12170 different types of address here in order of preference (most preferred first):
12171 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12172 static inline int
12173 arm_arm_address_cost (rtx x)
12174 {
12175 enum rtx_code c = GET_CODE (x);
12176
12177 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12178 return 0;
12179 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12180 return 10;
12181
12182 if (c == PLUS)
12183 {
12184 if (CONST_INT_P (XEXP (x, 1)))
12185 return 2;
12186
12187 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12188 return 3;
12189
12190 return 4;
12191 }
12192
12193 return 6;
12194 }
12195
12196 static inline int
12197 arm_thumb_address_cost (rtx x)
12198 {
12199 enum rtx_code c = GET_CODE (x);
12200
12201 if (c == REG)
12202 return 1;
12203 if (c == PLUS
12204 && REG_P (XEXP (x, 0))
12205 && CONST_INT_P (XEXP (x, 1)))
12206 return 1;
12207
12208 return 2;
12209 }
12210
12211 static int
12212 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12213 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12214 {
12215 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12216 }
12217
12218 /* Adjust cost hook for XScale. */
12219 static bool
12220 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12221 int * cost)
12222 {
12223 /* Some true dependencies can have a higher cost depending
12224 on precisely how certain input operands are used. */
12225 if (dep_type == 0
12226 && recog_memoized (insn) >= 0
12227 && recog_memoized (dep) >= 0)
12228 {
12229 int shift_opnum = get_attr_shift (insn);
12230 enum attr_type attr_type = get_attr_type (dep);
12231
12232 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12233 operand for INSN. If we have a shifted input operand and the
12234 instruction we depend on is another ALU instruction, then we may
12235 have to account for an additional stall. */
12236 if (shift_opnum != 0
12237 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12238 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12239 || attr_type == TYPE_ALUS_SHIFT_IMM
12240 || attr_type == TYPE_LOGIC_SHIFT_IMM
12241 || attr_type == TYPE_LOGICS_SHIFT_IMM
12242 || attr_type == TYPE_ALU_SHIFT_REG
12243 || attr_type == TYPE_ALUS_SHIFT_REG
12244 || attr_type == TYPE_LOGIC_SHIFT_REG
12245 || attr_type == TYPE_LOGICS_SHIFT_REG
12246 || attr_type == TYPE_MOV_SHIFT
12247 || attr_type == TYPE_MVN_SHIFT
12248 || attr_type == TYPE_MOV_SHIFT_REG
12249 || attr_type == TYPE_MVN_SHIFT_REG))
12250 {
12251 rtx shifted_operand;
12252 int opno;
12253
12254 /* Get the shifted operand. */
12255 extract_insn (insn);
12256 shifted_operand = recog_data.operand[shift_opnum];
12257
12258 /* Iterate over all the operands in DEP. If we write an operand
12259 that overlaps with SHIFTED_OPERAND, then we have to increase the
12260 cost of this dependency. */
12261 extract_insn (dep);
12262 preprocess_constraints (dep);
12263 for (opno = 0; opno < recog_data.n_operands; opno++)
12264 {
12265 /* We can ignore strict inputs. */
12266 if (recog_data.operand_type[opno] == OP_IN)
12267 continue;
12268
12269 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12270 shifted_operand))
12271 {
12272 *cost = 2;
12273 return false;
12274 }
12275 }
12276 }
12277 }
12278 return true;
12279 }
12280
12281 /* Adjust cost hook for Cortex A9. */
12282 static bool
12283 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12284 int * cost)
12285 {
12286 switch (dep_type)
12287 {
12288 case REG_DEP_ANTI:
12289 *cost = 0;
12290 return false;
12291
12292 case REG_DEP_TRUE:
12293 case REG_DEP_OUTPUT:
12294 if (recog_memoized (insn) >= 0
12295 && recog_memoized (dep) >= 0)
12296 {
12297 if (GET_CODE (PATTERN (insn)) == SET)
12298 {
12299 if (GET_MODE_CLASS
12300 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12301 || GET_MODE_CLASS
12302 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12303 {
12304 enum attr_type attr_type_insn = get_attr_type (insn);
12305 enum attr_type attr_type_dep = get_attr_type (dep);
12306
12307 /* By default all dependencies of the form
12308 s0 = s0 <op> s1
12309 s0 = s0 <op> s2
12310 have an extra latency of 1 cycle because
12311 of the input and output dependency in this
12312 case. However, this gets modeled as a true
12313 dependency, hence all these checks. */
12314 if (REG_P (SET_DEST (PATTERN (insn)))
12315 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12316 {
12317 /* FMACS is a special case where the dependent
12318 instruction can be issued 3 cycles before
12319 the normal latency in case of an output
12320 dependency. */
12321 if ((attr_type_insn == TYPE_FMACS
12322 || attr_type_insn == TYPE_FMACD)
12323 && (attr_type_dep == TYPE_FMACS
12324 || attr_type_dep == TYPE_FMACD))
12325 {
12326 if (dep_type == REG_DEP_OUTPUT)
12327 *cost = insn_default_latency (dep) - 3;
12328 else
12329 *cost = insn_default_latency (dep);
12330 return false;
12331 }
12332 else
12333 {
12334 if (dep_type == REG_DEP_OUTPUT)
12335 *cost = insn_default_latency (dep) + 1;
12336 else
12337 *cost = insn_default_latency (dep);
12338 }
12339 return false;
12340 }
12341 }
12342 }
12343 }
12344 break;
12345
12346 default:
12347 gcc_unreachable ();
12348 }
12349
12350 return true;
12351 }
12352
12353 /* Adjust cost hook for FA726TE. */
12354 static bool
12355 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12356 int * cost)
12357 {
12358 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12359 has a penalty of 3. */
12360 if (dep_type == REG_DEP_TRUE
12361 && recog_memoized (insn) >= 0
12362 && recog_memoized (dep) >= 0
12363 && get_attr_conds (dep) == CONDS_SET)
12364 {
12365 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12366 if (get_attr_conds (insn) == CONDS_USE
12367 && get_attr_type (insn) != TYPE_BRANCH)
12368 {
12369 *cost = 3;
12370 return false;
12371 }
12372
12373 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12374 || get_attr_conds (insn) == CONDS_USE)
12375 {
12376 *cost = 0;
12377 return false;
12378 }
12379 }
12380
12381 return true;
12382 }
12383
12384 /* Implement TARGET_REGISTER_MOVE_COST.
12385
12386 Moves between VFP_REGS and GENERAL_REGS take a single insn, but
12387 that insn is typically more expensive than a single memory access. We set
12388 the cost to less than two memory accesses so that floating
12389 point to integer conversion does not go through memory. */
12390
12391 int
12392 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12393 reg_class_t from, reg_class_t to)
12394 {
12395 if (TARGET_32BIT)
12396 {
12397 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12398 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12399 return 15;
12400 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12401 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12402 return 4;
12403 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12404 return 20;
12405 else
12406 return 2;
12407 }
12408 else
12409 {
12410 if (from == HI_REGS || to == HI_REGS)
12411 return 4;
12412 else
12413 return 2;
12414 }
12415 }
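/* For instance, with the TARGET_32BIT memory move cost of 10 (see
   arm_memory_move_cost below), the value 15 used for VFP<->general moves
   sits between one memory access (10) and two (20).  */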
12416
12417 /* Implement TARGET_MEMORY_MOVE_COST. */
12418
12419 int
12420 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12421 bool in ATTRIBUTE_UNUSED)
12422 {
12423 if (TARGET_32BIT)
12424 return 10;
12425 else
12426 {
12427 if (GET_MODE_SIZE (mode) < 4)
12428 return 8;
12429 else
12430 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12431 }
12432 }
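/* Worked example for the non-TARGET_32BIT (Thumb-1) formula above: an SImode
   value (size 4) costs 2 * 4 * 1 = 8 when RCLASS is LO_REGS and 2 * 4 * 2 = 16
   otherwise, while sub-word modes (QImode, HImode) cost a flat 8.  */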
12433
12434 /* Vectorizer cost model implementation. */
12435
12436 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12437 static int
12438 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12439 tree vectype,
12440 int misalign ATTRIBUTE_UNUSED)
12441 {
12442 unsigned elements;
12443
12444 switch (type_of_cost)
12445 {
12446 case scalar_stmt:
12447 return current_tune->vec_costs->scalar_stmt_cost;
12448
12449 case scalar_load:
12450 return current_tune->vec_costs->scalar_load_cost;
12451
12452 case scalar_store:
12453 return current_tune->vec_costs->scalar_store_cost;
12454
12455 case vector_stmt:
12456 return current_tune->vec_costs->vec_stmt_cost;
12457
12458 case vector_load:
12459 return current_tune->vec_costs->vec_align_load_cost;
12460
12461 case vector_store:
12462 return current_tune->vec_costs->vec_store_cost;
12463
12464 case vec_to_scalar:
12465 return current_tune->vec_costs->vec_to_scalar_cost;
12466
12467 case scalar_to_vec:
12468 return current_tune->vec_costs->scalar_to_vec_cost;
12469
12470 case unaligned_load:
12471 case vector_gather_load:
12472 return current_tune->vec_costs->vec_unalign_load_cost;
12473
12474 case unaligned_store:
12475 case vector_scatter_store:
12476 return current_tune->vec_costs->vec_unalign_store_cost;
12477
12478 case cond_branch_taken:
12479 return current_tune->vec_costs->cond_taken_branch_cost;
12480
12481 case cond_branch_not_taken:
12482 return current_tune->vec_costs->cond_not_taken_branch_cost;
12483
12484 case vec_perm:
12485 case vec_promote_demote:
12486 return current_tune->vec_costs->vec_stmt_cost;
12487
12488 case vec_construct:
12489 elements = TYPE_VECTOR_SUBPARTS (vectype);
12490 return elements / 2 + 1;
12491
12492 default:
12493 gcc_unreachable ();
12494 }
12495 }
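/* For example, with the formula above, constructing a four-element vector
   (vec_construct) is costed as 4 / 2 + 1 = 3.  */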
12496
12497 /* Return true if and only if this insn can dual-issue only as older. */
12498 static bool
12499 cortexa7_older_only (rtx_insn *insn)
12500 {
12501 if (recog_memoized (insn) < 0)
12502 return false;
12503
12504 switch (get_attr_type (insn))
12505 {
12506 case TYPE_ALU_DSP_REG:
12507 case TYPE_ALU_SREG:
12508 case TYPE_ALUS_SREG:
12509 case TYPE_LOGIC_REG:
12510 case TYPE_LOGICS_REG:
12511 case TYPE_ADC_REG:
12512 case TYPE_ADCS_REG:
12513 case TYPE_ADR:
12514 case TYPE_BFM:
12515 case TYPE_REV:
12516 case TYPE_MVN_REG:
12517 case TYPE_SHIFT_IMM:
12518 case TYPE_SHIFT_REG:
12519 case TYPE_LOAD_BYTE:
12520 case TYPE_LOAD_4:
12521 case TYPE_STORE_4:
12522 case TYPE_FFARITHS:
12523 case TYPE_FADDS:
12524 case TYPE_FFARITHD:
12525 case TYPE_FADDD:
12526 case TYPE_FMOV:
12527 case TYPE_F_CVT:
12528 case TYPE_FCMPS:
12529 case TYPE_FCMPD:
12530 case TYPE_FCONSTS:
12531 case TYPE_FCONSTD:
12532 case TYPE_FMULS:
12533 case TYPE_FMACS:
12534 case TYPE_FMULD:
12535 case TYPE_FMACD:
12536 case TYPE_FDIVS:
12537 case TYPE_FDIVD:
12538 case TYPE_F_MRC:
12539 case TYPE_F_MRRC:
12540 case TYPE_F_FLAG:
12541 case TYPE_F_LOADS:
12542 case TYPE_F_STORES:
12543 return true;
12544 default:
12545 return false;
12546 }
12547 }
12548
12549 /* Return true if and only if this insn can dual-issue as younger. */
12550 static bool
12551 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12552 {
12553 if (recog_memoized (insn) < 0)
12554 {
12555 if (verbose > 5)
12556 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12557 return false;
12558 }
12559
12560 switch (get_attr_type (insn))
12561 {
12562 case TYPE_ALU_IMM:
12563 case TYPE_ALUS_IMM:
12564 case TYPE_LOGIC_IMM:
12565 case TYPE_LOGICS_IMM:
12566 case TYPE_EXTEND:
12567 case TYPE_MVN_IMM:
12568 case TYPE_MOV_IMM:
12569 case TYPE_MOV_REG:
12570 case TYPE_MOV_SHIFT:
12571 case TYPE_MOV_SHIFT_REG:
12572 case TYPE_BRANCH:
12573 case TYPE_CALL:
12574 return true;
12575 default:
12576 return false;
12577 }
12578 }
12579
12580
12581 /* Look for an instruction that can dual issue only as an older
12582 instruction, and move it in front of any instructions that can
12583 dual-issue as younger, while preserving the relative order of all
12584    other instructions in the ready list.  This is a heuristic to help
12585 dual-issue in later cycles, by postponing issue of more flexible
12586 instructions. This heuristic may affect dual issue opportunities
12587 in the current cycle. */
12588 static void
12589 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12590 int *n_readyp, int clock)
12591 {
12592 int i;
12593 int first_older_only = -1, first_younger = -1;
12594
12595 if (verbose > 5)
12596 fprintf (file,
12597 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12598 clock,
12599 *n_readyp);
12600
12601 /* Traverse the ready list from the head (the instruction to issue
12602      first), looking for the first instruction that can issue as
12603 younger and the first instruction that can dual-issue only as
12604 older. */
12605 for (i = *n_readyp - 1; i >= 0; i--)
12606 {
12607 rtx_insn *insn = ready[i];
12608 if (cortexa7_older_only (insn))
12609 {
12610 first_older_only = i;
12611 if (verbose > 5)
12612 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12613 break;
12614 }
12615 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12616 first_younger = i;
12617 }
12618
12619   /* Nothing to reorder: either no younger insn was found, or an insn
12620      that can dual-issue only as older appears before any insn that
12621      can dual-issue as younger.  */
12622 if (first_younger == -1)
12623 {
12624 if (verbose > 5)
12625 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12626 return;
12627 }
12628
12629 /* Nothing to reorder because no older-only insn in the ready list. */
12630 if (first_older_only == -1)
12631 {
12632 if (verbose > 5)
12633 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12634 return;
12635 }
12636
12637 /* Move first_older_only insn before first_younger. */
12638 if (verbose > 5)
12639 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12640 INSN_UID(ready [first_older_only]),
12641 INSN_UID(ready [first_younger]));
12642 rtx_insn *first_older_only_insn = ready [first_older_only];
12643 for (i = first_older_only; i < first_younger; i++)
12644 {
12645 ready[i] = ready[i+1];
12646 }
12647
12648 ready[i] = first_older_only_insn;
12649 return;
12650 }
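/* A small worked example of the reordering above: with a ready list
   { A (older-only), B, C (younger) }, where the insn to be issued first sits
   at the highest index, the traversal finds first_younger = 2 and
   first_older_only = 0, and the list becomes { B, C, A }, so A is now issued
   ahead of C.  */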
12651
12652 /* Implement TARGET_SCHED_REORDER. */
12653 static int
12654 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12655 int clock)
12656 {
12657 switch (arm_tune)
12658 {
12659 case TARGET_CPU_cortexa7:
12660 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12661 break;
12662 default:
12663 /* Do nothing for other cores. */
12664 break;
12665 }
12666
12667 return arm_issue_rate ();
12668 }
12669
12670 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12671 It corrects the value of COST based on the relationship between
12672    INSN and DEP, where DEP_TYPE gives the kind of dependence.  It returns the new
12673 value. There is a per-core adjust_cost hook to adjust scheduler costs
12674 and the per-core hook can choose to completely override the generic
12675 adjust_cost function. Only put bits of code into arm_adjust_cost that
12676 are common across all cores. */
12677 static int
12678 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12679 unsigned int)
12680 {
12681 rtx i_pat, d_pat;
12682
12683 /* When generating Thumb-1 code, we want to place flag-setting operations
12684 close to a conditional branch which depends on them, so that we can
12685 omit the comparison. */
12686 if (TARGET_THUMB1
12687 && dep_type == 0
12688 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12689 && recog_memoized (dep) >= 0
12690 && get_attr_conds (dep) == CONDS_SET)
12691 return 0;
12692
12693 if (current_tune->sched_adjust_cost != NULL)
12694 {
12695 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12696 return cost;
12697 }
12698
12699 /* XXX Is this strictly true? */
12700 if (dep_type == REG_DEP_ANTI
12701 || dep_type == REG_DEP_OUTPUT)
12702 return 0;
12703
12704 /* Call insns don't incur a stall, even if they follow a load. */
12705 if (dep_type == 0
12706 && CALL_P (insn))
12707 return 1;
12708
12709 if ((i_pat = single_set (insn)) != NULL
12710 && MEM_P (SET_SRC (i_pat))
12711 && (d_pat = single_set (dep)) != NULL
12712 && MEM_P (SET_DEST (d_pat)))
12713 {
12714 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12715       /* This is a load after a store; there is no conflict if the load reads
12716 from a cached area. Assume that loads from the stack, and from the
12717 constant pool are cached, and that others will miss. This is a
12718 hack. */
12719
12720 if ((SYMBOL_REF_P (src_mem)
12721 && CONSTANT_POOL_ADDRESS_P (src_mem))
12722 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12723 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12724 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12725 return 1;
12726 }
12727
12728 return cost;
12729 }
12730
12731 int
12732 arm_max_conditional_execute (void)
12733 {
12734 return max_insns_skipped;
12735 }
12736
12737 static int
12738 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12739 {
12740 if (TARGET_32BIT)
12741 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12742 else
12743 return (optimize > 0) ? 2 : 0;
12744 }
12745
12746 static int
12747 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12748 {
12749 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12750 }
12751
12752 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12753 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12754 sequences of non-executed instructions in IT blocks probably take the same
12755 amount of time as executed instructions (and the IT instruction itself takes
12756 space in icache). This function was experimentally determined to give good
12757 results on a popular embedded benchmark. */
12758
12759 static int
12760 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12761 {
12762 return (TARGET_32BIT && speed_p) ? 1
12763 : arm_default_branch_cost (speed_p, predictable_p);
12764 }
12765
12766 static int
12767 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12768 {
12769 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12770 }
12771
12772 static bool fp_consts_inited = false;
12773
12774 static REAL_VALUE_TYPE value_fp0;
12775
12776 static void
12777 init_fp_table (void)
12778 {
12779 REAL_VALUE_TYPE r;
12780
12781 r = REAL_VALUE_ATOF ("0", DFmode);
12782 value_fp0 = r;
12783 fp_consts_inited = true;
12784 }
12785
12786 /* Return TRUE if rtx X is a valid immediate FP constant. */
12787 int
12788 arm_const_double_rtx (rtx x)
12789 {
12790 const REAL_VALUE_TYPE *r;
12791
12792 if (!fp_consts_inited)
12793 init_fp_table ();
12794
12795 r = CONST_DOUBLE_REAL_VALUE (x);
12796 if (REAL_VALUE_MINUS_ZERO (*r))
12797 return 0;
12798
12799 if (real_equal (r, &value_fp0))
12800 return 1;
12801
12802 return 0;
12803 }
12804
12805 /* VFPv3 has a fairly wide range of representable immediates, formed from
12806 "quarter-precision" floating-point values. These can be evaluated using this
12807 formula (with ^ for exponentiation):
12808
12809 -1^s * n * 2^-r
12810
12811 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12812 16 <= n <= 31 and 0 <= r <= 7.
12813
12814 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12815
12816 - A (most-significant) is the sign bit.
12817 - BCD are the exponent (encoded as r XOR 3).
12818 - EFGH are the mantissa (encoded as n - 16).
12819 */
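/* For illustration, using the mapping above: 0.5 = +16 * 2^-5, so s = 0,
   n = 16, r = 5, giving ABCDEFGH = 0 110 0000 = 0x60; similarly
   1.0 = +16 * 2^-4 encodes as 0x70.  vfp3_const_double_index below returns
   this 8-bit index.  */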
12820
12821 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12822 fconst[sd] instruction, or -1 if X isn't suitable. */
12823 static int
12824 vfp3_const_double_index (rtx x)
12825 {
12826 REAL_VALUE_TYPE r, m;
12827 int sign, exponent;
12828 unsigned HOST_WIDE_INT mantissa, mant_hi;
12829 unsigned HOST_WIDE_INT mask;
12830 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12831 bool fail;
12832
12833 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12834 return -1;
12835
12836 r = *CONST_DOUBLE_REAL_VALUE (x);
12837
12838 /* We can't represent these things, so detect them first. */
12839 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12840 return -1;
12841
12842 /* Extract sign, exponent and mantissa. */
12843 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12844 r = real_value_abs (&r);
12845 exponent = REAL_EXP (&r);
12846 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12847 highest (sign) bit, with a fixed binary point at bit point_pos.
12848 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12849 bits for the mantissa, this may fail (low bits would be lost). */
12850 real_ldexp (&m, &r, point_pos - exponent);
12851 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12852 mantissa = w.elt (0);
12853 mant_hi = w.elt (1);
12854
12855 /* If there are bits set in the low part of the mantissa, we can't
12856 represent this value. */
12857 if (mantissa != 0)
12858 return -1;
12859
12860 /* Now make it so that mantissa contains the most-significant bits, and move
12861 the point_pos to indicate that the least-significant bits have been
12862 discarded. */
12863 point_pos -= HOST_BITS_PER_WIDE_INT;
12864 mantissa = mant_hi;
12865
12866 /* We can permit four significant bits of mantissa only, plus a high bit
12867 which is always 1. */
12868 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12869 if ((mantissa & mask) != 0)
12870 return -1;
12871
12872 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12873 mantissa >>= point_pos - 5;
12874
12875 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12876 floating-point immediate zero with Neon using an integer-zero load, but
12877 that case is handled elsewhere.) */
12878 if (mantissa == 0)
12879 return -1;
12880
12881 gcc_assert (mantissa >= 16 && mantissa <= 31);
12882
12883 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12884 normalized significands are in the range [1, 2). (Our mantissa is shifted
12885 left 4 places at this point relative to normalized IEEE754 values). GCC
12886 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12887 REAL_EXP must be altered. */
12888 exponent = 5 - exponent;
12889
12890 if (exponent < 0 || exponent > 7)
12891 return -1;
12892
12893 /* Sign, mantissa and exponent are now in the correct form to plug into the
12894 formula described in the comment above. */
12895 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12896 }
12897
12898 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12899 int
12900 vfp3_const_double_rtx (rtx x)
12901 {
12902 if (!TARGET_VFP3)
12903 return 0;
12904
12905 return vfp3_const_double_index (x) != -1;
12906 }
12907
12908 /* Recognize immediates which can be used in various Neon and MVE instructions.
12909 Legal immediates are described by the following table (for VMVN variants, the
12910 bitwise inverse of the constant shown is recognized. In either case, VMOV
12911 is output and the correct instruction to use for a given constant is chosen
12912 by the assembler). The constant shown is replicated across all elements of
12913 the destination vector.
12914
12915 insn elems variant constant (binary)
12916 ---- ----- ------- -----------------
12917 vmov i32 0 00000000 00000000 00000000 abcdefgh
12918 vmov i32 1 00000000 00000000 abcdefgh 00000000
12919 vmov i32 2 00000000 abcdefgh 00000000 00000000
12920 vmov i32 3 abcdefgh 00000000 00000000 00000000
12921 vmov i16 4 00000000 abcdefgh
12922 vmov i16 5 abcdefgh 00000000
12923 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12924 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12925 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12926 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12927 vmvn i16 10 00000000 abcdefgh
12928 vmvn i16 11 abcdefgh 00000000
12929 vmov i32 12 00000000 00000000 abcdefgh 11111111
12930 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12931 vmov i32 14 00000000 abcdefgh 11111111 11111111
12932 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12933 vmov i8 16 abcdefgh
12934 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12935 eeeeeeee ffffffff gggggggg hhhhhhhh
12936 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12937 vmov f32 19 00000000 00000000 00000000 00000000
12938
12939 For case 18, B = !b. Representable values are exactly those accepted by
12940 vfp3_const_double_index, but are output as floating-point numbers rather
12941 than indices.
12942
12943 For case 19, we will change it to vmov.i32 when assembling.
12944
12945 Variants 0-5 (inclusive) may also be used as immediates for the second
12946 operand of VORR/VBIC instructions.
12947
12948 The INVERSE argument causes the bitwise inverse of the given operand to be
12949 recognized instead (used for recognizing legal immediates for the VAND/VORN
12950 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12951 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12952 output, rather than the real insns vbic/vorr).
12953
12954 INVERSE makes no difference to the recognition of float vectors.
12955
12956 The return value is the variant of immediate as shown in the above table, or
12957 -1 if the given value doesn't match any of the listed patterns.
12958 */
12959 static int
12960 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12961 rtx *modconst, int *elementwidth)
12962 {
12963 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12964 matches = 1; \
12965 for (i = 0; i < idx; i += (STRIDE)) \
12966 if (!(TEST)) \
12967 matches = 0; \
12968 if (matches) \
12969 { \
12970 immtype = (CLASS); \
12971 elsize = (ELSIZE); \
12972 break; \
12973 }
12974
12975 unsigned int i, elsize = 0, idx = 0, n_elts;
12976 unsigned int innersize;
12977 unsigned char bytes[16] = {};
12978 int immtype = -1, matches;
12979 unsigned int invmask = inverse ? 0xff : 0;
12980 bool vector = GET_CODE (op) == CONST_VECTOR;
12981
12982 if (vector)
12983 n_elts = CONST_VECTOR_NUNITS (op);
12984 else
12985 {
12986 n_elts = 1;
12987 gcc_assert (mode != VOIDmode);
12988 }
12989
12990 innersize = GET_MODE_UNIT_SIZE (mode);
12991
12992 /* Only support 128-bit vectors for MVE. */
12993 if (TARGET_HAVE_MVE
12994 && (!vector
12995 || VALID_MVE_PRED_MODE (mode)
12996 || n_elts * innersize != 16))
12997 return -1;
12998
12999 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13000 return -1;
13001
13002 /* Vectors of float constants. */
13003 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13004 {
13005 rtx el0 = CONST_VECTOR_ELT (op, 0);
13006
13007 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13008 return -1;
13009
13010 /* FP16 vectors cannot be represented. */
13011 if (GET_MODE_INNER (mode) == HFmode)
13012 return -1;
13013
13014 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13015 are distinct in this context. */
13016 if (!const_vec_duplicate_p (op))
13017 return -1;
13018
13019 if (modconst)
13020 *modconst = CONST_VECTOR_ELT (op, 0);
13021
13022 if (elementwidth)
13023 *elementwidth = 0;
13024
13025 if (el0 == CONST0_RTX (GET_MODE (el0)))
13026 return 19;
13027 else
13028 return 18;
13029 }
13030
13031 /* The tricks done in the code below apply for little-endian vector layout.
13032 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13033 FIXME: Implement logic for big-endian vectors. */
13034 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13035 return -1;
13036
13037 /* Splat vector constant out into a byte vector. */
13038 for (i = 0; i < n_elts; i++)
13039 {
13040 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13041 unsigned HOST_WIDE_INT elpart;
13042
13043 gcc_assert (CONST_INT_P (el));
13044 elpart = INTVAL (el);
13045
13046 for (unsigned int byte = 0; byte < innersize; byte++)
13047 {
13048 bytes[idx++] = (elpart & 0xff) ^ invmask;
13049 elpart >>= BITS_PER_UNIT;
13050 }
13051 }
13052
13053 /* Sanity check. */
13054 gcc_assert (idx == GET_MODE_SIZE (mode));
13055
13056 do
13057 {
13058 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13059 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13060
13061 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13062 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13063
13064 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13065 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13066
13067 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13068 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13069
13070 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13071
13072 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13073
13074 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13075 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13076
13077 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13078 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13079
13080 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13081 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13082
13083 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13084 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13085
13086 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13087
13088 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13089
13090 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13091 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13092
13093 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13094 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13095
13096 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13097 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13098
13099 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13100 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13101
13102 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13103
13104 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13105 && bytes[i] == bytes[(i + 8) % idx]);
13106 }
13107 while (0);
13108
13109 if (immtype == -1)
13110 return -1;
13111
13112 if (elementwidth)
13113 *elementwidth = elsize;
13114
13115 if (modconst)
13116 {
13117 unsigned HOST_WIDE_INT imm = 0;
13118
13119 /* Un-invert bytes of recognized vector, if necessary. */
13120 if (invmask != 0)
13121 for (i = 0; i < idx; i++)
13122 bytes[i] ^= invmask;
13123
13124 if (immtype == 17)
13125 {
13126 /* FIXME: Broken on 32-bit H_W_I hosts. */
13127 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13128
13129 for (i = 0; i < 8; i++)
13130 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13131 << (i * BITS_PER_UNIT);
13132
13133 *modconst = GEN_INT (imm);
13134 }
13135 else
13136 {
13137 unsigned HOST_WIDE_INT imm = 0;
13138
13139 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13140 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13141
13142 *modconst = GEN_INT (imm);
13143 }
13144 }
13145
13146 return immtype;
13147 #undef CHECK
13148 }
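/* Two illustrative inputs for simd_valid_immediate: a V4SImode vector with
   every element 0x2a splats to bytes { 2a 00 00 00 ... } and matches
   variant 0 with element width 32 and *MODCONST = 0x2a, while a V8HImode
   vector with every element 0x4200 splats to bytes { 00 42 00 42 ... } and
   matches variant 5 with element width 16 and *MODCONST = 0x4200.  */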
13149
13150 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13151 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13152 (or zero for float elements), and a modified constant (whatever should be
13153    output for a VMOV) in *MODCONST.  This function was renamed from
13154    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" as it is
13155    used by both Neon and MVE. */
13156 int
13157 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13158 rtx *modconst, int *elementwidth)
13159 {
13160 rtx tmpconst;
13161 int tmpwidth;
13162 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13163
13164 if (retval == -1)
13165 return 0;
13166
13167 if (modconst)
13168 *modconst = tmpconst;
13169
13170 if (elementwidth)
13171 *elementwidth = tmpwidth;
13172
13173 return 1;
13174 }
13175
13176 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13177 the immediate is valid, write a constant suitable for using as an operand
13178 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13179 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13180
13181 int
13182 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13183 rtx *modconst, int *elementwidth)
13184 {
13185 rtx tmpconst;
13186 int tmpwidth;
13187 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13188
13189 if (retval < 0 || retval > 5)
13190 return 0;
13191
13192 if (modconst)
13193 *modconst = tmpconst;
13194
13195 if (elementwidth)
13196 *elementwidth = tmpwidth;
13197
13198 return 1;
13199 }
13200
13201 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13202 the immediate is valid, write a constant suitable for using as an operand
13203 to VSHR/VSHL to *MODCONST and the corresponding element width to
13204 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
13205 because they have different limitations. */
13206
13207 int
13208 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13209 rtx *modconst, int *elementwidth,
13210 bool isleftshift)
13211 {
13212 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13213 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13214 unsigned HOST_WIDE_INT last_elt = 0;
13215 unsigned HOST_WIDE_INT maxshift;
13216
13217 /* Split vector constant out into a byte vector. */
13218 for (i = 0; i < n_elts; i++)
13219 {
13220 rtx el = CONST_VECTOR_ELT (op, i);
13221 unsigned HOST_WIDE_INT elpart;
13222
13223 if (CONST_INT_P (el))
13224 elpart = INTVAL (el);
13225 else if (CONST_DOUBLE_P (el))
13226 return 0;
13227 else
13228 gcc_unreachable ();
13229
13230 if (i != 0 && elpart != last_elt)
13231 return 0;
13232
13233 last_elt = elpart;
13234 }
13235
13236 /* Shift less than element size. */
13237 maxshift = innersize * 8;
13238
13239 if (isleftshift)
13240 {
13241 /* Left shift immediate value can be from 0 to <size>-1. */
13242 if (last_elt >= maxshift)
13243 return 0;
13244 }
13245 else
13246 {
13247 /* Right shift immediate value can be from 1 to <size>. */
13248 if (last_elt == 0 || last_elt > maxshift)
13249 return 0;
13250 }
13251
13252 if (elementwidth)
13253 *elementwidth = innersize * 8;
13254
13255 if (modconst)
13256 *modconst = CONST_VECTOR_ELT (op, 0);
13257
13258 return 1;
13259 }
13260
13261 /* Return a string suitable for output of Neon immediate logic operation
13262 MNEM. */
13263
13264 char *
13265 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13266 int inverse, int quad)
13267 {
13268 int width, is_valid;
13269 static char templ[40];
13270
13271 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13272
13273 gcc_assert (is_valid != 0);
13274
13275 if (quad)
13276 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13277 else
13278 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13279
13280 return templ;
13281 }
13282
13283 /* Return a string suitable for output of Neon immediate shift operation
13284 (VSHR or VSHL) MNEM. */
13285
13286 char *
13287 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13288 machine_mode mode, int quad,
13289 bool isleftshift)
13290 {
13291 int width, is_valid;
13292 static char templ[40];
13293
13294 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13295 gcc_assert (is_valid != 0);
13296
13297 if (quad)
13298 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13299 else
13300 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13301
13302 return templ;
13303 }
13304
13305 /* Output a sequence of pairwise operations to implement a reduction.
13306 NOTE: We do "too much work" here, because pairwise operations work on two
13307 registers-worth of operands in one go. Unfortunately we can't exploit those
13308 extra calculations to do the full operation in fewer steps, I don't think.
13309 Although all vector elements of the result but the first are ignored, we
13310 actually calculate the same result in each of the elements. An alternative
13311 such as initially loading a vector with zero to use as each of the second
13312 operands would use up an additional register and take an extra instruction,
13313 for no particular gain. */
13314
13315 void
13316 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13317 rtx (*reduc) (rtx, rtx, rtx))
13318 {
13319 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13320 rtx tmpsum = op1;
13321
13322 for (i = parts / 2; i >= 1; i /= 2)
13323 {
13324 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13325 emit_insn (reduc (dest, tmpsum, tmpsum));
13326 tmpsum = dest;
13327 }
13328 }
13329
13330 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13331 loaded into a register using VDUP.
13332
13333 If this is the case, and GENERATE is set, we also generate
13334 instructions to do this and return an RTX to assign to the register. */
13335
13336 static rtx
13337 neon_vdup_constant (rtx vals, bool generate)
13338 {
13339 machine_mode mode = GET_MODE (vals);
13340 machine_mode inner_mode = GET_MODE_INNER (mode);
13341 rtx x;
13342
13343 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13344 return NULL_RTX;
13345
13346 if (!const_vec_duplicate_p (vals, &x))
13347 /* The elements are not all the same. We could handle repeating
13348 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13349 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13350 vdup.i16). */
13351 return NULL_RTX;
13352
13353 if (!generate)
13354 return x;
13355
13356 /* We can load this constant by using VDUP and a constant in a
13357 single ARM register. This will be cheaper than a vector
13358 load. */
13359
13360 x = copy_to_mode_reg (inner_mode, x);
13361 return gen_vec_duplicate (mode, x);
13362 }
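/* For example, given a V8QImode CONST_VECTOR with every element 5,
   const_vec_duplicate_p extracts the scalar 5; with GENERATE set, that
   constant is moved into a core register and the result is a vec_duplicate
   of that register, i.e. typically a single vdup from a core register.  */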
13363
13364 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13365 rtx
13366 mve_bool_vec_to_const (rtx const_vec)
13367 {
13368 machine_mode mode = GET_MODE (const_vec);
13369
13370 if (!VECTOR_MODE_P (mode))
13371 return const_vec;
13372
13373 unsigned n_elts = GET_MODE_NUNITS (mode);
13374 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13375 unsigned shift_c = 16 / n_elts;
13376 unsigned i;
13377 int hi_val = 0;
13378
13379 for (i = 0; i < n_elts; i++)
13380 {
13381 rtx el = CONST_VECTOR_ELT (const_vec, i);
13382 unsigned HOST_WIDE_INT elpart;
13383
13384 gcc_assert (CONST_INT_P (el));
13385 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13386
13387 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13388
13389 hi_val |= elpart << (index * shift_c);
13390 }
13391   /* We are using a mov immediate to encode this constant, which writes 32 bits,
13392      so we need to make sure the top 16 bits are all zero; otherwise we cannot
13393      guarantee that we can actually write this immediate. */
13394 return gen_int_mode (hi_val, SImode);
13395 }
13396
13397 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13398    constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13399 into a register.
13400
13401 If this is the case, and GENERATE is set, we also generate code to do
13402 this and return an RTX to copy into the register. */
13403
13404 rtx
13405 neon_make_constant (rtx vals, bool generate)
13406 {
13407 machine_mode mode = GET_MODE (vals);
13408 rtx target;
13409 rtx const_vec = NULL_RTX;
13410 int n_elts = GET_MODE_NUNITS (mode);
13411 int n_const = 0;
13412 int i;
13413
13414 if (GET_CODE (vals) == CONST_VECTOR)
13415 const_vec = vals;
13416 else if (GET_CODE (vals) == PARALLEL)
13417 {
13418 /* A CONST_VECTOR must contain only CONST_INTs and
13419 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13420 Only store valid constants in a CONST_VECTOR. */
13421 for (i = 0; i < n_elts; ++i)
13422 {
13423 rtx x = XVECEXP (vals, 0, i);
13424 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13425 n_const++;
13426 }
13427 if (n_const == n_elts)
13428 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13429 }
13430 else
13431 gcc_unreachable ();
13432
13433 if (const_vec != NULL
13434 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13435 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13436 return const_vec;
13437 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13438 return mve_bool_vec_to_const (const_vec);
13439 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13440 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13441 pipeline cycle; creating the constant takes one or two ARM
13442 pipeline cycles. */
13443 return target;
13444 else if (const_vec != NULL_RTX)
13445 /* Load from constant pool. On Cortex-A8 this takes two cycles
13446 (for either double or quad vectors). We cannot take advantage
13447 of single-cycle VLD1 because we need a PC-relative addressing
13448 mode. */
13449 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13450 else
13451 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13452 We cannot construct an initializer. */
13453 return NULL_RTX;
13454 }
13455
13456 /* Initialize vector TARGET to VALS. */
13457
13458 void
13459 neon_expand_vector_init (rtx target, rtx vals)
13460 {
13461 machine_mode mode = GET_MODE (target);
13462 machine_mode inner_mode = GET_MODE_INNER (mode);
13463 int n_elts = GET_MODE_NUNITS (mode);
13464 int n_var = 0, one_var = -1;
13465 bool all_same = true;
13466 rtx x, mem;
13467 int i;
13468
13469 for (i = 0; i < n_elts; ++i)
13470 {
13471 x = XVECEXP (vals, 0, i);
13472 if (!CONSTANT_P (x))
13473 ++n_var, one_var = i;
13474
13475 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13476 all_same = false;
13477 }
13478
13479 if (n_var == 0)
13480 {
13481 rtx constant = neon_make_constant (vals);
13482 if (constant != NULL_RTX)
13483 {
13484 emit_move_insn (target, constant);
13485 return;
13486 }
13487 }
13488
13489 /* Splat a single non-constant element if we can. */
13490 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13491 {
13492 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13493 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13494 return;
13495 }
13496
13497 /* One field is non-constant. Load constant then overwrite varying
13498 field. This is more efficient than using the stack. */
13499 if (n_var == 1)
13500 {
13501 rtx copy = copy_rtx (vals);
13502 rtx merge_mask = GEN_INT (1 << one_var);
13503
13504 /* Load constant part of vector, substitute neighboring value for
13505 varying element. */
13506 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13507 neon_expand_vector_init (target, copy);
13508
13509 /* Insert variable. */
13510 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13511 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13512 return;
13513 }
13514
13515 /* Construct the vector in memory one field at a time
13516 and load the whole vector. */
13517 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13518 for (i = 0; i < n_elts; i++)
13519 emit_move_insn (adjust_address_nv (mem, inner_mode,
13520 i * GET_MODE_SIZE (inner_mode)),
13521 XVECEXP (vals, 0, i));
13522 emit_move_insn (target, mem);
13523 }
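/* As an illustration of the single-variable path above: initializing a
   V4SImode vector to { x, 1, 2, 3 } with non-constant x first loads the
   constant vector { 1, 1, 2, 3 } (the varying lane 0 borrows its neighbour's
   value) and then inserts x into lane 0 using a merge mask of 1 << 0.  */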
13524
13525 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
13526    an error mentioning DESC if it doesn't.  EXP indicates the source location,
13527    which includes the inlining history for intrinsics.  */
13528
13529 static void
13530 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13531 const_tree exp, const char *desc)
13532 {
13533 HOST_WIDE_INT lane;
13534
13535 gcc_assert (CONST_INT_P (operand));
13536
13537 lane = INTVAL (operand);
13538
13539 if (lane < low || lane >= high)
13540 {
13541 if (exp)
13542 error_at (EXPR_LOCATION (exp),
13543 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13544 else
13545 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13546 }
13547 }
13548
13549 /* Bounds-check lanes. */
13550
13551 void
13552 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13553 const_tree exp)
13554 {
13555 bounds_check (operand, low, high, exp, "lane");
13556 }
13557
13558 /* Bounds-check constants. */
13559
13560 void
13561 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13562 {
13563 bounds_check (operand, low, high, NULL_TREE, "constant");
13564 }
13565
13566 HOST_WIDE_INT
13567 neon_element_bits (machine_mode mode)
13568 {
13569 return GET_MODE_UNIT_BITSIZE (mode);
13570 }
13571
13572 \f
13573 /* Predicates for `match_operand' and `match_operator'. */
13574
13575 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13576 WB level is 2 if full writeback address modes are allowed, 1
13577 if limited writeback address modes (POST_INC and PRE_DEC) are
13578 allowed and 0 if no writeback at all is supported. */
13579
13580 int
13581 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13582 {
13583 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13584 rtx ind;
13585
13586 /* Reject eliminable registers. */
13587 if (! (reload_in_progress || reload_completed || lra_in_progress)
13588 && ( reg_mentioned_p (frame_pointer_rtx, op)
13589 || reg_mentioned_p (arg_pointer_rtx, op)
13590 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13591 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13592 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13593 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13594 return FALSE;
13595
13596 /* Constants are converted into offsets from labels. */
13597 if (!MEM_P (op))
13598 return FALSE;
13599
13600 ind = XEXP (op, 0);
13601
13602 if (reload_completed
13603 && (LABEL_REF_P (ind)
13604 || (GET_CODE (ind) == CONST
13605 && GET_CODE (XEXP (ind, 0)) == PLUS
13606 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13607 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13608 return TRUE;
13609
13610 /* Match: (mem (reg)). */
13611 if (REG_P (ind))
13612 return arm_address_register_rtx_p (ind, 0);
13613
13614   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13615      acceptable whenever at least restricted writeback is allowed
13616      (subject to verification by arm_address_register_rtx_p), while
13617      PRE_INC and POST_DEC additionally require full writeback
13618      support.  */
13619 if (wb_level > 0
13620 && (GET_CODE (ind) == POST_INC
13621 || GET_CODE (ind) == PRE_DEC
13622 || (wb_level > 1
13623 && (GET_CODE (ind) == PRE_INC
13624 || GET_CODE (ind) == POST_DEC))))
13625 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13626
13627 if (wb_level > 1
13628 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13629 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13630 && GET_CODE (XEXP (ind, 1)) == PLUS
13631 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13632 ind = XEXP (ind, 1);
13633
13634 /* Match:
13635 (plus (reg)
13636 (const))
13637
13638 The encoded immediate for 16-bit modes is multiplied by 2,
13639 while the encoded immediate for 32-bit and 64-bit modes is
13640 multiplied by 4. */
13641 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13642 if (GET_CODE (ind) == PLUS
13643 && REG_P (XEXP (ind, 0))
13644 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13645 && CONST_INT_P (XEXP (ind, 1))
13646 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13647 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13648 return TRUE;
13649
13650 return FALSE;
13651 }
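/* Concretely, the offset check above accepts, e.g., multiples of 2 in
   [-510, 510] for HImode (factor 2) and multiples of 4 in [-1020, 1020]
   for SImode, DImode or DFmode (factor 4).  */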
13652
13653 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13654 WB is true if full writeback address modes are allowed and is false
13655 if limited writeback address modes (POST_INC and PRE_DEC) are
13656 allowed. */
13657
13658 int arm_coproc_mem_operand (rtx op, bool wb)
13659 {
13660 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13661 }
13662
13663 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13664 context in which no writeback address modes are allowed. */
13665
13666 int
13667 arm_coproc_mem_operand_no_writeback (rtx op)
13668 {
13669 return arm_coproc_mem_operand_wb (op, 0);
13670 }
13671
13672 /* In non-STRICT mode, return the register number of OP; in STRICT mode return
13673    the hard regno, or the replacement hard regno if the pseudo got one rather
13674    than being spilled to memory.  Otherwise, return the original pseudo number. */
13675 static int
13676 arm_effective_regno (rtx op, bool strict)
13677 {
13678 gcc_assert (REG_P (op));
13679 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13680 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13681 return REGNO (op);
13682 return reg_renumber[REGNO (op)];
13683 }
13684
13685 /* This function returns TRUE on matching mode and op.
13686 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13687    2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13688 int
13689 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13690 {
13691 enum rtx_code code;
13692 int val, reg_no;
13693
13694 /* Match: (mem (reg)). */
13695 if (REG_P (op))
13696 {
13697 reg_no = arm_effective_regno (op, strict);
13698 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13699 ? reg_no <= LAST_LO_REGNUM
13700 : reg_no < LAST_ARM_REGNUM)
13701 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13702 }
13703 code = GET_CODE (op);
13704
13705 if ((code == POST_INC
13706 || code == PRE_DEC
13707 || code == PRE_INC
13708 || code == POST_DEC)
13709 && REG_P (XEXP (op, 0)))
13710 {
13711 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13712 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13713 ? reg_no <= LAST_LO_REGNUM
13714 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13715 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13716 }
13717 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13718 && GET_CODE (XEXP (op, 1)) == PLUS
13719 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13720 && REG_P (XEXP (op, 0))
13721 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13722 /* Make sure to only accept PLUS after reload_completed, otherwise
13723 this will interfere with auto_inc's pattern detection. */
13724 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13725 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13726 {
13727 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13728 if (code == PLUS)
13729 val = INTVAL (XEXP (op, 1));
13730 else
13731 val = INTVAL (XEXP(XEXP (op, 1), 1));
13732
13733 switch (mode)
13734 {
13735 case E_V16QImode:
13736 case E_V8QImode:
13737 case E_V4QImode:
13738 if (abs (val) > 127)
13739 return FALSE;
13740 break;
13741 case E_V8HImode:
13742 case E_V8HFmode:
13743 case E_V4HImode:
13744 case E_V4HFmode:
13745 if (val % 2 != 0 || abs (val) > 254)
13746 return FALSE;
13747 break;
13748 case E_V4SImode:
13749 case E_V4SFmode:
13750 if (val % 4 != 0 || abs (val) > 508)
13751 return FALSE;
13752 break;
13753 default:
13754 return FALSE;
13755 }
13756 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13757 || (MVE_STN_LDW_MODE (mode)
13758 ? reg_no <= LAST_LO_REGNUM
13759 : (reg_no < LAST_ARM_REGNUM
13760 && (code == PLUS || reg_no != SP_REGNUM))));
13761 }
13762 return FALSE;
13763 }
13764
13765 /* Return TRUE if OP is a memory operand which we can load or store a vector
13766 to/from. TYPE is one of the following values:
13767    0 - Vector load/store (vldr)
13768 1 - Core registers (ldm)
13769 2 - Element/structure loads (vld1)
13770 */
13771 int
13772 neon_vector_mem_operand (rtx op, int type, bool strict)
13773 {
13774 rtx ind;
13775
13776 /* Reject eliminable registers. */
13777 if (strict && ! (reload_in_progress || reload_completed)
13778 && (reg_mentioned_p (frame_pointer_rtx, op)
13779 || reg_mentioned_p (arg_pointer_rtx, op)
13780 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13781 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13782 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13783 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13784 return FALSE;
13785
13786 /* Constants are converted into offsets from labels. */
13787 if (!MEM_P (op))
13788 return FALSE;
13789
13790 ind = XEXP (op, 0);
13791
13792 if (reload_completed
13793 && (LABEL_REF_P (ind)
13794 || (GET_CODE (ind) == CONST
13795 && GET_CODE (XEXP (ind, 0)) == PLUS
13796 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13797 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13798 return TRUE;
13799
13800 /* Match: (mem (reg)). */
13801 if (REG_P (ind))
13802 return arm_address_register_rtx_p (ind, 0);
13803
13804 /* Allow post-increment with Neon registers. */
13805 if ((type != 1 && GET_CODE (ind) == POST_INC)
13806 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13807 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13808
13809 /* Allow post-increment by register for VLDn */
13810 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13811 && GET_CODE (XEXP (ind, 1)) == PLUS
13812 && REG_P (XEXP (XEXP (ind, 1), 1))
13813 && REG_P (XEXP (ind, 0))
13814 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13815 return true;
13816
13817 /* Match:
13818 (plus (reg)
13819 (const)). */
13820 if (type == 0
13821 && GET_CODE (ind) == PLUS
13822 && REG_P (XEXP (ind, 0))
13823 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13824 && CONST_INT_P (XEXP (ind, 1))
13825 && INTVAL (XEXP (ind, 1)) > -1024
13826 /* For quad modes, we restrict the constant offset to be slightly less
13827 than what the instruction format permits. We have no such constraint
13828 on double mode offsets. (This must match arm_legitimate_index_p.) */
13829 && (INTVAL (XEXP (ind, 1))
13830 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13831 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13832 return TRUE;
13833
13834 return FALSE;
13835 }
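/* For type 0, the (reg + const) form above accepts word-aligned offsets from
   -1020 up to 1020 for double-word modes, and only up to 1012 for quad-word
   modes (matching arm_legitimate_index_p).  */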
13836
13837 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13838 type. */
13839 int
13840 mve_struct_mem_operand (rtx op)
13841 {
13842 rtx ind = XEXP (op, 0);
13843
13844 /* Match: (mem (reg)). */
13845 if (REG_P (ind))
13846 return arm_address_register_rtx_p (ind, 0);
13847
13848 /* Allow only post-increment by the mode size. */
13849 if (GET_CODE (ind) == POST_INC)
13850 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13851
13852 return FALSE;
13853 }
13854
13855 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13856 type. */
13857 int
13858 neon_struct_mem_operand (rtx op)
13859 {
13860 rtx ind;
13861
13862 /* Reject eliminable registers. */
13863 if (! (reload_in_progress || reload_completed)
13864 && ( reg_mentioned_p (frame_pointer_rtx, op)
13865 || reg_mentioned_p (arg_pointer_rtx, op)
13866 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13867 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13868 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13869 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13870 return FALSE;
13871
13872 /* Constants are converted into offsets from labels. */
13873 if (!MEM_P (op))
13874 return FALSE;
13875
13876 ind = XEXP (op, 0);
13877
13878 if (reload_completed
13879 && (LABEL_REF_P (ind)
13880 || (GET_CODE (ind) == CONST
13881 && GET_CODE (XEXP (ind, 0)) == PLUS
13882 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13883 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13884 return TRUE;
13885
13886 /* Match: (mem (reg)). */
13887 if (REG_P (ind))
13888 return arm_address_register_rtx_p (ind, 0);
13889
13890 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13891 if (GET_CODE (ind) == POST_INC
13892 || GET_CODE (ind) == PRE_DEC)
13893 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13894
13895 return FALSE;
13896 }
13897
13898 /* Prepares the operands for the VCMLA by lane instruction such that the right
13899 register number is selected. This instruction is special in that it always
13900    requires a D register; however, there is a choice to be made between Dn[0],
13901 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13902
13903 The VCMLA by lane function always selects two values. For instance given D0
13904 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13905 used by the instruction. However given V4SF then index 0 and 1 are valid as
13906 D0[0] or D1[0] are both valid.
13907
13908 This function centralizes that information based on OPERANDS, OPERANDS[3]
13909 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13910 updated to contain the right index. */
13911
13912 rtx *
13913 neon_vcmla_lane_prepare_operands (rtx *operands)
13914 {
13915 int lane = INTVAL (operands[4]);
13916 machine_mode constmode = SImode;
13917 machine_mode mode = GET_MODE (operands[3]);
13918 int regno = REGNO (operands[3]);
13919 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13920 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13921 {
13922 operands[3] = gen_int_mode (regno + 1, constmode);
13923 operands[4]
13924 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13925 }
13926 else
13927 {
13928 operands[3] = gen_int_mode (regno, constmode);
13929 operands[4] = gen_int_mode (lane, constmode);
13930 }
13931 return operands;
13932 }
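/* Worked example: for a V4SF operand (GET_MODE_NUNITS / 4 == 1) occupying a
   D-register pair, lane 0 stays as lane 0 of the first D register, while
   lane 1 is rewritten as lane 0 of the following D register (regno + 1), as
   described in the comment above.  */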
13933
13934
13935 /* Return true if X is a register that will be eliminated later on. */
13936 int
13937 arm_eliminable_register (rtx x)
13938 {
13939 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13940 || REGNO (x) == ARG_POINTER_REGNUM
13941 || VIRTUAL_REGISTER_P (x));
13942 }
13943
13944 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13945 coprocessor registers. Otherwise return NO_REGS. */
13946
13947 enum reg_class
13948 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13949 {
13950 if (mode == HFmode)
13951 {
13952 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13953 return GENERAL_REGS;
13954 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13955 return NO_REGS;
13956 return GENERAL_REGS;
13957 }
13958
13959 /* The neon move patterns handle all legitimate vector and struct
13960 addresses. */
13961 if (TARGET_NEON
13962 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13963 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13964 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13965 || VALID_NEON_STRUCT_MODE (mode)))
13966 return NO_REGS;
13967
13968 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13969 return NO_REGS;
13970
13971 return GENERAL_REGS;
13972 }
13973
13974 /* Values which must be returned in the most-significant end of the return
13975 register. */
13976
13977 static bool
13978 arm_return_in_msb (const_tree valtype)
13979 {
13980 return (TARGET_AAPCS_BASED
13981 && BYTES_BIG_ENDIAN
13982 && (AGGREGATE_TYPE_P (valtype)
13983 || TREE_CODE (valtype) == COMPLEX_TYPE
13984 || FIXED_POINT_TYPE_P (valtype)));
13985 }
13986
13987 /* Return TRUE if X references a SYMBOL_REF. */
13988 int
13989 symbol_mentioned_p (rtx x)
13990 {
13991 const char * fmt;
13992 int i;
13993
13994 if (SYMBOL_REF_P (x))
13995 return 1;
13996
13997 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13998 are constant offsets, not symbols. */
13999 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14000 return 0;
14001
14002 fmt = GET_RTX_FORMAT (GET_CODE (x));
14003
14004 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14005 {
14006 if (fmt[i] == 'E')
14007 {
14008 int j;
14009
14010 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14011 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14012 return 1;
14013 }
14014 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14015 return 1;
14016 }
14017
14018 return 0;
14019 }
14020
14021 /* Return TRUE if X references a LABEL_REF. */
14022 int
14023 label_mentioned_p (rtx x)
14024 {
14025 const char * fmt;
14026 int i;
14027
14028 if (LABEL_REF_P (x))
14029 return 1;
14030
14031 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14032 instruction, but they are constant offsets, not symbols. */
14033 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14034 return 0;
14035
14036 fmt = GET_RTX_FORMAT (GET_CODE (x));
14037 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14038 {
14039 if (fmt[i] == 'E')
14040 {
14041 int j;
14042
14043 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14044 if (label_mentioned_p (XVECEXP (x, i, j)))
14045 return 1;
14046 }
14047 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14048 return 1;
14049 }
14050
14051 return 0;
14052 }
14053
14054 int
14055 tls_mentioned_p (rtx x)
14056 {
14057 switch (GET_CODE (x))
14058 {
14059 case CONST:
14060 return tls_mentioned_p (XEXP (x, 0));
14061
14062 case UNSPEC:
14063 if (XINT (x, 1) == UNSPEC_TLS)
14064 return 1;
14065
14066 /* Fall through. */
14067 default:
14068 return 0;
14069 }
14070 }
14071
14072 /* Must not copy any rtx that uses a pc-relative address.
14073 Also, disallow copying of load-exclusive instructions that
14074 may appear after splitting of compare-and-swap-style operations
14075 so as to prevent those loops from being transformed away from their
14076 canonical forms (see PR 69904). */
14077
14078 static bool
14079 arm_cannot_copy_insn_p (rtx_insn *insn)
14080 {
14081 /* The tls call insn cannot be copied, as it is paired with a data
14082 word. */
14083 if (recog_memoized (insn) == CODE_FOR_tlscall)
14084 return true;
14085
14086 subrtx_iterator::array_type array;
14087 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14088 {
14089 const_rtx x = *iter;
14090 if (GET_CODE (x) == UNSPEC
14091 && (XINT (x, 1) == UNSPEC_PIC_BASE
14092 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14093 return true;
14094 }
14095
14096 rtx set = single_set (insn);
14097 if (set)
14098 {
14099 rtx src = SET_SRC (set);
14100 if (GET_CODE (src) == ZERO_EXTEND)
14101 src = XEXP (src, 0);
14102
14103 /* Catch the load-exclusive and load-acquire operations. */
14104 if (GET_CODE (src) == UNSPEC_VOLATILE
14105 && (XINT (src, 1) == VUNSPEC_LL
14106 || XINT (src, 1) == VUNSPEC_LAX))
14107 return true;
14108 }
14109 return false;
14110 }
14111
14112 enum rtx_code
14113 minmax_code (rtx x)
14114 {
14115 enum rtx_code code = GET_CODE (x);
14116
14117 switch (code)
14118 {
14119 case SMAX:
14120 return GE;
14121 case SMIN:
14122 return LE;
14123 case UMIN:
14124 return LEU;
14125 case UMAX:
14126 return GEU;
14127 default:
14128 gcc_unreachable ();
14129 }
14130 }
14131
14132 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14133
14134 bool
14135 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14136 int *mask, bool *signed_sat)
14137 {
14138 /* The high bound must be a power of two minus one. */
14139 int log = exact_log2 (INTVAL (hi_bound) + 1);
14140 if (log == -1)
14141 return false;
14142
14143 /* The low bound is either zero (for usat) or one less than the
14144 negation of the high bound (for ssat). */
14145 if (INTVAL (lo_bound) == 0)
14146 {
14147 if (mask)
14148 *mask = log;
14149 if (signed_sat)
14150 *signed_sat = false;
14151
14152 return true;
14153 }
14154
14155 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14156 {
14157 if (mask)
14158 *mask = log + 1;
14159 if (signed_sat)
14160 *signed_sat = true;
14161
14162 return true;
14163 }
14164
14165 return false;
14166 }
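/* Two examples: bounds [0, 255] give log = 8 and match usat with *MASK = 8,
   while bounds [-128, 127] give log = 7 and, since -128 == -127 - 1, match
   ssat with *MASK = 8.  */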
14167
14168 /* Return 1 if memory locations are adjacent. */
14169 int
14170 adjacent_mem_locations (rtx a, rtx b)
14171 {
14172 /* We don't guarantee to preserve the order of these memory refs. */
14173 if (volatile_refs_p (a) || volatile_refs_p (b))
14174 return 0;
14175
14176 if ((REG_P (XEXP (a, 0))
14177 || (GET_CODE (XEXP (a, 0)) == PLUS
14178 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14179 && (REG_P (XEXP (b, 0))
14180 || (GET_CODE (XEXP (b, 0)) == PLUS
14181 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14182 {
14183 HOST_WIDE_INT val0 = 0, val1 = 0;
14184 rtx reg0, reg1;
14185 int val_diff;
14186
14187 if (GET_CODE (XEXP (a, 0)) == PLUS)
14188 {
14189 reg0 = XEXP (XEXP (a, 0), 0);
14190 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14191 }
14192 else
14193 reg0 = XEXP (a, 0);
14194
14195 if (GET_CODE (XEXP (b, 0)) == PLUS)
14196 {
14197 reg1 = XEXP (XEXP (b, 0), 0);
14198 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14199 }
14200 else
14201 reg1 = XEXP (b, 0);
14202
14203 /* Don't accept any offset that will require multiple
14204 instructions to handle, since this would cause the
14205 arith_adjacentmem pattern to output an overlong sequence. */
14206 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14207 return 0;
14208
14209 /* Don't allow an eliminable register: register elimination can make
14210 the offset too large. */
14211 if (arm_eliminable_register (reg0))
14212 return 0;
14213
14214 val_diff = val1 - val0;
14215
14216 if (arm_ld_sched)
14217 {
14218 /* If the target has load delay slots, then there's no benefit
14219 to using an ldm instruction unless the offset is zero and
14220 we are optimizing for size. */
14221 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14222 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14223 && (val_diff == 4 || val_diff == -4));
14224 }
14225
14226 return ((REGNO (reg0) == REGNO (reg1))
14227 && (val_diff == 4 || val_diff == -4));
14228 }
14229
14230 return 0;
14231 }
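/* For example (assuming a core without load delay slots, i.e. with
   arm_ld_sched clear): (mem:SI (plus:SI (reg:SI r4) (const_int 4))) and
   (mem:SI (plus:SI (reg:SI r4) (const_int 8))) share the base r4 and their
   offsets differ by exactly 4, so they count as adjacent; the same two
   references with different base registers, or 8 bytes apart, do not.  */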
14232
14233 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14234 for load operations, false for store operations. CONSECUTIVE is true
14235 if the register numbers in the operation must be consecutive in the register
14236 bank. RETURN_PC is true if value is to be loaded in PC.
14237 The pattern we are trying to match for load is:
14238 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14239 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14240 :
14241 :
14242 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14243 ]
14244 where
14245 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14246 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14247 3. If consecutive is TRUE, then for kth register being loaded,
14248 REGNO (R_dk) = REGNO (R_d0) + k.
14249 The pattern for store is similar. */
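/* For instance, a three-register pop with write-back ("ldmia sp!, {r4-r6}",
   assuming SImode and no return_pc) is expected to look like:

     (parallel
      [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
       (set (reg:SI r4) (mem:SI (reg:SI sp)))
       (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
       (set (reg:SI r6) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   where the write-back element adds (count - 1) * <reg_increment> == 12
   to the base.  */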
14250 bool
14251 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14252 bool consecutive, bool return_pc)
14253 {
14254 HOST_WIDE_INT count = XVECLEN (op, 0);
14255 rtx reg, mem, addr;
14256 unsigned regno;
14257 unsigned first_regno;
14258 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14259 rtx elt;
14260 bool addr_reg_in_reglist = false;
14261 bool update = false;
14262 int reg_increment;
14263 int offset_adj;
14264 int regs_per_val;
14265
14266 /* If not in SImode, then registers must be consecutive
14267 (e.g., VLDM instructions for DFmode). */
14268 gcc_assert ((mode == SImode) || consecutive);
14269 /* Setting return_pc for stores is illegal. */
14270 gcc_assert (!return_pc || load);
14271
14272 /* Set up the increments and the regs per val based on the mode. */
14273 reg_increment = GET_MODE_SIZE (mode);
14274 regs_per_val = reg_increment / 4;
14275 offset_adj = return_pc ? 1 : 0;
14276
14277 if (count <= 1
14278 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14279 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14280 return false;
14281
14282 /* Check if this is a write-back. */
14283 elt = XVECEXP (op, 0, offset_adj);
14284 if (GET_CODE (SET_SRC (elt)) == PLUS)
14285 {
14286 i++;
14287 base = 1;
14288 update = true;
14289
14290 /* The offset adjustment must be the number of registers being
14291 popped times the size of a single register. */
14292 if (!REG_P (SET_DEST (elt))
14293 || !REG_P (XEXP (SET_SRC (elt), 0))
14294 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14295 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14296 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14297 ((count - 1 - offset_adj) * reg_increment))
14298 return false;
14299 }
14300
14301 i = i + offset_adj;
14302 base = base + offset_adj;
14303 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14304 success depends on the type: VLDM can do just one reg,
14305 LDM must do at least two. */
14306 if ((count <= i) && (mode == SImode))
14307 return false;
14308
14309 elt = XVECEXP (op, 0, i - 1);
14310 if (GET_CODE (elt) != SET)
14311 return false;
14312
14313 if (load)
14314 {
14315 reg = SET_DEST (elt);
14316 mem = SET_SRC (elt);
14317 }
14318 else
14319 {
14320 reg = SET_SRC (elt);
14321 mem = SET_DEST (elt);
14322 }
14323
14324 if (!REG_P (reg) || !MEM_P (mem))
14325 return false;
14326
14327 regno = REGNO (reg);
14328 first_regno = regno;
14329 addr = XEXP (mem, 0);
14330 if (GET_CODE (addr) == PLUS)
14331 {
14332 if (!CONST_INT_P (XEXP (addr, 1)))
14333 return false;
14334
14335 offset = INTVAL (XEXP (addr, 1));
14336 addr = XEXP (addr, 0);
14337 }
14338
14339 if (!REG_P (addr))
14340 return false;
14341
14342 /* Don't allow SP to be loaded unless it is also the base register. It
14343 guarantees that SP is reset correctly when an LDM instruction
14344 is interrupted. Otherwise, we might end up with a corrupt stack. */
14345 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14346 return false;
14347
14348 if (regno == REGNO (addr))
14349 addr_reg_in_reglist = true;
14350
14351 for (; i < count; i++)
14352 {
14353 elt = XVECEXP (op, 0, i);
14354 if (GET_CODE (elt) != SET)
14355 return false;
14356
14357 if (load)
14358 {
14359 reg = SET_DEST (elt);
14360 mem = SET_SRC (elt);
14361 }
14362 else
14363 {
14364 reg = SET_SRC (elt);
14365 mem = SET_DEST (elt);
14366 }
14367
14368 if (!REG_P (reg)
14369 || GET_MODE (reg) != mode
14370 || REGNO (reg) <= regno
14371 || (consecutive
14372 && (REGNO (reg) !=
14373 (unsigned int) (first_regno + regs_per_val * (i - base))))
14374 /* Don't allow SP to be loaded unless it is also the base register. It
14375 guarantees that SP is reset correctly when an LDM instruction
14376 is interrupted. Otherwise, we might end up with a corrupt stack. */
14377 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14378 || !MEM_P (mem)
14379 || GET_MODE (mem) != mode
14380 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14381 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14382 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14383 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14384 offset + (i - base) * reg_increment))
14385 && (!REG_P (XEXP (mem, 0))
14386 || offset + (i - base) * reg_increment != 0)))
14387 return false;
14388
14389 regno = REGNO (reg);
14390 if (regno == REGNO (addr))
14391 addr_reg_in_reglist = true;
14392 }
14393
14394 if (load)
14395 {
14396 if (update && addr_reg_in_reglist)
14397 return false;
14398
14399 /* For Thumb-1, the address register is always modified, either by write-back
14400 or by explicit load. If the pattern does not describe an update,
14401 then the address register must be in the list of loaded registers. */
14402 if (TARGET_THUMB1)
14403 return update || addr_reg_in_reglist;
14404 }
14405
14406 return true;
14407 }
14408
14409 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14410 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14411 following form:
14412
14413 [(set (reg:SI <N>) (const_int 0))
14414 (set (reg:SI <M>) (const_int 0))
14415 ...
14416 (unspec_volatile [(const_int 0)]
14417 VUNSPEC_CLRM_APSR)
14418 (clobber (reg:CC CC_REGNUM))
14419 ]
14420
14421 Any number (including 0) of set expressions is valid, and the volatile unspec
14422 is optional. All registers but SP and PC are allowed, and registers must be
14423 in strictly increasing order.
14424
14425 To be a valid VSCCLRM pattern, OP must have the following form:
14426
14427 [(unspec_volatile [(const_int 0)]
14428 VUNSPEC_VSCCLRM_VPR)
14429 (set (reg:SF <N>) (const_int 0))
14430 (set (reg:SF <M>) (const_int 0))
14431 ...
14432 ]
14433
14434 As with CLRM, any number (including 0) of set expressions is valid; however,
14435 the volatile unspec is mandatory here. Any VFP single-precision register is
14436 accepted but all registers must be consecutive and in increasing order. */
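/* Illustrative examples: a CLRM clearing r1, r3 and r4 is a parallel of
   three (set (reg:SI <n>) (const_int 0)) expressions with strictly
   increasing register numbers, optionally followed by the VUNSPEC_CLRM_APSR
   unspec_volatile as the second-to-last element and the CC clobber last.
   A VSCCLRM clearing s4-s6 starts with the VUNSPEC_VSCCLRM_VPR
   unspec_volatile and then clears s4, s5 and s6; a non-consecutive list
   such as s4, s6 is rejected.  */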
14437
14438 bool
14439 clear_operation_p (rtx op, bool vfp)
14440 {
14441 unsigned regno;
14442 unsigned last_regno = INVALID_REGNUM;
14443 rtx elt, reg, zero;
14444 int count = XVECLEN (op, 0);
14445 int first_set = vfp ? 1 : 0;
14446 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14447
14448 for (int i = first_set; i < count; i++)
14449 {
14450 elt = XVECEXP (op, 0, i);
14451
14452 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14453 {
14454 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14455 || XVECLEN (elt, 0) != 1
14456 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14457 || i != count - 2)
14458 return false;
14459
14460 continue;
14461 }
14462
14463 if (GET_CODE (elt) == CLOBBER)
14464 continue;
14465
14466 if (GET_CODE (elt) != SET)
14467 return false;
14468
14469 reg = SET_DEST (elt);
14470 zero = SET_SRC (elt);
14471
14472 if (!REG_P (reg)
14473 || GET_MODE (reg) != expected_mode
14474 || zero != CONST0_RTX (SImode))
14475 return false;
14476
14477 regno = REGNO (reg);
14478
14479 if (vfp)
14480 {
14481 if (i != first_set && regno != last_regno + 1)
14482 return false;
14483 }
14484 else
14485 {
14486 if (regno == SP_REGNUM || regno == PC_REGNUM)
14487 return false;
14488 if (i != first_set && regno <= last_regno)
14489 return false;
14490 }
14491
14492 last_regno = regno;
14493 }
14494
14495 return true;
14496 }
14497
14498 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14499 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14500 instruction. ADD_OFFSET is nonzero if the base address register needs
14501 to be modified with an add instruction before we can use it. */
14502
14503 static bool
14504 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14505 int nops, HOST_WIDE_INT add_offset)
14506 {
14507 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14508 if the offset isn't small enough. The reason 2 ldrs are faster
14509 is because these ARMs are able to do more than one cache access
14510 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14511 whilst the ARM8 has a double bandwidth cache. This means that
14512 these cores can do both an instruction fetch and a data fetch in
14513 a single cycle, so the trick of calculating the address into a
14514 scratch register (one of the result regs) and then doing a load
14515 multiple actually becomes slower (and no smaller in code size).
14516 That is the transformation
14517
14518 ldr rd1, [rbase + offset]
14519 ldr rd2, [rbase + offset + 4]
14520
14521 to
14522
14523 add rd1, rbase, offset
14524 ldmia rd1, {rd1, rd2}
14525
14526 produces worse code -- '3 cycles + any stalls on rd2' instead of
14527 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14528 access per cycle, the first sequence could never complete in less
14529 than 6 cycles, whereas the ldm sequence would only take 5 and
14530 would make better use of sequential accesses if not hitting the
14531 cache.
14532
14533 We cheat here and test 'arm_ld_sched' which we currently know to
14534 only be true for the ARM8, ARM9 and StrongARM. If this ever
14535 changes, then the test below needs to be reworked. */
14536 if (nops == 2 && arm_ld_sched && add_offset != 0)
14537 return false;
14538
14539 /* XScale has load-store double instructions, but they have stricter
14540 alignment requirements than load-store multiple, so we cannot
14541 use them.
14542
14543 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14544 the pipeline until completion.
14545
14546 NREGS CYCLES
14547 1 3
14548 2 4
14549 3 5
14550 4 6
14551
14552 An ldr instruction takes 1-3 cycles, but does not block the
14553 pipeline.
14554
14555 NREGS CYCLES
14556 1 1-3
14557 2 2-6
14558 3 3-9
14559 4 4-12
14560
14561 Best case ldr will always win. However, the more ldr instructions
14562 we issue, the less likely we are to be able to schedule them well.
14563 Using ldr instructions also increases code size.
14564
14565 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14566 for counts of 3 or 4 regs. */
14567 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14568 return false;
14569 return true;
14570 }
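/* Illustrative outcomes of the heuristic above: combining just two loads is
   rejected when tuning for XScale and not optimizing for size, and likewise
   on cores with load scheduling when the base address first needs a
   separate add; groups of three or four loads are accepted.  */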
14571
14572 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14573 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14574 an array ORDER which describes the sequence to use when accessing the
14575 offsets that produces an ascending order. In this sequence, each
14576 offset must be larger by exactly 4 than the previous one. ORDER[0]
14577 must have been filled in with the lowest offset by the caller.
14578 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14579 we use to verify that ORDER produces an ascending order of registers.
14580 Return true if it was possible to construct such an order, false if
14581 not. */
14582
14583 static bool
14584 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14585 int *unsorted_regs)
14586 {
14587 int i;
14588 for (i = 1; i < nops; i++)
14589 {
14590 int j;
14591
14592 order[i] = order[i - 1];
14593 for (j = 0; j < nops; j++)
14594 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14595 {
14596 /* We must find exactly one offset that is higher than the
14597 previous one by 4. */
14598 if (order[i] != order[i - 1])
14599 return false;
14600 order[i] = j;
14601 }
14602 if (order[i] == order[i - 1])
14603 return false;
14604 /* The register numbers must be ascending. */
14605 if (unsorted_regs != NULL
14606 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14607 return false;
14608 }
14609 return true;
14610 }
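/* Worked example: with UNSORTED_OFFSETS == {8, 0, 4, 12} the caller sets
   ORDER[0] = 1 (the index of the lowest offset, 0); the loop then finds 4
   at index 2, 8 at index 0 and 12 at index 3, giving ORDER == {1, 2, 0, 3}.
   An array such as {0, 4, 4, 8} fails because two offsets are 4 greater
   than the previous one, and {0, 8, 12, 16} fails because no offset is
   exactly 4 greater than 0.  */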
14611
14612 /* Used to determine in a peephole whether a sequence of load
14613 instructions can be changed into a load-multiple instruction.
14614 NOPS is the number of separate load instructions we are examining. The
14615 first NOPS entries in OPERANDS are the destination registers, the
14616 next NOPS entries are memory operands. If this function is
14617 successful, *BASE is set to the common base register of the memory
14618 accesses; *LOAD_OFFSET is set to the first memory location's offset
14619 from that base register.
14620 REGS is an array filled in with the destination register numbers.
14621 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14622 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14623 the sequence of registers in REGS matches the loads from ascending memory
14624 locations, and the function verifies that the register numbers are
14625 themselves ascending. If CHECK_REGS is false, the register numbers
14626 are stored in the order they are found in the operands. */
14627 static int
14628 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14629 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14630 {
14631 int unsorted_regs[MAX_LDM_STM_OPS];
14632 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14633 int order[MAX_LDM_STM_OPS];
14634 int base_reg = -1;
14635 int i, ldm_case;
14636
14637 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14638 easily extended if required. */
14639 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14640
14641 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14642
14643 /* Loop over the operands and check that the memory references are
14644 suitable (i.e. immediate offsets from the same base register). At
14645 the same time, extract the target register, and the memory
14646 offsets. */
14647 for (i = 0; i < nops; i++)
14648 {
14649 rtx reg;
14650 rtx offset;
14651
14652 /* Convert a subreg of a mem into the mem itself. */
14653 if (GET_CODE (operands[nops + i]) == SUBREG)
14654 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14655
14656 gcc_assert (MEM_P (operands[nops + i]));
14657
14658 /* Don't reorder volatile memory references; it doesn't seem worth
14659 looking for the case where the order is ok anyway. */
14660 if (MEM_VOLATILE_P (operands[nops + i]))
14661 return 0;
14662
14663 offset = const0_rtx;
14664
14665 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14666 || (SUBREG_P (reg)
14667 && REG_P (reg = SUBREG_REG (reg))))
14668 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14669 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14670 || (SUBREG_P (reg)
14671 && REG_P (reg = SUBREG_REG (reg))))
14672 && (CONST_INT_P (offset
14673 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14674 {
14675 if (i == 0)
14676 {
14677 base_reg = REGNO (reg);
14678 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14679 return 0;
14680 }
14681 else if (base_reg != (int) REGNO (reg))
14682 /* Not addressed from the same base register. */
14683 return 0;
14684
14685 unsorted_regs[i] = (REG_P (operands[i])
14686 ? REGNO (operands[i])
14687 : REGNO (SUBREG_REG (operands[i])));
14688
14689 /* If it isn't an integer register, or if it overwrites the
14690 base register but isn't the last insn in the list, then
14691 we can't do this. */
14692 if (unsorted_regs[i] < 0
14693 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14694 || unsorted_regs[i] > 14
14695 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14696 return 0;
14697
14698 /* Don't allow SP to be loaded unless it is also the base
14699 register. It guarantees that SP is reset correctly when
14700 an LDM instruction is interrupted. Otherwise, we might
14701 end up with a corrupt stack. */
14702 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14703 return 0;
14704
14705 unsorted_offsets[i] = INTVAL (offset);
14706 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14707 order[0] = i;
14708 }
14709 else
14710 /* Not a suitable memory address. */
14711 return 0;
14712 }
14713
14714 /* All the useful information has now been extracted from the
14715 operands into unsorted_regs and unsorted_offsets; additionally,
14716 order[0] has been set to the lowest offset in the list. Sort
14717 the offsets into order, verifying that they are adjacent, and
14718 check that the register numbers are ascending. */
14719 if (!compute_offset_order (nops, unsorted_offsets, order,
14720 check_regs ? unsorted_regs : NULL))
14721 return 0;
14722
14723 if (saved_order)
14724 memcpy (saved_order, order, sizeof order);
14725
14726 if (base)
14727 {
14728 *base = base_reg;
14729
14730 for (i = 0; i < nops; i++)
14731 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14732
14733 *load_offset = unsorted_offsets[order[0]];
14734 }
14735
14736 if (unsorted_offsets[order[0]] == 0)
14737 ldm_case = 1; /* ldmia */
14738 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14739 ldm_case = 2; /* ldmib */
14740 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14741 ldm_case = 3; /* ldmda */
14742 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14743 ldm_case = 4; /* ldmdb */
14744 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14745 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14746 ldm_case = 5;
14747 else
14748 return 0;
14749
14750 if (!multiple_operation_profitable_p (false, nops,
14751 ldm_case == 5
14752 ? unsorted_offsets[order[0]] : 0))
14753 return 0;
14754
14755 return ldm_case;
14756 }
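/* Examples of the classification above: sorted offsets {0, 4, 8} give
   case 1 (ldmia); {4, 8, 12} give case 2 (ldmib, ARM state only);
   {-8, -4, 0} give case 3 (ldmda, ARM state only); {-12, -8, -4} give
   case 4 (ldmdb); any other run whose lowest offset (or its negation) is a
   valid add immediate gives case 5, which the callers handle as an add
   followed by an ldmia.  */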
14757
14758 /* Used to determine in a peephole whether a sequence of store instructions can
14759 be changed into a store-multiple instruction.
14760 NOPS is the number of separate store instructions we are examining.
14761 NOPS_TOTAL is the total number of instructions recognized by the peephole
14762 pattern.
14763 The first NOPS entries in OPERANDS are the source registers, the next
14764 NOPS entries are memory operands. If this function is successful, *BASE is
14765 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14766 to the first memory location's offset from that base register. REGS is an
14767 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14768 likewise filled with the corresponding rtx's.
14769 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14770 numbers to an ascending order of stores.
14771 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14772 from ascending memory locations, and the function verifies that the register
14773 numbers are themselves ascending. If CHECK_REGS is false, the register
14774 numbers are stored in the order they are found in the operands. */
14775 static int
14776 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14777 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14778 HOST_WIDE_INT *load_offset, bool check_regs)
14779 {
14780 int unsorted_regs[MAX_LDM_STM_OPS];
14781 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14782 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14783 int order[MAX_LDM_STM_OPS];
14784 int base_reg = -1;
14785 rtx base_reg_rtx = NULL;
14786 int i, stm_case;
14787
14788 /* Write-back of the base register is currently only supported for Thumb-1. */
14789 int base_writeback = TARGET_THUMB1;
14790
14791 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14792 easily extended if required. */
14793 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14794
14795 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14796
14797 /* Loop over the operands and check that the memory references are
14798 suitable (i.e. immediate offsets from the same base register). At
14799 the same time, extract the target register, and the memory
14800 offsets. */
14801 for (i = 0; i < nops; i++)
14802 {
14803 rtx reg;
14804 rtx offset;
14805
14806 /* Convert a subreg of a mem into the mem itself. */
14807 if (GET_CODE (operands[nops + i]) == SUBREG)
14808 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14809
14810 gcc_assert (MEM_P (operands[nops + i]));
14811
14812 /* Don't reorder volatile memory references; it doesn't seem worth
14813 looking for the case where the order is ok anyway. */
14814 if (MEM_VOLATILE_P (operands[nops + i]))
14815 return 0;
14816
14817 offset = const0_rtx;
14818
14819 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14820 || (SUBREG_P (reg)
14821 && REG_P (reg = SUBREG_REG (reg))))
14822 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14823 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14824 || (SUBREG_P (reg)
14825 && REG_P (reg = SUBREG_REG (reg))))
14826 && (CONST_INT_P (offset
14827 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14828 {
14829 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14830 ? operands[i] : SUBREG_REG (operands[i]));
14831 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14832
14833 if (i == 0)
14834 {
14835 base_reg = REGNO (reg);
14836 base_reg_rtx = reg;
14837 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14838 return 0;
14839 }
14840 else if (base_reg != (int) REGNO (reg))
14841 /* Not addressed from the same base register. */
14842 return 0;
14843
14844 /* If it isn't an integer register, then we can't do this. */
14845 if (unsorted_regs[i] < 0
14846 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14847 /* The effects are unpredictable if the base register is
14848 both updated and stored. */
14849 || (base_writeback && unsorted_regs[i] == base_reg)
14850 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14851 || unsorted_regs[i] > 14)
14852 return 0;
14853
14854 unsorted_offsets[i] = INTVAL (offset);
14855 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14856 order[0] = i;
14857 }
14858 else
14859 /* Not a suitable memory address. */
14860 return 0;
14861 }
14862
14863 /* All the useful information has now been extracted from the
14864 operands into unsorted_regs and unsorted_offsets; additionally,
14865 order[0] has been set to the lowest offset in the list. Sort
14866 the offsets into order, verifying that they are adjacent, and
14867 check that the register numbers are ascending. */
14868 if (!compute_offset_order (nops, unsorted_offsets, order,
14869 check_regs ? unsorted_regs : NULL))
14870 return 0;
14871
14872 if (saved_order)
14873 memcpy (saved_order, order, sizeof order);
14874
14875 if (base)
14876 {
14877 *base = base_reg;
14878
14879 for (i = 0; i < nops; i++)
14880 {
14881 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14882 if (reg_rtxs)
14883 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14884 }
14885
14886 *load_offset = unsorted_offsets[order[0]];
14887 }
14888
14889 if (TARGET_THUMB1
14890 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14891 return 0;
14892
14893 if (unsorted_offsets[order[0]] == 0)
14894 stm_case = 1; /* stmia */
14895 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14896 stm_case = 2; /* stmib */
14897 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14898 stm_case = 3; /* stmda */
14899 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14900 stm_case = 4; /* stmdb */
14901 else
14902 return 0;
14903
14904 if (!multiple_operation_profitable_p (false, nops, 0))
14905 return 0;
14906
14907 return stm_case;
14908 }
14909 \f
14910 /* Routines for use in generating RTL. */
14911
14912 /* Generate a load-multiple instruction. COUNT is the number of loads in
14913 the instruction; REGS and MEMS are arrays containing the operands.
14914 BASEREG is the base register to be used in addressing the memory operands.
14915 WBACK_OFFSET is nonzero if the instruction should update the base
14916 register. */
14917
14918 static rtx
14919 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14920 HOST_WIDE_INT wback_offset)
14921 {
14922 int i = 0, j;
14923 rtx result;
14924
14925 if (!multiple_operation_profitable_p (false, count, 0))
14926 {
14927 rtx seq;
14928
14929 start_sequence ();
14930
14931 for (i = 0; i < count; i++)
14932 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14933
14934 if (wback_offset != 0)
14935 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14936
14937 seq = get_insns ();
14938 end_sequence ();
14939
14940 return seq;
14941 }
14942
14943 result = gen_rtx_PARALLEL (VOIDmode,
14944 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14945 if (wback_offset != 0)
14946 {
14947 XVECEXP (result, 0, 0)
14948 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14949 i = 1;
14950 count++;
14951 }
14952
14953 for (j = 0; i < count; i++, j++)
14954 XVECEXP (result, 0, i)
14955 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14956
14957 return result;
14958 }
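/* For example, with COUNT == 2, REGS == {4, 5}, BASEREG == r6 and
   WBACK_OFFSET == 8 (and assuming the operation is judged profitable), the
   result is roughly

     (parallel
      [(set (reg:SI r6) (plus:SI (reg:SI r6) (const_int 8)))
       (set (reg:SI r4) MEMS[0])
       (set (reg:SI r5) MEMS[1])])

   i.e. "ldmia r6!, {r4, r5}" when MEMS address r6 and r6 + 4; in the
   unprofitable case a plain sequence of single loads, plus an add of
   WBACK_OFFSET, is returned instead.  */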
14959
14960 /* Generate a store-multiple instruction. COUNT is the number of stores in
14961 the instruction; REGS and MEMS are arrays containing the operands.
14962 BASEREG is the base register to be used in addressing the memory operands.
14963 WBACK_OFFSET is nonzero if the instruction should update the base
14964 register. */
14965
14966 static rtx
14967 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14968 HOST_WIDE_INT wback_offset)
14969 {
14970 int i = 0, j;
14971 rtx result;
14972
14973 if (GET_CODE (basereg) == PLUS)
14974 basereg = XEXP (basereg, 0);
14975
14976 if (!multiple_operation_profitable_p (false, count, 0))
14977 {
14978 rtx seq;
14979
14980 start_sequence ();
14981
14982 for (i = 0; i < count; i++)
14983 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14984
14985 if (wback_offset != 0)
14986 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14987
14988 seq = get_insns ();
14989 end_sequence ();
14990
14991 return seq;
14992 }
14993
14994 result = gen_rtx_PARALLEL (VOIDmode,
14995 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14996 if (wback_offset != 0)
14997 {
14998 XVECEXP (result, 0, 0)
14999 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15000 i = 1;
15001 count++;
15002 }
15003
15004 for (j = 0; i < count; i++, j++)
15005 XVECEXP (result, 0, i)
15006 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15007
15008 return result;
15009 }
15010
15011 /* Generate either a load-multiple or a store-multiple instruction. This
15012 function can be used in situations where we can start with a single MEM
15013 rtx and adjust its address upwards.
15014 COUNT is the number of operations in the instruction, not counting a
15015 possible update of the base register. REGS is an array containing the
15016 register operands.
15017 BASEREG is the base register to be used in addressing the memory operands,
15018 which are constructed from BASEMEM.
15019 WRITE_BACK specifies whether the generated instruction should include an
15020 update of the base register.
15021 OFFSETP is used to pass an offset to and from this function; this offset
15022 is not used when constructing the address (instead BASEMEM should have an
15023 appropriate offset in its address); it is used only for setting
15024 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15025
15026 static rtx
15027 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15028 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15029 {
15030 rtx mems[MAX_LDM_STM_OPS];
15031 HOST_WIDE_INT offset = *offsetp;
15032 int i;
15033
15034 gcc_assert (count <= MAX_LDM_STM_OPS);
15035
15036 if (GET_CODE (basereg) == PLUS)
15037 basereg = XEXP (basereg, 0);
15038
15039 for (i = 0; i < count; i++)
15040 {
15041 rtx addr = plus_constant (Pmode, basereg, i * 4);
15042 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15043 offset += 4;
15044 }
15045
15046 if (write_back)
15047 *offsetp = offset;
15048
15049 if (is_load)
15050 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15051 write_back ? 4 * count : 0);
15052 else
15053 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15054 write_back ? 4 * count : 0);
15055 }
15056
15057 rtx
15058 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15059 rtx basemem, HOST_WIDE_INT *offsetp)
15060 {
15061 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15062 offsetp);
15063 }
15064
15065 rtx
15066 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15067 rtx basemem, HOST_WIDE_INT *offsetp)
15068 {
15069 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15070 offsetp);
15071 }
15072
15073 /* Called from a peephole2 expander to turn a sequence of loads into an
15074 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15075 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15076 is true if we can reorder the registers because their subsequent uses are
15077 commutative.
15078 Returns true iff we could generate a new instruction. */
15079
15080 bool
15081 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15082 {
15083 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15084 rtx mems[MAX_LDM_STM_OPS];
15085 int i, j, base_reg;
15086 rtx base_reg_rtx;
15087 HOST_WIDE_INT offset;
15088 int write_back = FALSE;
15089 int ldm_case;
15090 rtx addr;
15091
15092 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15093 &base_reg, &offset, !sort_regs);
15094
15095 if (ldm_case == 0)
15096 return false;
15097
15098 if (sort_regs)
15099 for (i = 0; i < nops - 1; i++)
15100 for (j = i + 1; j < nops; j++)
15101 if (regs[i] > regs[j])
15102 {
15103 int t = regs[i];
15104 regs[i] = regs[j];
15105 regs[j] = t;
15106 }
15107 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15108
15109 if (TARGET_THUMB1)
15110 {
15111 gcc_assert (ldm_case == 1 || ldm_case == 5);
15112
15113 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15114 write_back = true;
15115 for (i = 0; i < nops; i++)
15116 if (base_reg == regs[i])
15117 write_back = false;
15118
15119 /* Ensure the base is dead if it is updated. */
15120 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15121 return false;
15122 }
15123
15124 if (ldm_case == 5)
15125 {
15126 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15127 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15128 offset = 0;
15129 base_reg_rtx = newbase;
15130 }
15131
15132 for (i = 0; i < nops; i++)
15133 {
15134 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15135 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15136 SImode, addr, 0);
15137 }
15138 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15139 write_back ? offset + i * 4 : 0));
15140 return true;
15141 }
15142
15143 /* Called from a peephole2 expander to turn a sequence of stores into an
15144 STM instruction. OPERANDS are the operands found by the peephole matcher;
15145 NOPS indicates how many separate stores we are trying to combine.
15146 Returns true iff we could generate a new instruction. */
15147
15148 bool
15149 gen_stm_seq (rtx *operands, int nops)
15150 {
15151 int i;
15152 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15153 rtx mems[MAX_LDM_STM_OPS];
15154 int base_reg;
15155 rtx base_reg_rtx;
15156 HOST_WIDE_INT offset;
15157 int write_back = FALSE;
15158 int stm_case;
15159 rtx addr;
15160 bool base_reg_dies;
15161
15162 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15163 mem_order, &base_reg, &offset, true);
15164
15165 if (stm_case == 0)
15166 return false;
15167
15168 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15169
15170 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15171 if (TARGET_THUMB1)
15172 {
15173 gcc_assert (base_reg_dies);
15174 write_back = TRUE;
15175 }
15176
15177 if (stm_case == 5)
15178 {
15179 gcc_assert (base_reg_dies);
15180 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15181 offset = 0;
15182 }
15183
15184 addr = plus_constant (Pmode, base_reg_rtx, offset);
15185
15186 for (i = 0; i < nops; i++)
15187 {
15188 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15189 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15190 SImode, addr, 0);
15191 }
15192 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15193 write_back ? offset + i * 4 : 0));
15194 return true;
15195 }
15196
15197 /* Called from a peephole2 expander to turn a sequence of stores that are
15198 preceded by constant loads into an STM instruction. OPERANDS are the
15199 operands found by the peephole matcher; NOPS indicates how many
15200 separate stores we are trying to combine; there are 2 * NOPS
15201 instructions in the peephole.
15202 Returns true iff we could generate a new instruction. */
15203
15204 bool
15205 gen_const_stm_seq (rtx *operands, int nops)
15206 {
15207 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15208 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15209 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15210 rtx mems[MAX_LDM_STM_OPS];
15211 int base_reg;
15212 rtx base_reg_rtx;
15213 HOST_WIDE_INT offset;
15214 int write_back = FALSE;
15215 int stm_case;
15216 rtx addr;
15217 bool base_reg_dies;
15218 int i, j;
15219 HARD_REG_SET allocated;
15220
15221 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15222 mem_order, &base_reg, &offset, false);
15223
15224 if (stm_case == 0)
15225 return false;
15226
15227 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15228
15229 /* If the same register is used more than once, try to find a free
15230 register. */
15231 CLEAR_HARD_REG_SET (allocated);
15232 for (i = 0; i < nops; i++)
15233 {
15234 for (j = i + 1; j < nops; j++)
15235 if (regs[i] == regs[j])
15236 {
15237 rtx t = peep2_find_free_register (0, nops * 2,
15238 TARGET_THUMB1 ? "l" : "r",
15239 SImode, &allocated);
15240 if (t == NULL_RTX)
15241 return false;
15242 reg_rtxs[i] = t;
15243 regs[i] = REGNO (t);
15244 }
15245 }
15246
15247 /* Compute an ordering that maps the register numbers to an ascending
15248 sequence. */
15249 reg_order[0] = 0;
15250 for (i = 0; i < nops; i++)
15251 if (regs[i] < regs[reg_order[0]])
15252 reg_order[0] = i;
15253
15254 for (i = 1; i < nops; i++)
15255 {
15256 int this_order = reg_order[i - 1];
15257 for (j = 0; j < nops; j++)
15258 if (regs[j] > regs[reg_order[i - 1]]
15259 && (this_order == reg_order[i - 1]
15260 || regs[j] < regs[this_order]))
15261 this_order = j;
15262 reg_order[i] = this_order;
15263 }
15264
15265 /* Ensure that registers that must be live after the instruction end
15266 up with the correct value. */
15267 for (i = 0; i < nops; i++)
15268 {
15269 int this_order = reg_order[i];
15270 if ((this_order != mem_order[i]
15271 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15272 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15273 return false;
15274 }
15275
15276 /* Load the constants. */
15277 for (i = 0; i < nops; i++)
15278 {
15279 rtx op = operands[2 * nops + mem_order[i]];
15280 sorted_regs[i] = regs[reg_order[i]];
15281 emit_move_insn (reg_rtxs[reg_order[i]], op);
15282 }
15283
15284 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15285
15286 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15287 if (TARGET_THUMB1)
15288 {
15289 gcc_assert (base_reg_dies);
15290 write_back = TRUE;
15291 }
15292
15293 if (stm_case == 5)
15294 {
15295 gcc_assert (base_reg_dies);
15296 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15297 offset = 0;
15298 }
15299
15300 addr = plus_constant (Pmode, base_reg_rtx, offset);
15301
15302 for (i = 0; i < nops; i++)
15303 {
15304 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15305 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15306 SImode, addr, 0);
15307 }
15308 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15309 write_back ? offset + i * 4 : 0));
15310 return true;
15311 }
15312
15313 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15314 unaligned copies on processors which support unaligned semantics for those
15315 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15316 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15317 An interleave factor of 1 (the minimum) will perform no interleaving.
15318 Load/store multiple are used for aligned addresses where possible. */
15319
15320 static void
15321 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15322 HOST_WIDE_INT length,
15323 unsigned int interleave_factor)
15324 {
15325 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15326 int *regnos = XALLOCAVEC (int, interleave_factor);
15327 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15328 HOST_WIDE_INT i, j;
15329 HOST_WIDE_INT remaining = length, words;
15330 rtx halfword_tmp = NULL, byte_tmp = NULL;
15331 rtx dst, src;
15332 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15333 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15334 HOST_WIDE_INT srcoffset, dstoffset;
15335 HOST_WIDE_INT src_autoinc, dst_autoinc;
15336 rtx mem, addr;
15337
15338 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15339
15340 /* Use hard registers if we have aligned source or destination so we can use
15341 load/store multiple with contiguous registers. */
15342 if (dst_aligned || src_aligned)
15343 for (i = 0; i < interleave_factor; i++)
15344 regs[i] = gen_rtx_REG (SImode, i);
15345 else
15346 for (i = 0; i < interleave_factor; i++)
15347 regs[i] = gen_reg_rtx (SImode);
15348
15349 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15350 src = copy_addr_to_reg (XEXP (srcbase, 0));
15351
15352 srcoffset = dstoffset = 0;
15353
15354 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15355 For copying the last bytes we want to subtract this offset again. */
15356 src_autoinc = dst_autoinc = 0;
15357
15358 for (i = 0; i < interleave_factor; i++)
15359 regnos[i] = i;
15360
15361 /* Copy BLOCK_SIZE_BYTES chunks. */
15362
15363 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15364 {
15365 /* Load words. */
15366 if (src_aligned && interleave_factor > 1)
15367 {
15368 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15369 TRUE, srcbase, &srcoffset));
15370 src_autoinc += UNITS_PER_WORD * interleave_factor;
15371 }
15372 else
15373 {
15374 for (j = 0; j < interleave_factor; j++)
15375 {
15376 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15377 - src_autoinc));
15378 mem = adjust_automodify_address (srcbase, SImode, addr,
15379 srcoffset + j * UNITS_PER_WORD);
15380 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15381 }
15382 srcoffset += block_size_bytes;
15383 }
15384
15385 /* Store words. */
15386 if (dst_aligned && interleave_factor > 1)
15387 {
15388 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15389 TRUE, dstbase, &dstoffset));
15390 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15391 }
15392 else
15393 {
15394 for (j = 0; j < interleave_factor; j++)
15395 {
15396 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15397 - dst_autoinc));
15398 mem = adjust_automodify_address (dstbase, SImode, addr,
15399 dstoffset + j * UNITS_PER_WORD);
15400 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15401 }
15402 dstoffset += block_size_bytes;
15403 }
15404
15405 remaining -= block_size_bytes;
15406 }
15407
15408 /* Copy any whole words left (note these aren't interleaved with any
15409 subsequent halfword/byte load/stores in the interests of simplicity). */
15410
15411 words = remaining / UNITS_PER_WORD;
15412
15413 gcc_assert (words < interleave_factor);
15414
15415 if (src_aligned && words > 1)
15416 {
15417 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15418 &srcoffset));
15419 src_autoinc += UNITS_PER_WORD * words;
15420 }
15421 else
15422 {
15423 for (j = 0; j < words; j++)
15424 {
15425 addr = plus_constant (Pmode, src,
15426 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15427 mem = adjust_automodify_address (srcbase, SImode, addr,
15428 srcoffset + j * UNITS_PER_WORD);
15429 if (src_aligned)
15430 emit_move_insn (regs[j], mem);
15431 else
15432 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15433 }
15434 srcoffset += words * UNITS_PER_WORD;
15435 }
15436
15437 if (dst_aligned && words > 1)
15438 {
15439 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15440 &dstoffset));
15441 dst_autoinc += words * UNITS_PER_WORD;
15442 }
15443 else
15444 {
15445 for (j = 0; j < words; j++)
15446 {
15447 addr = plus_constant (Pmode, dst,
15448 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15449 mem = adjust_automodify_address (dstbase, SImode, addr,
15450 dstoffset + j * UNITS_PER_WORD);
15451 if (dst_aligned)
15452 emit_move_insn (mem, regs[j]);
15453 else
15454 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15455 }
15456 dstoffset += words * UNITS_PER_WORD;
15457 }
15458
15459 remaining -= words * UNITS_PER_WORD;
15460
15461 gcc_assert (remaining < 4);
15462
15463 /* Copy a halfword if necessary. */
15464
15465 if (remaining >= 2)
15466 {
15467 halfword_tmp = gen_reg_rtx (SImode);
15468
15469 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15470 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15471 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15472
15473 /* Either write out immediately, or delay until we've loaded the last
15474 byte, depending on interleave factor. */
15475 if (interleave_factor == 1)
15476 {
15477 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15478 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15479 emit_insn (gen_unaligned_storehi (mem,
15480 gen_lowpart (HImode, halfword_tmp)));
15481 halfword_tmp = NULL;
15482 dstoffset += 2;
15483 }
15484
15485 remaining -= 2;
15486 srcoffset += 2;
15487 }
15488
15489 gcc_assert (remaining < 2);
15490
15491 /* Copy last byte. */
15492
15493 if ((remaining & 1) != 0)
15494 {
15495 byte_tmp = gen_reg_rtx (SImode);
15496
15497 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15498 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15499 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15500
15501 if (interleave_factor == 1)
15502 {
15503 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15504 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15505 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15506 byte_tmp = NULL;
15507 dstoffset++;
15508 }
15509
15510 remaining--;
15511 srcoffset++;
15512 }
15513
15514 /* Store last halfword if we haven't done so already. */
15515
15516 if (halfword_tmp)
15517 {
15518 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15519 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15520 emit_insn (gen_unaligned_storehi (mem,
15521 gen_lowpart (HImode, halfword_tmp)));
15522 dstoffset += 2;
15523 }
15524
15525 /* Likewise for last byte. */
15526
15527 if (byte_tmp)
15528 {
15529 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15530 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15531 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15532 dstoffset++;
15533 }
15534
15535 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15536 }
15537
15538 /* From mips_adjust_block_mem:
15539
15540 Helper function for doing a loop-based block operation on memory
15541 reference MEM. Each iteration of the loop will operate on LENGTH
15542 bytes of MEM.
15543
15544 Create a new base register for use within the loop and point it to
15545 the start of MEM. Create a new memory reference that uses this
15546 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15547
15548 static void
15549 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15550 rtx *loop_mem)
15551 {
15552 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15553
15554 /* Although the new mem does not refer to a known location,
15555 it does keep up to LENGTH bytes of alignment. */
15556 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15557 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15558 }
15559
15560 /* From mips_block_move_loop:
15561
15562 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15563 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15564 the memory regions do not overlap. */
15565
15566 static void
15567 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15568 unsigned int interleave_factor,
15569 HOST_WIDE_INT bytes_per_iter)
15570 {
15571 rtx src_reg, dest_reg, final_src, test;
15572 HOST_WIDE_INT leftover;
15573
15574 leftover = length % bytes_per_iter;
15575 length -= leftover;
15576
15577 /* Create registers and memory references for use within the loop. */
15578 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15579 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15580
15581 /* Calculate the value that SRC_REG should have after the last iteration of
15582 the loop. */
15583 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15584 0, 0, OPTAB_WIDEN);
15585
15586 /* Emit the start of the loop. */
15587 rtx_code_label *label = gen_label_rtx ();
15588 emit_label (label);
15589
15590 /* Emit the loop body. */
15591 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15592 interleave_factor);
15593
15594 /* Move on to the next block. */
15595 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15596 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15597
15598 /* Emit the loop condition. */
15599 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15600 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15601
15602 /* Mop up any left-over bytes. */
15603 if (leftover)
15604 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15605 }
15606
15607 /* Emit a block move when either the source or destination is unaligned (not
15608 aligned to a four-byte boundary). This may need further tuning depending on
15609 core type, optimize_size setting, etc. */
15610
15611 static int
15612 arm_cpymemqi_unaligned (rtx *operands)
15613 {
15614 HOST_WIDE_INT length = INTVAL (operands[2]);
15615
15616 if (optimize_size)
15617 {
15618 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15619 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15620 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15621 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15622 or dst_aligned though: allow more interleaving in those cases since the
15623 resulting code can be smaller. */
15624 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15625 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15626
15627 if (length > 12)
15628 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15629 interleave_factor, bytes_per_iter);
15630 else
15631 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15632 interleave_factor);
15633 }
15634 else
15635 {
15636 /* Note that the loop created by arm_block_move_unaligned_loop may be
15637 subject to loop unrolling, which makes tuning this condition a little
15638 redundant. */
15639 if (length > 32)
15640 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15641 else
15642 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15643 }
15644
15645 return 1;
15646 }
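/* For instance, when optimizing for size, a 40-byte copy with a
   word-aligned destination uses the loop with an interleave factor of 2
   and 8 bytes per iteration (five iterations, no leftover), while a
   12-byte copy is expanded straight-line.  When optimizing for speed,
   copies of more than 32 bytes use a 16-bytes-per-iteration loop with an
   interleave factor of 4.  */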
15647
15648 int
15649 arm_gen_cpymemqi (rtx *operands)
15650 {
15651 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15652 HOST_WIDE_INT srcoffset, dstoffset;
15653 rtx src, dst, srcbase, dstbase;
15654 rtx part_bytes_reg = NULL;
15655 rtx mem;
15656
15657 if (!CONST_INT_P (operands[2])
15658 || !CONST_INT_P (operands[3])
15659 || INTVAL (operands[2]) > 64)
15660 return 0;
15661
15662 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15663 return arm_cpymemqi_unaligned (operands);
15664
15665 if (INTVAL (operands[3]) & 3)
15666 return 0;
15667
15668 dstbase = operands[0];
15669 srcbase = operands[1];
15670
15671 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15672 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15673
15674 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15675 out_words_to_go = INTVAL (operands[2]) / 4;
15676 last_bytes = INTVAL (operands[2]) & 3;
15677 dstoffset = srcoffset = 0;
15678
15679 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15680 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15681
15682 while (in_words_to_go >= 2)
15683 {
15684 if (in_words_to_go > 4)
15685 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15686 TRUE, srcbase, &srcoffset));
15687 else
15688 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15689 src, FALSE, srcbase,
15690 &srcoffset));
15691
15692 if (out_words_to_go)
15693 {
15694 if (out_words_to_go > 4)
15695 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15696 TRUE, dstbase, &dstoffset));
15697 else if (out_words_to_go != 1)
15698 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15699 out_words_to_go, dst,
15700 (last_bytes == 0
15701 ? FALSE : TRUE),
15702 dstbase, &dstoffset));
15703 else
15704 {
15705 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15706 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15707 if (last_bytes != 0)
15708 {
15709 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15710 dstoffset += 4;
15711 }
15712 }
15713 }
15714
15715 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15716 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15717 }
15718
15719 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15720 if (out_words_to_go)
15721 {
15722 rtx sreg;
15723
15724 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15725 sreg = copy_to_reg (mem);
15726
15727 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15728 emit_move_insn (mem, sreg);
15729 in_words_to_go--;
15730
15731 gcc_assert (!in_words_to_go); /* Sanity check */
15732 }
15733
15734 if (in_words_to_go)
15735 {
15736 gcc_assert (in_words_to_go > 0);
15737
15738 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15739 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15740 }
15741
15742 gcc_assert (!last_bytes || part_bytes_reg);
15743
15744 if (BYTES_BIG_ENDIAN && last_bytes)
15745 {
15746 rtx tmp = gen_reg_rtx (SImode);
15747
15748 /* The bytes we want are in the top end of the word. */
15749 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15750 GEN_INT (8 * (4 - last_bytes))));
15751 part_bytes_reg = tmp;
15752
15753 while (last_bytes)
15754 {
15755 mem = adjust_automodify_address (dstbase, QImode,
15756 plus_constant (Pmode, dst,
15757 last_bytes - 1),
15758 dstoffset + last_bytes - 1);
15759 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15760
15761 if (--last_bytes)
15762 {
15763 tmp = gen_reg_rtx (SImode);
15764 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15765 part_bytes_reg = tmp;
15766 }
15767 }
15768
15769 }
15770 else
15771 {
15772 if (last_bytes > 1)
15773 {
15774 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15775 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15776 last_bytes -= 2;
15777 if (last_bytes)
15778 {
15779 rtx tmp = gen_reg_rtx (SImode);
15780 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15781 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15782 part_bytes_reg = tmp;
15783 dstoffset += 2;
15784 }
15785 }
15786
15787 if (last_bytes)
15788 {
15789 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15790 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15791 }
15792 }
15793
15794 return 1;
15795 }
15796
15797 /* Helper for gen_cpymem_ldrd_strd. Increase the address of the memory rtx
15798 by its mode size. */
15799 inline static rtx
15800 next_consecutive_mem (rtx mem)
15801 {
15802 machine_mode mode = GET_MODE (mem);
15803 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15804 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15805
15806 return adjust_automodify_address (mem, mode, addr, offset);
15807 }
15808
15809 /* Copy using LDRD/STRD instructions whenever possible.
15810 Returns true upon success. */
15811 bool
15812 gen_cpymem_ldrd_strd (rtx *operands)
15813 {
15814 unsigned HOST_WIDE_INT len;
15815 HOST_WIDE_INT align;
15816 rtx src, dst, base;
15817 rtx reg0;
15818 bool src_aligned, dst_aligned;
15819 bool src_volatile, dst_volatile;
15820
15821 gcc_assert (CONST_INT_P (operands[2]));
15822 gcc_assert (CONST_INT_P (operands[3]));
15823
15824 len = UINTVAL (operands[2]);
15825 if (len > 64)
15826 return false;
15827
15828 /* Maximum alignment we can assume for both src and dst buffers. */
15829 align = INTVAL (operands[3]);
15830
15831 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15832 return false;
15833
15834 /* Place src and dst addresses in registers
15835 and update the corresponding mem rtx. */
15836 dst = operands[0];
15837 dst_volatile = MEM_VOLATILE_P (dst);
15838 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15839 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15840 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15841
15842 src = operands[1];
15843 src_volatile = MEM_VOLATILE_P (src);
15844 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15845 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15846 src = adjust_automodify_address (src, VOIDmode, base, 0);
15847
15848 if (!unaligned_access && !(src_aligned && dst_aligned))
15849 return false;
15850
15851 if (src_volatile || dst_volatile)
15852 return false;
15853
15854 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15855 if (!(dst_aligned || src_aligned))
15856 return arm_gen_cpymemqi (operands);
15857
15858 /* If either the src or dst is unaligned, we'll be accessing it as pairs
15859 of unaligned SImode accesses. Otherwise we can generate DImode
15860 ldrd/strd instructions. */
15861 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15862 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15863
15864 while (len >= 8)
15865 {
15866 len -= 8;
15867 reg0 = gen_reg_rtx (DImode);
15868 rtx first_reg = NULL_RTX;
15869 rtx second_reg = NULL_RTX;
15870
15871 if (!src_aligned || !dst_aligned)
15872 {
15873 if (BYTES_BIG_ENDIAN)
15874 {
15875 second_reg = gen_lowpart (SImode, reg0);
15876 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15877 }
15878 else
15879 {
15880 first_reg = gen_lowpart (SImode, reg0);
15881 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15882 }
15883 }
15884 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15885 emit_move_insn (reg0, src);
15886 else if (src_aligned)
15887 emit_insn (gen_unaligned_loaddi (reg0, src));
15888 else
15889 {
15890 emit_insn (gen_unaligned_loadsi (first_reg, src));
15891 src = next_consecutive_mem (src);
15892 emit_insn (gen_unaligned_loadsi (second_reg, src));
15893 }
15894
15895 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15896 emit_move_insn (dst, reg0);
15897 else if (dst_aligned)
15898 emit_insn (gen_unaligned_storedi (dst, reg0));
15899 else
15900 {
15901 emit_insn (gen_unaligned_storesi (dst, first_reg));
15902 dst = next_consecutive_mem (dst);
15903 emit_insn (gen_unaligned_storesi (dst, second_reg));
15904 }
15905
15906 src = next_consecutive_mem (src);
15907 dst = next_consecutive_mem (dst);
15908 }
15909
15910 gcc_assert (len < 8);
15911 if (len >= 4)
15912 {
15913 /* At least a word but less than a double-word left to copy. Copy a word. */
15914 reg0 = gen_reg_rtx (SImode);
15915 src = adjust_address (src, SImode, 0);
15916 dst = adjust_address (dst, SImode, 0);
15917 if (src_aligned)
15918 emit_move_insn (reg0, src);
15919 else
15920 emit_insn (gen_unaligned_loadsi (reg0, src));
15921
15922 if (dst_aligned)
15923 emit_move_insn (dst, reg0);
15924 else
15925 emit_insn (gen_unaligned_storesi (dst, reg0));
15926
15927 src = next_consecutive_mem (src);
15928 dst = next_consecutive_mem (dst);
15929 len -= 4;
15930 }
15931
15932 if (len == 0)
15933 return true;
15934
15935 /* Copy the remaining bytes. */
15936 if (len >= 2)
15937 {
15938 dst = adjust_address (dst, HImode, 0);
15939 src = adjust_address (src, HImode, 0);
15940 reg0 = gen_reg_rtx (SImode);
15941 if (src_aligned)
15942 emit_insn (gen_zero_extendhisi2 (reg0, src));
15943 else
15944 emit_insn (gen_unaligned_loadhiu (reg0, src));
15945
15946 if (dst_aligned)
15947 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15948 else
15949 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15950
15951 src = next_consecutive_mem (src);
15952 dst = next_consecutive_mem (dst);
15953 if (len == 2)
15954 return true;
15955 }
15956
15957 dst = adjust_address (dst, QImode, 0);
15958 src = adjust_address (src, QImode, 0);
15959 reg0 = gen_reg_rtx (QImode);
15960 emit_move_insn (reg0, src);
15961 emit_move_insn (dst, reg0);
15962 return true;
15963 }
15964
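/* Editor's note: standalone sketch of the length decomposition performed
   by gen_cpymem_ldrd_strd above -- peel 8-byte chunks (LDRD/STRD or two
   unaligned word accesses), then a 4-, 2- and final 1-byte tail.  The
   function name and the use of __builtin_memcpy are illustrative only;
   this is not part of the expander.  */

static void
cpymem_tail_sketch (unsigned char *dst, const unsigned char *src,
		    unsigned long len)
{
  while (len >= 8)		/* Doubleword chunks.  */
    {
      __builtin_memcpy (dst, src, 8);
      dst += 8, src += 8, len -= 8;
    }
  if (len >= 4)			/* One word.  */
    {
      __builtin_memcpy (dst, src, 4);
      dst += 4, src += 4, len -= 4;
    }
  if (len >= 2)			/* One halfword.  */
    {
      __builtin_memcpy (dst, src, 2);
      dst += 2, src += 2, len -= 2;
    }
  if (len)			/* Final byte.  */
    *dst = *src;
}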
15965 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15966 into its component 32-bit subregs. OP2 may be an immediate
15967 constant and we want to simplify it in that case. */
15968 void
15969 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15970 rtx *lo_op2, rtx *hi_op2)
15971 {
15972 *lo_op1 = gen_lowpart (SImode, op1);
15973 *hi_op1 = gen_highpart (SImode, op1);
15974 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15975 subreg_lowpart_offset (SImode, DImode));
15976 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15977 subreg_highpart_offset (SImode, DImode));
15978 }
15979
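/* Editor's note: for a constant OP2 the lowpart/highpart split above is
   simply the value's least- and most-significant 32 bits.  Standalone
   sketch (hypothetical name, not part of arm.cc):  */

static inline void
di_split_sketch (unsigned long long v, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (v & 0xffffffffu);   /* subreg_lowpart_offset half.  */
  *hi = (unsigned int) (v >> 32);           /* subreg_highpart_offset half.  */
}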
15980 /* Select a dominance comparison mode if possible for a test of the general
15981 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15982 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15983 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15984 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15985 In all cases OP will be either EQ or NE, but we don't need to know which
15986 here. If we are unable to support a dominance comparison we return
15987 CC mode. This will then fail to match for the RTL expressions that
15988 generate this call. */
15989 machine_mode
15990 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15991 {
15992 enum rtx_code cond1, cond2;
15993 int swapped = 0;
15994
15995 /* Currently we will probably get the wrong result if the individual
15996 comparisons are not simple. This also ensures that it is safe to
15997 reverse a comparison if necessary. */
15998 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15999 != CCmode)
16000 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16001 != CCmode))
16002 return CCmode;
16003
16004 /* The if_then_else variant of this tests the second condition if the
16005 first passes, but is true if the first fails. Reverse the first
16006 condition to get a true "inclusive-or" expression. */
16007 if (cond_or == DOM_CC_NX_OR_Y)
16008 cond1 = reverse_condition (cond1);
16009
16010 /* If the comparisons are not equal, and one doesn't dominate the other,
16011 then we can't do this. */
16012 if (cond1 != cond2
16013 && !comparison_dominates_p (cond1, cond2)
16014 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16015 return CCmode;
16016
16017 if (swapped)
16018 std::swap (cond1, cond2);
16019
16020 switch (cond1)
16021 {
16022 case EQ:
16023 if (cond_or == DOM_CC_X_AND_Y)
16024 return CC_DEQmode;
16025
16026 switch (cond2)
16027 {
16028 case EQ: return CC_DEQmode;
16029 case LE: return CC_DLEmode;
16030 case LEU: return CC_DLEUmode;
16031 case GE: return CC_DGEmode;
16032 case GEU: return CC_DGEUmode;
16033 default: gcc_unreachable ();
16034 }
16035
16036 case LT:
16037 if (cond_or == DOM_CC_X_AND_Y)
16038 return CC_DLTmode;
16039
16040 switch (cond2)
16041 {
16042 case LT:
16043 return CC_DLTmode;
16044 case LE:
16045 return CC_DLEmode;
16046 case NE:
16047 return CC_DNEmode;
16048 default:
16049 gcc_unreachable ();
16050 }
16051
16052 case GT:
16053 if (cond_or == DOM_CC_X_AND_Y)
16054 return CC_DGTmode;
16055
16056 switch (cond2)
16057 {
16058 case GT:
16059 return CC_DGTmode;
16060 case GE:
16061 return CC_DGEmode;
16062 case NE:
16063 return CC_DNEmode;
16064 default:
16065 gcc_unreachable ();
16066 }
16067
16068 case LTU:
16069 if (cond_or == DOM_CC_X_AND_Y)
16070 return CC_DLTUmode;
16071
16072 switch (cond2)
16073 {
16074 case LTU:
16075 return CC_DLTUmode;
16076 case LEU:
16077 return CC_DLEUmode;
16078 case NE:
16079 return CC_DNEmode;
16080 default:
16081 gcc_unreachable ();
16082 }
16083
16084 case GTU:
16085 if (cond_or == DOM_CC_X_AND_Y)
16086 return CC_DGTUmode;
16087
16088 switch (cond2)
16089 {
16090 case GTU:
16091 return CC_DGTUmode;
16092 case GEU:
16093 return CC_DGEUmode;
16094 case NE:
16095 return CC_DNEmode;
16096 default:
16097 gcc_unreachable ();
16098 }
16099
16100 /* The remaining cases only occur when both comparisons are the
16101 same. */
16102 case NE:
16103 gcc_assert (cond1 == cond2);
16104 return CC_DNEmode;
16105
16106 case LE:
16107 gcc_assert (cond1 == cond2);
16108 return CC_DLEmode;
16109
16110 case GE:
16111 gcc_assert (cond1 == cond2);
16112 return CC_DGEmode;
16113
16114 case LEU:
16115 gcc_assert (cond1 == cond2);
16116 return CC_DLEUmode;
16117
16118 case GEU:
16119 gcc_assert (cond1 == cond2);
16120 return CC_DGEUmode;
16121
16122 default:
16123 gcc_unreachable ();
16124 }
16125 }
16126
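/* Editor's note: the "dominance" relation used above is implication
   between two comparisons: cond1 dominates cond2 when cond1 being true
   guarantees that cond2 is also true (EQ dominates LE/GE/LEU/GEU, LT
   dominates LE, and so on).  That implication is what allows the pair of
   comparisons in a COND_OR/COND_AND to be evaluated by a conditional
   compare sequence whose final test is a single condition.  A trivial
   standalone check of one such implication (hypothetical name):  */

static inline int
dominance_sketch (int a, int b)
{
  /* EQ dominates LE: whenever a == b holds, a <= b holds as well,
     so this always returns 1.  */
  return (a == b) ? (a <= b) : 1;
}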
16127 machine_mode
16128 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16129 {
16130 /* All floating point compares return CCFP if it is an equality
16131 comparison, and CCFPE otherwise. */
16132 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16133 {
16134 switch (op)
16135 {
16136 case EQ:
16137 case NE:
16138 case UNORDERED:
16139 case ORDERED:
16140 case UNLT:
16141 case UNLE:
16142 case UNGT:
16143 case UNGE:
16144 case UNEQ:
16145 case LTGT:
16146 return CCFPmode;
16147
16148 case LT:
16149 case LE:
16150 case GT:
16151 case GE:
16152 return CCFPEmode;
16153
16154 default:
16155 gcc_unreachable ();
16156 }
16157 }
16158
16159 /* A compare with a shifted operand. Because of canonicalization, the
16160 comparison will have to be swapped when we emit the assembler. */
16161 if (GET_MODE (y) == SImode
16162 && (REG_P (y) || (SUBREG_P (y)))
16163 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16164 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16165 || GET_CODE (x) == ROTATERT))
16166 return CC_SWPmode;
16167
16168 /* A widened compare of the sum of a value plus a carry against a
16169 constant. This is a representation of RSC. We want to swap the
16170 result of the comparison at output. Not valid if the Z bit is
16171 needed. */
16172 if (GET_MODE (x) == DImode
16173 && GET_CODE (x) == PLUS
16174 && arm_borrow_operation (XEXP (x, 1), DImode)
16175 && CONST_INT_P (y)
16176 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16177 && (op == LE || op == GT))
16178 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16179 && (op == LEU || op == GTU))))
16180 return CC_SWPmode;
16181
16182 /* If X is a constant we want to use CC_RSBmode. This is
16183 non-canonical, but arm_gen_compare_reg uses this to generate the
16184 correct canonical form. */
16185 if (GET_MODE (y) == SImode
16186 && (REG_P (y) || SUBREG_P (y))
16187 && CONST_INT_P (x))
16188 return CC_RSBmode;
16189
16190 /* This operation is performed swapped, but since we only rely on the Z
16191 flag we don't need an additional mode. */
16192 if (GET_MODE (y) == SImode
16193 && (REG_P (y) || (SUBREG_P (y)))
16194 && GET_CODE (x) == NEG
16195 && (op == EQ || op == NE))
16196 return CC_Zmode;
16197
16198 /* This is a special case that is used by combine to allow a
16199 comparison of a shifted byte load to be split into a zero-extend
16200 followed by a comparison of the shifted integer (only valid for
16201 equalities and unsigned inequalities). */
16202 if (GET_MODE (x) == SImode
16203 && GET_CODE (x) == ASHIFT
16204 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16205 && GET_CODE (XEXP (x, 0)) == SUBREG
16206 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16207 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16208 && (op == EQ || op == NE
16209 || op == GEU || op == GTU || op == LTU || op == LEU)
16210 && CONST_INT_P (y))
16211 return CC_Zmode;
16212
16213 /* A construct for a conditional compare, if the false arm contains
16214 0, then both conditions must be true, otherwise either condition
16215 must be true. Not all conditions are possible, so CCmode is
16216 returned if it can't be done. */
16217 if (GET_CODE (x) == IF_THEN_ELSE
16218 && (XEXP (x, 2) == const0_rtx
16219 || XEXP (x, 2) == const1_rtx)
16220 && COMPARISON_P (XEXP (x, 0))
16221 && COMPARISON_P (XEXP (x, 1)))
16222 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16223 INTVAL (XEXP (x, 2)));
16224
16225 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16226 if (GET_CODE (x) == AND
16227 && (op == EQ || op == NE)
16228 && COMPARISON_P (XEXP (x, 0))
16229 && COMPARISON_P (XEXP (x, 1)))
16230 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16231 DOM_CC_X_AND_Y);
16232
16233 if (GET_CODE (x) == IOR
16234 && (op == EQ || op == NE)
16235 && COMPARISON_P (XEXP (x, 0))
16236 && COMPARISON_P (XEXP (x, 1)))
16237 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16238 DOM_CC_X_OR_Y);
16239
16240 /* An operation (on Thumb) where we want to test for a single bit.
16241 This is done by shifting that bit up into the top bit of a
16242 scratch register; we can then branch on the sign bit. */
16243 if (TARGET_THUMB1
16244 && GET_MODE (x) == SImode
16245 && (op == EQ || op == NE)
16246 && GET_CODE (x) == ZERO_EXTRACT
16247 && XEXP (x, 1) == const1_rtx)
16248 return CC_Nmode;
16249
16250 /* An operation that sets the condition codes as a side-effect, the
16251 V flag is not set correctly, so we can only use comparisons where
16252 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16253 instead.) */
16254 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16255 if (GET_MODE (x) == SImode
16256 && y == const0_rtx
16257 && (op == EQ || op == NE || op == LT || op == GE)
16258 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16259 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16260 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16261 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16262 || GET_CODE (x) == LSHIFTRT
16263 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16264 || GET_CODE (x) == ROTATERT
16265 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16266 return CC_NZmode;
16267
16268 /* A comparison of ~reg with a const is really a special
16269 canonicalization of compare (~const, reg), which is a reverse
16270 subtract operation. We may not get here if CONST is 0, but that
16271 doesn't matter because ~0 isn't a valid immediate for RSB. */
16272 if (GET_MODE (x) == SImode
16273 && GET_CODE (x) == NOT
16274 && CONST_INT_P (y))
16275 return CC_RSBmode;
16276
16277 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16278 return CC_Zmode;
16279
16280 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16281 && GET_CODE (x) == PLUS
16282 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16283 return CC_Cmode;
16284
16285 if (GET_MODE (x) == DImode
16286 && GET_CODE (x) == PLUS
16287 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16288 && CONST_INT_P (y)
16289 && UINTVAL (y) == 0x800000000
16290 && (op == GEU || op == LTU))
16291 return CC_ADCmode;
16292
16293 if (GET_MODE (x) == DImode
16294 && (op == GE || op == LT)
16295 && GET_CODE (x) == SIGN_EXTEND
16296 && ((GET_CODE (y) == PLUS
16297 && arm_borrow_operation (XEXP (y, 0), DImode))
16298 || arm_borrow_operation (y, DImode)))
16299 return CC_NVmode;
16300
16301 if (GET_MODE (x) == DImode
16302 && (op == GEU || op == LTU)
16303 && GET_CODE (x) == ZERO_EXTEND
16304 && ((GET_CODE (y) == PLUS
16305 && arm_borrow_operation (XEXP (y, 0), DImode))
16306 || arm_borrow_operation (y, DImode)))
16307 return CC_Bmode;
16308
16309 if (GET_MODE (x) == DImode
16310 && (op == EQ || op == NE)
16311 && (GET_CODE (x) == PLUS
16312 || GET_CODE (x) == MINUS)
16313 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16314 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16315 && GET_CODE (y) == SIGN_EXTEND
16316 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16317 return CC_Vmode;
16318
16319 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16320 return GET_MODE (x);
16321
16322 return CCmode;
16323 }
16324
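/* Editor's note: the CC_Cmode case above (an LTU/GEU test of a PLUS
   against one of its own operands) is the usual carry-out idiom for
   unsigned addition.  Standalone sketch (hypothetical name):  */

static inline unsigned int
add_carry_out_sketch (unsigned int a, unsigned int b)
{
  unsigned int sum = a + b;
  return sum < a;		/* 1 iff the 32-bit addition wrapped.  */
}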
16325 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16326 the sequence of instructions needed to generate a suitable condition
16327 code register. Return the CC register result. */
16328 static rtx
16329 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16330 {
16331 machine_mode mode;
16332 rtx cc_reg;
16333
16334 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16335 gcc_assert (TARGET_32BIT);
16336 gcc_assert (!CONST_INT_P (x));
16337
16338 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16339 subreg_lowpart_offset (SImode, DImode));
16340 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16341 subreg_highpart_offset (SImode, DImode));
16342 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16343 subreg_lowpart_offset (SImode, DImode));
16344 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16345 subreg_highpart_offset (SImode, DImode));
16346 switch (code)
16347 {
16348 case EQ:
16349 case NE:
16350 {
16351 if (y_lo == const0_rtx || y_hi == const0_rtx)
16352 {
16353 if (y_lo != const0_rtx)
16354 {
16355 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16356
16357 gcc_assert (y_hi == const0_rtx);
16358 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16359 if (!arm_add_operand (y_lo, SImode))
16360 y_lo = force_reg (SImode, y_lo);
16361 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16362 x_lo = scratch2;
16363 }
16364 else if (y_hi != const0_rtx)
16365 {
16366 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16367
16368 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16369 if (!arm_add_operand (y_hi, SImode))
16370 y_hi = force_reg (SImode, y_hi);
16371 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16372 x_hi = scratch2;
16373 }
16374
16375 if (!scratch)
16376 {
16377 gcc_assert (!reload_completed);
16378 scratch = gen_rtx_SCRATCH (SImode);
16379 }
16380
16381 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16382 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16383
16384 rtx set
16385 = gen_rtx_SET (cc_reg,
16386 gen_rtx_COMPARE (CC_NZmode,
16387 gen_rtx_IOR (SImode, x_lo, x_hi),
16388 const0_rtx));
16389 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16390 clobber)));
16391 return cc_reg;
16392 }
16393
16394 if (!arm_add_operand (y_lo, SImode))
16395 y_lo = force_reg (SImode, y_lo);
16396
16397 if (!arm_add_operand (y_hi, SImode))
16398 y_hi = force_reg (SImode, y_hi);
16399
16400 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16401 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16402 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16403 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16404 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16405
16406 emit_insn (gen_rtx_SET (cc_reg,
16407 gen_rtx_COMPARE (mode, conjunction,
16408 const0_rtx)));
16409 return cc_reg;
16410 }
16411
16412 case LT:
16413 case GE:
16414 {
16415 if (y_lo == const0_rtx)
16416 {
16417 /* If the low word of y is 0, then this is simply a normal
16418 compare of the upper words. */
16419 if (!arm_add_operand (y_hi, SImode))
16420 y_hi = force_reg (SImode, y_hi);
16421
16422 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16423 }
16424
16425 if (!arm_add_operand (y_lo, SImode))
16426 y_lo = force_reg (SImode, y_lo);
16427
16428 rtx cmp1
16429 = gen_rtx_LTU (DImode,
16430 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16431 const0_rtx);
16432
16433 if (!scratch)
16434 scratch = gen_rtx_SCRATCH (SImode);
16435
16436 if (!arm_not_operand (y_hi, SImode))
16437 y_hi = force_reg (SImode, y_hi);
16438
16439 rtx_insn *insn;
16440 if (y_hi == const0_rtx)
16441 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16442 cmp1));
16443 else if (CONST_INT_P (y_hi))
16444 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16445 y_hi, cmp1));
16446 else
16447 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16448 cmp1));
16449 return SET_DEST (single_set (insn));
16450 }
16451
16452 case LE:
16453 case GT:
16454 {
16455 /* During expansion, we only expect to get here if y is a
16456 constant that we want to handle, otherwise we should have
16457 swapped the operands already. */
16458 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16459
16460 if (!const_ok_for_arm (INTVAL (y_lo)))
16461 y_lo = force_reg (SImode, y_lo);
16462
16463 /* Perform a reverse subtract and compare. */
16464 rtx cmp1
16465 = gen_rtx_LTU (DImode,
16466 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16467 const0_rtx);
16468 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16469 x_hi, cmp1));
16470 return SET_DEST (single_set (insn));
16471 }
16472
16473 case LTU:
16474 case GEU:
16475 {
16476 if (y_lo == const0_rtx)
16477 {
16478 /* If the low word of y is 0, then this is simply a normal
16479 compare of the upper words. */
16480 if (!arm_add_operand (y_hi, SImode))
16481 y_hi = force_reg (SImode, y_hi);
16482
16483 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16484 }
16485
16486 if (!arm_add_operand (y_lo, SImode))
16487 y_lo = force_reg (SImode, y_lo);
16488
16489 rtx cmp1
16490 = gen_rtx_LTU (DImode,
16491 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16492 const0_rtx);
16493
16494 if (!scratch)
16495 scratch = gen_rtx_SCRATCH (SImode);
16496 if (!arm_not_operand (y_hi, SImode))
16497 y_hi = force_reg (SImode, y_hi);
16498
16499 rtx_insn *insn;
16500 if (y_hi == const0_rtx)
16501 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16502 cmp1));
16503 else if (CONST_INT_P (y_hi))
16504 {
16505 /* Constant is viewed as unsigned when zero-extended. */
16506 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16507 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16508 y_hi, cmp1));
16509 }
16510 else
16511 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16512 cmp1));
16513 return SET_DEST (single_set (insn));
16514 }
16515
16516 case LEU:
16517 case GTU:
16518 {
16519 /* During expansion, we only expect to get here if y is a
16520 constant that we want to handle, otherwise we should have
16521 swapped the operands already. */
16522 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16523
16524 if (!const_ok_for_arm (INTVAL (y_lo)))
16525 y_lo = force_reg (SImode, y_lo);
16526
16527 /* Perform a reverse subtract and compare. */
16528 rtx cmp1
16529 = gen_rtx_LTU (DImode,
16530 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16531 const0_rtx);
16532 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16533 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16534 x_hi, cmp1));
16535 return SET_DEST (single_set (insn));
16536 }
16537
16538 default:
16539 gcc_unreachable ();
16540 }
16541 }
16542
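/* Editor's note: two standalone sketches of the decompositions used by
   arm_gen_dicompare_reg above (hypothetical names, host arithmetic only).
   An EQ/NE test against zero ORs the two halves together; an unsigned
   LTU/GEU folds the borrow from the low-word compare into the high-word
   compare (CMP followed by a carry-in compare on the target).  */

static inline int
di_eq0_sketch (unsigned int x_lo, unsigned int x_hi)
{
  return (x_lo | x_hi) == 0;
}

static inline int
di_ltu_sketch (unsigned int x_lo, unsigned int x_hi,
	       unsigned int y_lo, unsigned int y_hi)
{
  unsigned int borrow = x_lo < y_lo;
  return x_hi < y_hi || (x_hi == y_hi && borrow);
}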
16543 /* X and Y are two things to compare using CODE. Emit the compare insn and
16544 return the rtx for register 0 in the proper mode. */
16545 rtx
16546 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16547 {
16548 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16549 return arm_gen_dicompare_reg (code, x, y, scratch);
16550
16551 machine_mode mode = SELECT_CC_MODE (code, x, y);
16552 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16553 if (mode == CC_RSBmode)
16554 {
16555 if (!scratch)
16556 scratch = gen_rtx_SCRATCH (SImode);
16557 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16558 GEN_INT (~UINTVAL (x)), y));
16559 }
16560 else
16561 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16562
16563 return cc_reg;
16564 }
16565
16566 /* Generate a sequence of insns that will generate the correct return
16567 address mask depending on the physical architecture that the program
16568 is running on. */
16569 rtx
16570 arm_gen_return_addr_mask (void)
16571 {
16572 rtx reg = gen_reg_rtx (Pmode);
16573
16574 emit_insn (gen_return_addr_mask (reg));
16575 return reg;
16576 }
16577
16578 void
16579 arm_reload_in_hi (rtx *operands)
16580 {
16581 rtx ref = operands[1];
16582 rtx base, scratch;
16583 HOST_WIDE_INT offset = 0;
16584
16585 if (SUBREG_P (ref))
16586 {
16587 offset = SUBREG_BYTE (ref);
16588 ref = SUBREG_REG (ref);
16589 }
16590
16591 if (REG_P (ref))
16592 {
16593 /* We have a pseudo which has been spilt onto the stack; there
16594 are two cases here: the first where there is a simple
16595 stack-slot replacement and a second where the stack-slot is
16596 out of range, or is used as a subreg. */
16597 if (reg_equiv_mem (REGNO (ref)))
16598 {
16599 ref = reg_equiv_mem (REGNO (ref));
16600 base = find_replacement (&XEXP (ref, 0));
16601 }
16602 else
16603 /* The slot is out of range, or was dressed up in a SUBREG. */
16604 base = reg_equiv_address (REGNO (ref));
16605
16606 /* PR 62554: If there is no equivalent memory location then just move
16607 the value as an SImode register move. This happens when the target
16608 architecture variant does not have an HImode register move. */
16609 if (base == NULL)
16610 {
16611 gcc_assert (REG_P (operands[0]));
16612 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16613 gen_rtx_SUBREG (SImode, ref, 0)));
16614 return;
16615 }
16616 }
16617 else
16618 base = find_replacement (&XEXP (ref, 0));
16619
16620 /* Handle the case where the address is too complex to be offset by 1. */
16621 if (GET_CODE (base) == MINUS
16622 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16623 {
16624 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16625
16626 emit_set_insn (base_plus, base);
16627 base = base_plus;
16628 }
16629 else if (GET_CODE (base) == PLUS)
16630 {
16631 /* The addend must be CONST_INT, or we would have dealt with it above. */
16632 HOST_WIDE_INT hi, lo;
16633
16634 offset += INTVAL (XEXP (base, 1));
16635 base = XEXP (base, 0);
16636
16637 /* Rework the address into a legal sequence of insns. */
16638 /* Valid range for lo is -4095 -> 4095 */
16639 lo = (offset >= 0
16640 ? (offset & 0xfff)
16641 : -((-offset) & 0xfff));
16642
16643 /* Corner case, if lo is the max offset then we would be out of range
16644 once we have added the additional 1 below, so bump the msb into the
16645 pre-loading insn(s). */
16646 if (lo == 4095)
16647 lo &= 0x7ff;
16648
16649 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16650 ^ (HOST_WIDE_INT) 0x80000000)
16651 - (HOST_WIDE_INT) 0x80000000);
16652
16653 gcc_assert (hi + lo == offset);
16654
16655 if (hi != 0)
16656 {
16657 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16658
16659 /* Get the base address; addsi3 knows how to handle constants
16660 that require more than one insn. */
16661 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16662 base = base_plus;
16663 offset = lo;
16664 }
16665 }
16666
16667 /* Operands[2] may overlap operands[0] (though it won't overlap
16668 operands[1]), that's why we asked for a DImode reg -- so we can
16669 use the bit that does not overlap. */
16670 if (REGNO (operands[2]) == REGNO (operands[0]))
16671 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16672 else
16673 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16674
16675 emit_insn (gen_zero_extendqisi2 (scratch,
16676 gen_rtx_MEM (QImode,
16677 plus_constant (Pmode, base,
16678 offset))));
16679 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16680 gen_rtx_MEM (QImode,
16681 plus_constant (Pmode, base,
16682 offset + 1))));
16683 if (!BYTES_BIG_ENDIAN)
16684 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16685 gen_rtx_IOR (SImode,
16686 gen_rtx_ASHIFT
16687 (SImode,
16688 gen_rtx_SUBREG (SImode, operands[0], 0),
16689 GEN_INT (8)),
16690 scratch));
16691 else
16692 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16693 gen_rtx_IOR (SImode,
16694 gen_rtx_ASHIFT (SImode, scratch,
16695 GEN_INT (8)),
16696 gen_rtx_SUBREG (SImode, operands[0], 0)));
16697 }
16698
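/* Editor's note: the two byte loads emitted above are merged with a
   shift and an IOR; which byte ends up in the high half of the halfword
   depends on endianness.  Standalone sketch (hypothetical name):  */

static inline unsigned int
reload_hi_combine_sketch (const unsigned char *p, int big_endian)
{
  unsigned int b0 = p[0], b1 = p[1];
  return big_endian ? (b0 << 8) | b1 : b0 | (b1 << 8);
}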
16699 /* Handle storing a half-word to memory during reload by synthesizing as two
16700 byte stores. Take care not to clobber the input values until after we
16701 have moved them somewhere safe. This code assumes that if the DImode
16702 scratch in operands[2] overlaps either the input value or output address
16703 in some way, then that value must die in this insn (we absolutely need
16704 two scratch registers for some corner cases). */
16705 void
16706 arm_reload_out_hi (rtx *operands)
16707 {
16708 rtx ref = operands[0];
16709 rtx outval = operands[1];
16710 rtx base, scratch;
16711 HOST_WIDE_INT offset = 0;
16712
16713 if (SUBREG_P (ref))
16714 {
16715 offset = SUBREG_BYTE (ref);
16716 ref = SUBREG_REG (ref);
16717 }
16718
16719 if (REG_P (ref))
16720 {
16721 /* We have a pseudo which has been spilt onto the stack; there
16722 are two cases here: the first where there is a simple
16723 stack-slot replacement and a second where the stack-slot is
16724 out of range, or is used as a subreg. */
16725 if (reg_equiv_mem (REGNO (ref)))
16726 {
16727 ref = reg_equiv_mem (REGNO (ref));
16728 base = find_replacement (&XEXP (ref, 0));
16729 }
16730 else
16731 /* The slot is out of range, or was dressed up in a SUBREG. */
16732 base = reg_equiv_address (REGNO (ref));
16733
16734 /* PR 62254: If there is no equivalent memory location then just move
16735 the value as an SImode register move. This happens when the target
16736 architecture variant does not have an HImode register move. */
16737 if (base == NULL)
16738 {
16739 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16740
16741 if (REG_P (outval))
16742 {
16743 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16744 gen_rtx_SUBREG (SImode, outval, 0)));
16745 }
16746 else /* SUBREG_P (outval) */
16747 {
16748 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16749 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16750 SUBREG_REG (outval)));
16751 else
16752 /* FIXME: Handle other cases ? */
16753 gcc_unreachable ();
16754 }
16755 return;
16756 }
16757 }
16758 else
16759 base = find_replacement (&XEXP (ref, 0));
16760
16761 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16762
16763 /* Handle the case where the address is too complex to be offset by 1. */
16764 if (GET_CODE (base) == MINUS
16765 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16766 {
16767 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16768
16769 /* Be careful not to destroy OUTVAL. */
16770 if (reg_overlap_mentioned_p (base_plus, outval))
16771 {
16772 /* Updating base_plus might destroy outval, see if we can
16773 swap the scratch and base_plus. */
16774 if (!reg_overlap_mentioned_p (scratch, outval))
16775 std::swap (scratch, base_plus);
16776 else
16777 {
16778 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16779
16780 /* Be conservative and copy OUTVAL into the scratch now,
16781 this should only be necessary if outval is a subreg
16782 of something larger than a word. */
16783 /* XXX Might this clobber base? I can't see how it can,
16784 since scratch is known to overlap with OUTVAL, and
16785 must be wider than a word. */
16786 emit_insn (gen_movhi (scratch_hi, outval));
16787 outval = scratch_hi;
16788 }
16789 }
16790
16791 emit_set_insn (base_plus, base);
16792 base = base_plus;
16793 }
16794 else if (GET_CODE (base) == PLUS)
16795 {
16796 /* The addend must be CONST_INT, or we would have dealt with it above. */
16797 HOST_WIDE_INT hi, lo;
16798
16799 offset += INTVAL (XEXP (base, 1));
16800 base = XEXP (base, 0);
16801
16802 /* Rework the address into a legal sequence of insns. */
16803 /* Valid range for lo is -4095 -> 4095 */
16804 lo = (offset >= 0
16805 ? (offset & 0xfff)
16806 : -((-offset) & 0xfff));
16807
16808 /* Corner case, if lo is the max offset then we would be out of range
16809 once we have added the additional 1 below, so bump the msb into the
16810 pre-loading insn(s). */
16811 if (lo == 4095)
16812 lo &= 0x7ff;
16813
16814 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16815 ^ (HOST_WIDE_INT) 0x80000000)
16816 - (HOST_WIDE_INT) 0x80000000);
16817
16818 gcc_assert (hi + lo == offset);
16819
16820 if (hi != 0)
16821 {
16822 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16823
16824 /* Be careful not to destroy OUTVAL. */
16825 if (reg_overlap_mentioned_p (base_plus, outval))
16826 {
16827 /* Updating base_plus might destroy outval, see if we
16828 can swap the scratch and base_plus. */
16829 if (!reg_overlap_mentioned_p (scratch, outval))
16830 std::swap (scratch, base_plus);
16831 else
16832 {
16833 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16834
16835 /* Be conservative and copy outval into scratch now,
16836 this should only be necessary if outval is a
16837 subreg of something larger than a word. */
16838 /* XXX Might this clobber base? I can't see how it
16839 can, since scratch is known to overlap with
16840 outval. */
16841 emit_insn (gen_movhi (scratch_hi, outval));
16842 outval = scratch_hi;
16843 }
16844 }
16845
16846 /* Get the base address; addsi3 knows how to handle constants
16847 that require more than one insn. */
16848 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16849 base = base_plus;
16850 offset = lo;
16851 }
16852 }
16853
16854 if (BYTES_BIG_ENDIAN)
16855 {
16856 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16857 plus_constant (Pmode, base,
16858 offset + 1)),
16859 gen_lowpart (QImode, outval)));
16860 emit_insn (gen_lshrsi3 (scratch,
16861 gen_rtx_SUBREG (SImode, outval, 0),
16862 GEN_INT (8)));
16863 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16864 offset)),
16865 gen_lowpart (QImode, scratch)));
16866 }
16867 else
16868 {
16869 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16870 offset)),
16871 gen_lowpart (QImode, outval)));
16872 emit_insn (gen_lshrsi3 (scratch,
16873 gen_rtx_SUBREG (SImode, outval, 0),
16874 GEN_INT (8)));
16875 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16876 plus_constant (Pmode, base,
16877 offset + 1)),
16878 gen_lowpart (QImode, scratch)));
16879 }
16880 }
16881
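/* Editor's note: standalone sketch of the hi/lo offset split used by
   both reload helpers above.  LO keeps an immediate in [-4095, 4095]
   (reduced in the 4095 corner case so that LO + 1 stays in range), and
   HI absorbs the remainder, sign-extended to 32 bits with the
   "^ 0x80000000 - 0x80000000" trick, to be materialised via addsi3.
   For example, offset 0x1234 splits into hi = 0x1000, lo = 0x234.  */

static inline void
split_hi_lo_sketch (long long offset, long long *hi, long long *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = (((offset - *lo) & 0xffffffffll) ^ 0x80000000ll) - 0x80000000ll;
}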
16882 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16883 (padded to the size of a word) should be passed in a register. */
16884
16885 static bool
16886 arm_must_pass_in_stack (const function_arg_info &arg)
16887 {
16888 if (TARGET_AAPCS_BASED)
16889 return must_pass_in_stack_var_size (arg);
16890 else
16891 return must_pass_in_stack_var_size_or_pad (arg);
16892 }
16893
16894
16895 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16896 byte of a stack argument has useful data. For legacy APCS ABIs we use
16897 the default. For AAPCS based ABIs small aggregate types are placed
16898 in the lowest memory address. */
16899
16900 static pad_direction
16901 arm_function_arg_padding (machine_mode mode, const_tree type)
16902 {
16903 if (!TARGET_AAPCS_BASED)
16904 return default_function_arg_padding (mode, type);
16905
16906 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16907 return PAD_DOWNWARD;
16908
16909 return PAD_UPWARD;
16910 }
16911
16912
16913 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16914 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16915 register has useful data, and return the opposite if the most
16916 significant byte does. */
16917
16918 bool
16919 arm_pad_reg_upward (machine_mode mode,
16920 tree type, int first ATTRIBUTE_UNUSED)
16921 {
16922 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16923 {
16924 /* For AAPCS, small aggregates, small fixed-point types,
16925 and small complex types are always padded upwards. */
16926 if (type)
16927 {
16928 if ((AGGREGATE_TYPE_P (type)
16929 || TREE_CODE (type) == COMPLEX_TYPE
16930 || FIXED_POINT_TYPE_P (type))
16931 && int_size_in_bytes (type) <= 4)
16932 return true;
16933 }
16934 else
16935 {
16936 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16937 && GET_MODE_SIZE (mode) <= 4)
16938 return true;
16939 }
16940 }
16941
16942 /* Otherwise, use default padding. */
16943 return !BYTES_BIG_ENDIAN;
16944 }
16945
16946 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16947 assuming that the address in the base register is word aligned. */
16948 bool
16949 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16950 {
16951 HOST_WIDE_INT max_offset;
16952
16953 /* Offset must be a multiple of 4 in Thumb-2 mode. */
16954 if (TARGET_THUMB2 && ((offset & 3) != 0))
16955 return false;
16956
16957 if (TARGET_THUMB2)
16958 max_offset = 1020;
16959 else if (TARGET_ARM)
16960 max_offset = 255;
16961 else
16962 return false;
16963
16964 return ((offset <= max_offset) && (offset >= -max_offset));
16965 }
16966
16967 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16968 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16969 Assumes that the address in the base register RN is word aligned. Pattern
16970 guarantees that both memory accesses use the same base register,
16971 the offsets are constants within range, and the gap between the offsets is 4.
16972 If reload is complete, check that the registers are legal. WBACK indicates whether
16973 the address is updated. LOAD indicates whether the memory access is a load or a store. */
16974 bool
16975 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16976 bool wback, bool load)
16977 {
16978 unsigned int t, t2, n;
16979
16980 if (!reload_completed)
16981 return true;
16982
16983 if (!offset_ok_for_ldrd_strd (offset))
16984 return false;
16985
16986 t = REGNO (rt);
16987 t2 = REGNO (rt2);
16988 n = REGNO (rn);
16989
16990 if ((TARGET_THUMB2)
16991 && ((wback && (n == t || n == t2))
16992 || (t == SP_REGNUM)
16993 || (t == PC_REGNUM)
16994 || (t2 == SP_REGNUM)
16995 || (t2 == PC_REGNUM)
16996 || (!load && (n == PC_REGNUM))
16997 || (load && (t == t2))
16998 /* Triggers Cortex-M3 LDRD errata. */
16999 || (!wback && load && fix_cm3_ldrd && (n == t))))
17000 return false;
17001
17002 if ((TARGET_ARM)
17003 && ((wback && (n == t || n == t2))
17004 || (t2 == PC_REGNUM)
17005 || (t % 2 != 0) /* First destination register is not even. */
17006 || (t2 != t + 1)
17007 /* PC can be used as base register (for offset addressing only),
17008 but it is deprecated. */
17009 || (n == PC_REGNUM)))
17010 return false;
17011
17012 return true;
17013 }
17014
17015 /* Return true if a 64-bit access with alignment ALIGN and with a
17016 constant offset OFFSET from the base pointer is permitted on this
17017 architecture. */
17018 static bool
17019 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17020 {
17021 return (unaligned_access
17022 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17023 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17024 }
17025
17026 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17027 operand MEM's address contains an immediate offset from the base
17028 register and has no side effects, in which case it sets BASE,
17029 OFFSET and ALIGN accordingly. */
17030 static bool
17031 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17032 {
17033 rtx addr;
17034
17035 gcc_assert (base != NULL && offset != NULL);
17036
17037 /* TODO: Handle more general memory operand patterns, such as
17038 PRE_DEC and PRE_INC. */
17039
17040 if (side_effects_p (mem))
17041 return false;
17042
17043 /* Can't deal with subregs. */
17044 if (SUBREG_P (mem))
17045 return false;
17046
17047 gcc_assert (MEM_P (mem));
17048
17049 *offset = const0_rtx;
17050 *align = MEM_ALIGN (mem);
17051
17052 addr = XEXP (mem, 0);
17053
17054 /* If addr isn't valid for DImode, then we can't handle it. */
17055 if (!arm_legitimate_address_p (DImode, addr,
17056 reload_in_progress || reload_completed))
17057 return false;
17058
17059 if (REG_P (addr))
17060 {
17061 *base = addr;
17062 return true;
17063 }
17064 else if (GET_CODE (addr) == PLUS)
17065 {
17066 *base = XEXP (addr, 0);
17067 *offset = XEXP (addr, 1);
17068 return (REG_P (*base) && CONST_INT_P (*offset));
17069 }
17070
17071 return false;
17072 }
17073
17074 /* Called from a peephole2 to replace two word-size accesses with a
17075 single LDRD/STRD instruction. Returns true iff we can generate a
17076 new instruction sequence. That is, both accesses use the same base
17077 register and the gap between constant offsets is 4. This function
17078 may reorder its operands to match ldrd/strd RTL templates.
17079 OPERANDS are the operands found by the peephole matcher;
17080 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17081 corresponding memory operands. LOAD indicates whether the access
17082 is a load or a store. CONST_STORE indicates a store of constant
17083 integer values held in OPERANDS[4,5] and assumes that the pattern
17084 is 4 insns long, for the purpose of checking dead registers.
17085 COMMUTE indicates that register operands may be reordered. */
17086 bool
17087 gen_operands_ldrd_strd (rtx *operands, bool load,
17088 bool const_store, bool commute)
17089 {
17090 int nops = 2;
17091 HOST_WIDE_INT offsets[2], offset, align[2];
17092 rtx base = NULL_RTX;
17093 rtx cur_base, cur_offset, tmp;
17094 int i, gap;
17095 HARD_REG_SET regset;
17096
17097 gcc_assert (!const_store || !load);
17098 /* Check that the memory references are immediate offsets from the
17099 same base register. Extract the base register, the destination
17100 registers, and the corresponding memory offsets. */
17101 for (i = 0; i < nops; i++)
17102 {
17103 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17104 &align[i]))
17105 return false;
17106
17107 if (i == 0)
17108 base = cur_base;
17109 else if (REGNO (base) != REGNO (cur_base))
17110 return false;
17111
17112 offsets[i] = INTVAL (cur_offset);
17113 if (GET_CODE (operands[i]) == SUBREG)
17114 {
17115 tmp = SUBREG_REG (operands[i]);
17116 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17117 operands[i] = tmp;
17118 }
17119 }
17120
17121 /* Make sure there is no dependency between the individual loads. */
17122 if (load && REGNO (operands[0]) == REGNO (base))
17123 return false; /* RAW */
17124
17125 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17126 return false; /* WAW */
17127
17128 /* If the same input register is used in both stores
17129 when storing different constants, try to find a free register.
17130 For example, the code
17131 mov r0, 0
17132 str r0, [r2]
17133 mov r0, 1
17134 str r0, [r2, #4]
17135 can be transformed into
17136 mov r1, 0
17137 mov r0, 1
17138 strd r1, r0, [r2]
17139 in Thumb mode assuming that r1 is free.
17140 For ARM mode do the same but only if the starting register
17141 can be made to be even. */
17142 if (const_store
17143 && REGNO (operands[0]) == REGNO (operands[1])
17144 && INTVAL (operands[4]) != INTVAL (operands[5]))
17145 {
17146 if (TARGET_THUMB2)
17147 {
17148 CLEAR_HARD_REG_SET (regset);
17149 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17150 if (tmp == NULL_RTX)
17151 return false;
17152
17153 /* Use the new register in the first load to ensure that
17154 if the original input register is not dead after peephole,
17155 then it will have the correct constant value. */
17156 operands[0] = tmp;
17157 }
17158 else if (TARGET_ARM)
17159 {
17160 int regno = REGNO (operands[0]);
17161 if (!peep2_reg_dead_p (4, operands[0]))
17162 {
17163 /* When the input register is even and is not dead after the
17164 pattern, it has to hold the second constant but we cannot
17165 form a legal STRD in ARM mode with this register as the second
17166 register. */
17167 if (regno % 2 == 0)
17168 return false;
17169
17170 /* Is regno-1 free? */
17171 SET_HARD_REG_SET (regset);
17172 CLEAR_HARD_REG_BIT(regset, regno - 1);
17173 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17174 if (tmp == NULL_RTX)
17175 return false;
17176
17177 operands[0] = tmp;
17178 }
17179 else
17180 {
17181 /* Find a DImode register. */
17182 CLEAR_HARD_REG_SET (regset);
17183 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17184 if (tmp != NULL_RTX)
17185 {
17186 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17187 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17188 }
17189 else
17190 {
17191 /* Can we use the input register to form a DI register? */
17192 SET_HARD_REG_SET (regset);
17193 CLEAR_HARD_REG_BIT(regset,
17194 regno % 2 == 0 ? regno + 1 : regno - 1);
17195 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17196 if (tmp == NULL_RTX)
17197 return false;
17198 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17199 }
17200 }
17201
17202 gcc_assert (operands[0] != NULL_RTX);
17203 gcc_assert (operands[1] != NULL_RTX);
17204 gcc_assert (REGNO (operands[0]) % 2 == 0);
17205 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17206 }
17207 }
17208
17209 /* Make sure the instructions are ordered with lower memory access first. */
17210 if (offsets[0] > offsets[1])
17211 {
17212 gap = offsets[0] - offsets[1];
17213 offset = offsets[1];
17214
17215 /* Swap the instructions such that lower memory is accessed first. */
17216 std::swap (operands[0], operands[1]);
17217 std::swap (operands[2], operands[3]);
17218 std::swap (align[0], align[1]);
17219 if (const_store)
17220 std::swap (operands[4], operands[5]);
17221 }
17222 else
17223 {
17224 gap = offsets[1] - offsets[0];
17225 offset = offsets[0];
17226 }
17227
17228 /* Make sure accesses are to consecutive memory locations. */
17229 if (gap != GET_MODE_SIZE (SImode))
17230 return false;
17231
17232 if (!align_ok_ldrd_strd (align[0], offset))
17233 return false;
17234
17235 /* Make sure we generate legal instructions. */
17236 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17237 false, load))
17238 return true;
17239
17240 /* In Thumb state, where registers are almost unconstrained, there
17241 is little hope of fixing it. */
17242 if (TARGET_THUMB2)
17243 return false;
17244
17245 if (load && commute)
17246 {
17247 /* Try reordering registers. */
17248 std::swap (operands[0], operands[1]);
17249 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17250 false, load))
17251 return true;
17252 }
17253
17254 if (const_store)
17255 {
17256 /* If input registers are dead after this pattern, they can be
17257 reordered or replaced by other registers that are free in the
17258 current pattern. */
17259 if (!peep2_reg_dead_p (4, operands[0])
17260 || !peep2_reg_dead_p (4, operands[1]))
17261 return false;
17262
17263 /* Try to reorder the input registers. */
17264 /* For example, the code
17265 mov r0, 0
17266 mov r1, 1
17267 str r1, [r2]
17268 str r0, [r2, #4]
17269 can be transformed into
17270 mov r1, 0
17271 mov r0, 1
17272 strd r0, [r2]
17273 */
17274 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17275 false, false))
17276 {
17277 std::swap (operands[0], operands[1]);
17278 return true;
17279 }
17280
17281 /* Try to find a free DI register. */
17282 CLEAR_HARD_REG_SET (regset);
17283 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17284 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17285 while (true)
17286 {
17287 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17288 if (tmp == NULL_RTX)
17289 return false;
17290
17291 /* DREG must be an even-numbered register in DImode.
17292 Split it into SI registers. */
17293 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17294 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17295 gcc_assert (operands[0] != NULL_RTX);
17296 gcc_assert (operands[1] != NULL_RTX);
17297 gcc_assert (REGNO (operands[0]) % 2 == 0);
17298 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17299
17300 return (operands_ok_ldrd_strd (operands[0], operands[1],
17301 base, offset,
17302 false, load));
17303 }
17304 }
17305
17306 return false;
17307 }
17308
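/* Editor's note: the peephole above only fuses the two word accesses when
   they are adjacent -- after ordering them by offset, the gap must be
   exactly 4 bytes.  Trivial standalone sketch of that check (hypothetical
   name):  */

static inline int
consecutive_words_sketch (long long off0, long long off1)
{
  long long lo = off0 < off1 ? off0 : off1;
  long long hi = off0 < off1 ? off1 : off0;
  return hi - lo == 4;
}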
17309
17310 /* Return true if parallel execution of the two word-size accesses provided
17311 could be satisfied with a single LDRD/STRD instruction. Two word-size
17312 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17313 register operands and OPERANDS[2,3] are the corresponding memory operands.
17314 */
17315 bool
17316 valid_operands_ldrd_strd (rtx *operands, bool load)
17317 {
17318 int nops = 2;
17319 HOST_WIDE_INT offsets[2], offset, align[2];
17320 rtx base = NULL_RTX;
17321 rtx cur_base, cur_offset;
17322 int i, gap;
17323
17324 /* Check that the memory references are immediate offsets from the
17325 same base register. Extract the base register, the destination
17326 registers, and the corresponding memory offsets. */
17327 for (i = 0; i < nops; i++)
17328 {
17329 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17330 &align[i]))
17331 return false;
17332
17333 if (i == 0)
17334 base = cur_base;
17335 else if (REGNO (base) != REGNO (cur_base))
17336 return false;
17337
17338 offsets[i] = INTVAL (cur_offset);
17339 if (GET_CODE (operands[i]) == SUBREG)
17340 return false;
17341 }
17342
17343 if (offsets[0] > offsets[1])
17344 return false;
17345
17346 gap = offsets[1] - offsets[0];
17347 offset = offsets[0];
17348
17349 /* Make sure accesses are to consecutive memory locations. */
17350 if (gap != GET_MODE_SIZE (SImode))
17351 return false;
17352
17353 if (!align_ok_ldrd_strd (align[0], offset))
17354 return false;
17355
17356 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17357 false, load);
17358 }
17359
17360 \f
17361 /* Print a symbolic form of X to the debug file, F. */
17362 static void
17363 arm_print_value (FILE *f, rtx x)
17364 {
17365 switch (GET_CODE (x))
17366 {
17367 case CONST_INT:
17368 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17369 return;
17370
17371 case CONST_DOUBLE:
17372 {
17373 char fpstr[20];
17374 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17375 sizeof (fpstr), 0, 1);
17376 fputs (fpstr, f);
17377 }
17378 return;
17379
17380 case CONST_VECTOR:
17381 {
17382 int i;
17383
17384 fprintf (f, "<");
17385 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17386 {
17387 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17388 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17389 fputc (',', f);
17390 }
17391 fprintf (f, ">");
17392 }
17393 return;
17394
17395 case CONST_STRING:
17396 fprintf (f, "\"%s\"", XSTR (x, 0));
17397 return;
17398
17399 case SYMBOL_REF:
17400 fprintf (f, "`%s'", XSTR (x, 0));
17401 return;
17402
17403 case LABEL_REF:
17404 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17405 return;
17406
17407 case CONST:
17408 arm_print_value (f, XEXP (x, 0));
17409 return;
17410
17411 case PLUS:
17412 arm_print_value (f, XEXP (x, 0));
17413 fprintf (f, "+");
17414 arm_print_value (f, XEXP (x, 1));
17415 return;
17416
17417 case PC:
17418 fprintf (f, "pc");
17419 return;
17420
17421 default:
17422 fprintf (f, "????");
17423 return;
17424 }
17425 }
17426 \f
17427 /* Routines for manipulation of the constant pool. */
17428
17429 /* Arm instructions cannot load a large constant directly into a
17430 register; they have to come from a pc relative load. The constant
17431 must therefore be placed in the addressable range of the pc
17432 relative load. Depending on the precise pc relative load
17433 instruction the range is somewhere between 256 bytes and 4k. This
17434 means that we often have to dump a constant inside a function, and
17435 generate code to branch around it.
17436
17437 It is important to minimize this, since the branches will slow
17438 things down and make the code larger.
17439
17440 Normally we can hide the table after an existing unconditional
17441 branch so that there is no interruption of the flow, but in the
17442 worst case the code looks like this:
17443
17444 ldr rn, L1
17445 ...
17446 b L2
17447 align
17448 L1: .long value
17449 L2:
17450 ...
17451
17452 ldr rn, L3
17453 ...
17454 b L4
17455 align
17456 L3: .long value
17457 L4:
17458 ...
17459
17460 We fix this by performing a scan after scheduling, which notices
17461 which instructions need to have their operands fetched from the
17462 constant table and builds the table.
17463
17464 The algorithm starts by building a table of all the constants that
17465 need fixing up and all the natural barriers in the function (places
17466 where a constant table can be dropped without breaking the flow).
17467 For each fixup we note how far the pc-relative replacement will be
17468 able to reach and the offset of the instruction into the function.
17469
17470 Having built the table we then group the fixes together to form
17471 tables that are as large as possible (subject to addressing
17472 constraints) and emit each table of constants after the last
17473 barrier that is within range of all the instructions in the group.
17474 If a group does not contain a barrier, then we forcibly create one
17475 by inserting a jump instruction into the flow. Once the table has
17476 been inserted, the insns are then modified to reference the
17477 relevant entry in the pool.
17478
17479 Possible enhancements to the algorithm (not implemented) are:
17480
17481 1) For some processors and object formats, there may be benefit in
17482 aligning the pools to the start of cache lines; this alignment
17483 would need to be taken into account when calculating addressability
17484 of a pool. */
17485
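/* Editor's note: a simplified standalone sketch of the placement
   constraint described above.  Each fix can only reach forward a limited
   distance, so a pool serving a whole group must be emitted no later than
   the smallest (address + reach) over the group; the real code tracks
   this per entry as max_address and also accounts for pool padding and
   each entry's offset within the pool, which this sketch ignores.  */

static inline long long
pool_deadline_sketch (const long long *fix_address,
		      const long long *fix_reach, int nfixes)
{
  long long deadline = fix_address[0] + fix_reach[0];
  for (int i = 1; i < nfixes; i++)
    {
      long long limit = fix_address[i] + fix_reach[i];
      if (limit < deadline)
	deadline = limit;
    }
  return deadline;
}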
17486 /* These typedefs are located at the start of this file, so that
17487 they can be used in the prototypes there. This comment is to
17488 remind readers of that fact so that the following structures
17489 can be understood more easily.
17490
17491 typedef struct minipool_node Mnode;
17492 typedef struct minipool_fixup Mfix; */
17493
17494 struct minipool_node
17495 {
17496 /* Doubly linked chain of entries. */
17497 Mnode * next;
17498 Mnode * prev;
17499 /* The maximum offset into the code at which this entry can be placed. While
17500 pushing fixes for forward references, all entries are sorted in order
17501 of increasing max_address. */
17502 HOST_WIDE_INT max_address;
17503 /* Similarly for an entry inserted for a backwards ref. */
17504 HOST_WIDE_INT min_address;
17505 /* The number of fixes referencing this entry. This can become zero
17506 if we "unpush" an entry. In this case we ignore the entry when we
17507 come to emit the code. */
17508 int refcount;
17509 /* The offset from the start of the minipool. */
17510 HOST_WIDE_INT offset;
17511 /* The value in table. */
17512 rtx value;
17513 /* The mode of value. */
17514 machine_mode mode;
17515 /* The size of the value. With iWMMXt enabled
17516 sizes > 4 also imply an alignment of 8-bytes. */
17517 int fix_size;
17518 };
17519
17520 struct minipool_fixup
17521 {
17522 Mfix * next;
17523 rtx_insn * insn;
17524 HOST_WIDE_INT address;
17525 rtx * loc;
17526 machine_mode mode;
17527 int fix_size;
17528 rtx value;
17529 Mnode * minipool;
17530 HOST_WIDE_INT forwards;
17531 HOST_WIDE_INT backwards;
17532 };
17533
17534 /* Fixes less than a word need padding out to a word boundary. */
17535 #define MINIPOOL_FIX_SIZE(mode) \
17536 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17537
17538 static Mnode * minipool_vector_head;
17539 static Mnode * minipool_vector_tail;
17540 static rtx_code_label *minipool_vector_label;
17541 static int minipool_pad;
17542
17543 /* The linked list of all minipool fixes required for this function. */
17544 Mfix * minipool_fix_head;
17545 Mfix * minipool_fix_tail;
17546 /* The fix entry for the current minipool, once it has been placed. */
17547 Mfix * minipool_barrier;
17548
17549 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17550 #define JUMP_TABLES_IN_TEXT_SECTION 0
17551 #endif
17552
17553 static HOST_WIDE_INT
17554 get_jump_table_size (rtx_jump_table_data *insn)
17555 {
17556 /* ADDR_VECs only take room if read-only data goes into the text
17557 section. */
17558 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17559 {
17560 rtx body = PATTERN (insn);
17561 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17562 HOST_WIDE_INT size;
17563 HOST_WIDE_INT modesize;
17564
17565 modesize = GET_MODE_SIZE (GET_MODE (body));
17566 size = modesize * XVECLEN (body, elt);
17567 switch (modesize)
17568 {
17569 case 1:
17570 /* Round up size of TBB table to a halfword boundary. */
17571 size = (size + 1) & ~HOST_WIDE_INT_1;
17572 break;
17573 case 2:
17574 /* No padding necessary for TBH. */
17575 break;
17576 case 4:
17577 /* Add two bytes for alignment on Thumb. */
17578 if (TARGET_THUMB)
17579 size += 2;
17580 break;
17581 default:
17582 gcc_unreachable ();
17583 }
17584 return size;
17585 }
17586
17587 return 0;
17588 }
17589
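/* Editor's note: standalone restatement of the arithmetic above
   (hypothetical name).  Byte tables (TBB) are rounded up to a halfword,
   halfword tables (TBH) need no padding, and word tables get two bytes
   of alignment padding on Thumb.  */

static inline long long
jump_table_size_sketch (int modesize, int nelts, int thumb)
{
  long long size = (long long) modesize * nelts;
  if (modesize == 1)
    size = (size + 1) & ~1ll;
  else if (modesize == 4 && thumb)
    size += 2;
  return size;
}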
17590 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17591 function descriptor) into a register and the GOT address into the
17592 FDPIC register, returning an rtx for the register holding the
17593 function address. */
17594
17595 rtx
17596 arm_load_function_descriptor (rtx funcdesc)
17597 {
17598 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17599 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17600 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17601 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17602
17603 emit_move_insn (fnaddr_reg, fnaddr);
17604
17605 /* The ABI requires the entry point address to be loaded first, but
17606 since we cannot support lazy binding for lack of atomic load of
17607 two 32-bits values, we do not need to bother to prevent the
17608 previous load from being moved after that of the GOT address. */
17609 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17610
17611 return fnaddr_reg;
17612 }
17613
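/* Editor's note: the FDPIC function descriptor read above is a pair of
   words in memory -- the entry-point address followed, at offset 4, by
   the GOT address that is reloaded into the FDPIC register.  Illustrative
   layout only (hypothetical type name):  */

struct fdpic_descriptor_sketch
{
  unsigned int entry_point;	/* Loaded into the returned register.  */
  unsigned int got_address;	/* Loaded into FDPIC_REGNUM.  */
};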
17614 /* Return the maximum amount of padding that will be inserted before
17615 label LABEL. */
17616 static HOST_WIDE_INT
17617 get_label_padding (rtx label)
17618 {
17619 HOST_WIDE_INT align, min_insn_size;
17620
17621 align = 1 << label_to_alignment (label).levels[0].log;
17622 min_insn_size = TARGET_THUMB ? 2 : 4;
17623 return align > min_insn_size ? align - min_insn_size : 0;
17624 }
17625
17626 /* Move a minipool fix MP from its current location to before MAX_MP.
17627 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17628 constraints may need updating. */
17629 static Mnode *
17630 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17631 HOST_WIDE_INT max_address)
17632 {
17633 /* The code below assumes these are different. */
17634 gcc_assert (mp != max_mp);
17635
17636 if (max_mp == NULL)
17637 {
17638 if (max_address < mp->max_address)
17639 mp->max_address = max_address;
17640 }
17641 else
17642 {
17643 if (max_address > max_mp->max_address - mp->fix_size)
17644 mp->max_address = max_mp->max_address - mp->fix_size;
17645 else
17646 mp->max_address = max_address;
17647
17648 /* Unlink MP from its current position. Since max_mp is non-null,
17649 mp->prev must be non-null. */
17650 mp->prev->next = mp->next;
17651 if (mp->next != NULL)
17652 mp->next->prev = mp->prev;
17653 else
17654 minipool_vector_tail = mp->prev;
17655
17656 /* Re-insert it before MAX_MP. */
17657 mp->next = max_mp;
17658 mp->prev = max_mp->prev;
17659 max_mp->prev = mp;
17660
17661 if (mp->prev != NULL)
17662 mp->prev->next = mp;
17663 else
17664 minipool_vector_head = mp;
17665 }
17666
17667 /* Save the new entry. */
17668 max_mp = mp;
17669
17670 /* Scan over the preceding entries and adjust their addresses as
17671 required. */
17672 while (mp->prev != NULL
17673 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17674 {
17675 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17676 mp = mp->prev;
17677 }
17678
17679 return max_mp;
17680 }
17681
17682 /* Add a constant to the minipool for a forward reference. Returns the
17683 node added or NULL if the constant will not fit in this pool. */
17684 static Mnode *
17685 add_minipool_forward_ref (Mfix *fix)
17686 {
17687 /* If set, max_mp is the first pool_entry that has a lower
17688 constraint than the one we are trying to add. */
17689 Mnode * max_mp = NULL;
17690 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17691 Mnode * mp;
17692
17693 /* If the minipool starts before the end of FIX->INSN then this FIX
17694 cannot be placed into the current pool. Furthermore, adding the
17695 new constant pool entry may cause the pool to start FIX_SIZE bytes
17696 earlier. */
17697 if (minipool_vector_head &&
17698 (fix->address + get_attr_length (fix->insn)
17699 >= minipool_vector_head->max_address - fix->fix_size))
17700 return NULL;
17701
17702 /* Scan the pool to see if a constant with the same value has
17703 already been added. While we are doing this, also note the
17704 location where we must insert the constant if it doesn't already
17705 exist. */
17706 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17707 {
17708 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17709 && fix->mode == mp->mode
17710 && (!LABEL_P (fix->value)
17711 || (CODE_LABEL_NUMBER (fix->value)
17712 == CODE_LABEL_NUMBER (mp->value)))
17713 && rtx_equal_p (fix->value, mp->value))
17714 {
17715 /* More than one fix references this entry. */
17716 mp->refcount++;
17717 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17718 }
17719
17720 /* Note the insertion point if necessary. */
17721 if (max_mp == NULL
17722 && mp->max_address > max_address)
17723 max_mp = mp;
17724
17725 /* If we are inserting an 8-byte aligned quantity and
17726 we have not already found an insertion point, then
17727 make sure that all such 8-byte aligned quantities are
17728 placed at the start of the pool. */
17729 if (ARM_DOUBLEWORD_ALIGN
17730 && max_mp == NULL
17731 && fix->fix_size >= 8
17732 && mp->fix_size < 8)
17733 {
17734 max_mp = mp;
17735 max_address = mp->max_address;
17736 }
17737 }
17738
17739 /* The value is not currently in the minipool, so we need to create
17740 a new entry for it. If MAX_MP is NULL, the entry will be put on
17741 the end of the list since the placement is less constrained than
17742 any existing entry. Otherwise, we insert the new fix before
17743 MAX_MP and, if necessary, adjust the constraints on the other
17744 entries. */
17745 mp = XNEW (Mnode);
17746 mp->fix_size = fix->fix_size;
17747 mp->mode = fix->mode;
17748 mp->value = fix->value;
17749 mp->refcount = 1;
17750 /* Not yet required for a backwards ref. */
17751 mp->min_address = -65536;
17752
17753 if (max_mp == NULL)
17754 {
17755 mp->max_address = max_address;
17756 mp->next = NULL;
17757 mp->prev = minipool_vector_tail;
17758
17759 if (mp->prev == NULL)
17760 {
17761 minipool_vector_head = mp;
17762 minipool_vector_label = gen_label_rtx ();
17763 }
17764 else
17765 mp->prev->next = mp;
17766
17767 minipool_vector_tail = mp;
17768 }
17769 else
17770 {
17771 if (max_address > max_mp->max_address - mp->fix_size)
17772 mp->max_address = max_mp->max_address - mp->fix_size;
17773 else
17774 mp->max_address = max_address;
17775
17776 mp->next = max_mp;
17777 mp->prev = max_mp->prev;
17778 max_mp->prev = mp;
17779 if (mp->prev != NULL)
17780 mp->prev->next = mp;
17781 else
17782 minipool_vector_head = mp;
17783 }
17784
17785 /* Save the new entry. */
17786 max_mp = mp;
17787
17788 /* Scan over the preceding entries and adjust their addresses as
17789 required. */
17790 while (mp->prev != NULL
17791 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17792 {
17793 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17794 mp = mp->prev;
17795 }
17796
17797 return max_mp;
17798 }
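
/* Editorial example: a fix whose insn sits at address 1000 with a forward
   pool range of 4096 gets max_address = 1000 + 4096 - minipool_pad, i.e. its
   constant must be emitted before that address.  A more tightly constrained
   entry is therefore inserted nearer the head of the pool, and the loop
   above propagates the reduced max_address to the entries before it.  */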
17799
17800 static Mnode *
17801 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17802 HOST_WIDE_INT min_address)
17803 {
17804 HOST_WIDE_INT offset;
17805
17806 /* The code below assumes these are different. */
17807 gcc_assert (mp != min_mp);
17808
17809 if (min_mp == NULL)
17810 {
17811 if (min_address > mp->min_address)
17812 mp->min_address = min_address;
17813 }
17814 else
17815 {
17816 /* We will adjust this below if it is too loose. */
17817 mp->min_address = min_address;
17818
17819 /* Unlink MP from its current position. Since min_mp is non-null,
17820 mp->next must be non-null. */
17821 mp->next->prev = mp->prev;
17822 if (mp->prev != NULL)
17823 mp->prev->next = mp->next;
17824 else
17825 minipool_vector_head = mp->next;
17826
17827 /* Reinsert it after MIN_MP. */
17828 mp->prev = min_mp;
17829 mp->next = min_mp->next;
17830 min_mp->next = mp;
17831 if (mp->next != NULL)
17832 mp->next->prev = mp;
17833 else
17834 minipool_vector_tail = mp;
17835 }
17836
17837 min_mp = mp;
17838
17839 offset = 0;
17840 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17841 {
17842 mp->offset = offset;
17843 if (mp->refcount > 0)
17844 offset += mp->fix_size;
17845
17846 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17847 mp->next->min_address = mp->min_address + mp->fix_size;
17848 }
17849
17850 return min_mp;
17851 }
17852
17853 /* Add a constant to the minipool for a backward reference. Returns the
17854 node added or NULL if the constant will not fit in this pool.
17855
17856 Note that the code for inserting a backwards reference can be
17857 somewhat confusing because the calculated offsets for each fix do
17858 not take into account the size of the pool (which is still under
17859 construction). */
17860 static Mnode *
17861 add_minipool_backward_ref (Mfix *fix)
17862 {
17863 /* If set, min_mp is the last pool_entry that has a lower constraint
17864 than the one we are trying to add. */
17865 Mnode *min_mp = NULL;
17866 /* This can be negative, since it is only a constraint. */
17867 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17868 Mnode *mp;
17869
17870 /* If we can't reach the current pool from this insn, or if we can't
17871 insert this entry at the end of the pool without pushing other
17872 fixes out of range, then we don't try. This ensures that we
17873 can't fail later on. */
17874 if (min_address >= minipool_barrier->address
17875 || (minipool_vector_tail->min_address + fix->fix_size
17876 >= minipool_barrier->address))
17877 return NULL;
17878
17879 /* Scan the pool to see if a constant with the same value has
17880 already been added. While we are doing this, also note the
17881 location where we must insert the constant if it doesn't already
17882 exist. */
17883 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17884 {
17885 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17886 && fix->mode == mp->mode
17887 && (!LABEL_P (fix->value)
17888 || (CODE_LABEL_NUMBER (fix->value)
17889 == CODE_LABEL_NUMBER (mp->value)))
17890 && rtx_equal_p (fix->value, mp->value)
17891 /* Check that there is enough slack to move this entry to the
17892 end of the table (this is conservative). */
17893 && (mp->max_address
17894 > (minipool_barrier->address
17895 + minipool_vector_tail->offset
17896 + minipool_vector_tail->fix_size)))
17897 {
17898 mp->refcount++;
17899 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17900 }
17901
17902 if (min_mp != NULL)
17903 mp->min_address += fix->fix_size;
17904 else
17905 {
17906 /* Note the insertion point if necessary. */
17907 if (mp->min_address < min_address)
17908 {
17909 /* For now, we do not allow the insertion of 8-byte alignment
17910 requiring nodes anywhere but at the start of the pool. */
17911 if (ARM_DOUBLEWORD_ALIGN
17912 && fix->fix_size >= 8 && mp->fix_size < 8)
17913 return NULL;
17914 else
17915 min_mp = mp;
17916 }
17917 else if (mp->max_address
17918 < minipool_barrier->address + mp->offset + fix->fix_size)
17919 {
17920 /* Inserting before this entry would push the fix beyond
17921 its maximum address (which can happen if we have
17922 re-located a forwards fix); force the new fix to come
17923 after it. */
17924 if (ARM_DOUBLEWORD_ALIGN
17925 && fix->fix_size >= 8 && mp->fix_size < 8)
17926 return NULL;
17927 else
17928 {
17929 min_mp = mp;
17930 min_address = mp->min_address + fix->fix_size;
17931 }
17932 }
17933 /* Do not insert a non-8-byte aligned quantity before 8-byte
17934 aligned quantities. */
17935 else if (ARM_DOUBLEWORD_ALIGN
17936 && fix->fix_size < 8
17937 && mp->fix_size >= 8)
17938 {
17939 min_mp = mp;
17940 min_address = mp->min_address + fix->fix_size;
17941 }
17942 }
17943 }
17944
17945 /* We need to create a new entry. */
17946 mp = XNEW (Mnode);
17947 mp->fix_size = fix->fix_size;
17948 mp->mode = fix->mode;
17949 mp->value = fix->value;
17950 mp->refcount = 1;
17951 mp->max_address = minipool_barrier->address + 65536;
17952
17953 mp->min_address = min_address;
17954
17955 if (min_mp == NULL)
17956 {
17957 mp->prev = NULL;
17958 mp->next = minipool_vector_head;
17959
17960 if (mp->next == NULL)
17961 {
17962 minipool_vector_tail = mp;
17963 minipool_vector_label = gen_label_rtx ();
17964 }
17965 else
17966 mp->next->prev = mp;
17967
17968 minipool_vector_head = mp;
17969 }
17970 else
17971 {
17972 mp->next = min_mp->next;
17973 mp->prev = min_mp;
17974 min_mp->next = mp;
17975
17976 if (mp->next != NULL)
17977 mp->next->prev = mp;
17978 else
17979 minipool_vector_tail = mp;
17980 }
17981
17982 /* Save the new entry. */
17983 min_mp = mp;
17984
17985 if (mp->prev)
17986 mp = mp->prev;
17987 else
17988 mp->offset = 0;
17989
17990 /* Scan over the following entries and adjust their offsets. */
17991 while (mp->next != NULL)
17992 {
17993 if (mp->next->min_address < mp->min_address + mp->fix_size)
17994 mp->next->min_address = mp->min_address + mp->fix_size;
17995
17996 if (mp->refcount)
17997 mp->next->offset = mp->offset + mp->fix_size;
17998 else
17999 mp->next->offset = mp->offset;
18000
18001 mp = mp->next;
18002 }
18003
18004 return min_mp;
18005 }
18006
18007 static void
18008 assign_minipool_offsets (Mfix *barrier)
18009 {
18010 HOST_WIDE_INT offset = 0;
18011 Mnode *mp;
18012
18013 minipool_barrier = barrier;
18014
18015 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18016 {
18017 mp->offset = offset;
18018
18019 if (mp->refcount > 0)
18020 offset += mp->fix_size;
18021 }
18022 }
18023
18024 /* Output the literal table. */
18025 static void
18026 dump_minipool (rtx_insn *scan)
18027 {
18028 Mnode * mp;
18029 Mnode * nmp;
18030 int align64 = 0;
18031
18032 if (ARM_DOUBLEWORD_ALIGN)
18033 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18034 if (mp->refcount > 0 && mp->fix_size >= 8)
18035 {
18036 align64 = 1;
18037 break;
18038 }
18039
18040 if (dump_file)
18041 fprintf (dump_file,
18042 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18043 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18044
18045 scan = emit_label_after (gen_label_rtx (), scan);
18046 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18047 scan = emit_label_after (minipool_vector_label, scan);
18048
18049 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18050 {
18051 if (mp->refcount > 0)
18052 {
18053 if (dump_file)
18054 {
18055 fprintf (dump_file,
18056 ";; Offset %u, min %ld, max %ld ",
18057 (unsigned) mp->offset, (unsigned long) mp->min_address,
18058 (unsigned long) mp->max_address);
18059 arm_print_value (dump_file, mp->value);
18060 fputc ('\n', dump_file);
18061 }
18062
18063 rtx val = copy_rtx (mp->value);
18064
18065 switch (GET_MODE_SIZE (mp->mode))
18066 {
18067 #ifdef HAVE_consttable_1
18068 case 1:
18069 scan = emit_insn_after (gen_consttable_1 (val), scan);
18070 break;
18071
18072 #endif
18073 #ifdef HAVE_consttable_2
18074 case 2:
18075 scan = emit_insn_after (gen_consttable_2 (val), scan);
18076 break;
18077
18078 #endif
18079 #ifdef HAVE_consttable_4
18080 case 4:
18081 scan = emit_insn_after (gen_consttable_4 (val), scan);
18082 break;
18083
18084 #endif
18085 #ifdef HAVE_consttable_8
18086 case 8:
18087 scan = emit_insn_after (gen_consttable_8 (val), scan);
18088 break;
18089
18090 #endif
18091 #ifdef HAVE_consttable_16
18092 case 16:
18093 scan = emit_insn_after (gen_consttable_16 (val), scan);
18094 break;
18095
18096 #endif
18097 default:
18098 gcc_unreachable ();
18099 }
18100 }
18101
18102 nmp = mp->next;
18103 free (mp);
18104 }
18105
18106 minipool_vector_head = minipool_vector_tail = NULL;
18107 scan = emit_insn_after (gen_consttable_end (), scan);
18108 scan = emit_barrier_after (scan);
18109 }
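
/* Editorial note: the emitted pool looks roughly like
       .Lpool_anchor:
         .align 2              @ or an 8-byte alignment when a doubleword
       .Lminipool:             @   entry is present and ARM_DOUBLEWORD_ALIGN
         .word  0x12345678
         .word  some_symbol
   followed by a barrier; entries whose refcount dropped to zero are freed
   without being emitted.  */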
18110
18111 /* Return the cost of forcibly inserting a barrier after INSN. */
18112 static int
18113 arm_barrier_cost (rtx_insn *insn)
18114 {
18115 /* Basing the location of the pool on the loop depth is preferable,
18116 but at the moment, the basic block information seems to be
18117 corrupted by this stage of the compilation. */
18118 int base_cost = 50;
18119 rtx_insn *next = next_nonnote_insn (insn);
18120
18121 if (next != NULL && LABEL_P (next))
18122 base_cost -= 20;
18123
18124 switch (GET_CODE (insn))
18125 {
18126 case CODE_LABEL:
18127 /* It will always be better to place the table before the label, rather
18128 than after it. */
18129 return 50;
18130
18131 case INSN:
18132 case CALL_INSN:
18133 return base_cost;
18134
18135 case JUMP_INSN:
18136 return base_cost - 10;
18137
18138 default:
18139 return base_cost + 10;
18140 }
18141 }
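
/* Editorial example: with the costs above, a JUMP_INSN immediately followed
   by a label is the cheapest split point (50 - 20 - 10 = 20), an ordinary
   INSN followed by a label costs 30, and a CODE_LABEL itself always costs 50
   because the pool should go before the label rather than after it.  */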
18142
18143 /* Find the best place in the insn stream in the range
18144 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18145 Create the barrier by inserting a jump and add a new fix entry for
18146 it. */
18147 static Mfix *
18148 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18149 {
18150 HOST_WIDE_INT count = 0;
18151 rtx_barrier *barrier;
18152 rtx_insn *from = fix->insn;
18153 /* The instruction after which we will insert the jump. */
18154 rtx_insn *selected = NULL;
18155 int selected_cost;
18156 /* The address at which the jump instruction will be placed. */
18157 HOST_WIDE_INT selected_address;
18158 Mfix * new_fix;
18159 HOST_WIDE_INT max_count = max_address - fix->address;
18160 rtx_code_label *label = gen_label_rtx ();
18161
18162 selected_cost = arm_barrier_cost (from);
18163 selected_address = fix->address;
18164
18165 while (from && count < max_count)
18166 {
18167 rtx_jump_table_data *tmp;
18168 int new_cost;
18169
18170 /* This code shouldn't have been called if there was a natural barrier
18171 within range. */
18172 gcc_assert (!BARRIER_P (from));
18173
18174 /* Count the length of this insn. This must stay in sync with the
18175 code that pushes minipool fixes. */
18176 if (LABEL_P (from))
18177 count += get_label_padding (from);
18178 else
18179 count += get_attr_length (from);
18180
18181 /* If there is a jump table, add its length. */
18182 if (tablejump_p (from, NULL, &tmp))
18183 {
18184 count += get_jump_table_size (tmp);
18185
18186 /* Jump tables aren't in a basic block, so base the cost on
18187 the dispatch insn. If we select this location, we will
18188 still put the pool after the table. */
18189 new_cost = arm_barrier_cost (from);
18190
18191 if (count < max_count
18192 && (!selected || new_cost <= selected_cost))
18193 {
18194 selected = tmp;
18195 selected_cost = new_cost;
18196 selected_address = fix->address + count;
18197 }
18198
18199 /* Continue after the dispatch table. */
18200 from = NEXT_INSN (tmp);
18201 continue;
18202 }
18203
18204 new_cost = arm_barrier_cost (from);
18205
18206 if (count < max_count
18207 && (!selected || new_cost <= selected_cost))
18208 {
18209 selected = from;
18210 selected_cost = new_cost;
18211 selected_address = fix->address + count;
18212 }
18213
18214 from = NEXT_INSN (from);
18215 }
18216
18217 /* Make sure that we found a place to insert the jump. */
18218 gcc_assert (selected);
18219
18220 /* Create a new JUMP_INSN that branches around a barrier. */
18221 from = emit_jump_insn_after (gen_jump (label), selected);
18222 JUMP_LABEL (from) = label;
18223 barrier = emit_barrier_after (from);
18224 emit_label_after (label, barrier);
18225
18226 /* Create a minipool barrier entry for the new barrier. */
18227 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18228 new_fix->insn = barrier;
18229 new_fix->address = selected_address;
18230 new_fix->next = fix->next;
18231 fix->next = new_fix;
18232
18233 return new_fix;
18234 }
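
/* Editorial note: the manufactured barrier amounts to
         b     .Lskip
       (barrier -- the minipool is dumped here)
       .Lskip:
   so execution simply branches over the constants that will be placed at
   the barrier.  */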
18235
18236 /* Record that there is a natural barrier in the insn stream at
18237 ADDRESS. */
18238 static void
18239 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18240 {
18241 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18242
18243 fix->insn = insn;
18244 fix->address = address;
18245
18246 fix->next = NULL;
18247 if (minipool_fix_head != NULL)
18248 minipool_fix_tail->next = fix;
18249 else
18250 minipool_fix_head = fix;
18251
18252 minipool_fix_tail = fix;
18253 }
18254
18255 /* Record INSN, which will need fixing up to load a value from the
18256 minipool. ADDRESS is the offset of the insn since the start of the
18257 function; LOC is a pointer to the part of the insn which requires
18258 fixing; VALUE is the constant that must be loaded, which is of type
18259 MODE. */
18260 static void
18261 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18262 machine_mode mode, rtx value)
18263 {
18264 gcc_assert (!arm_disable_literal_pool);
18265 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18266
18267 fix->insn = insn;
18268 fix->address = address;
18269 fix->loc = loc;
18270 fix->mode = mode;
18271 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18272 fix->value = value;
18273 fix->forwards = get_attr_pool_range (insn);
18274 fix->backwards = get_attr_neg_pool_range (insn);
18275 fix->minipool = NULL;
18276
18277 /* If an insn doesn't have a range defined for it, then it isn't
18278 expecting to be reworked by this code. Better to stop now than
18279 to generate duff assembly code. */
18280 gcc_assert (fix->forwards || fix->backwards);
18281
18282 /* If an entry requires 8-byte alignment then assume all constant pools
18283 require 4 bytes of padding. Trying to do this later on a per-pool
18284 basis is awkward because existing pool entries have to be modified. */
18285 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18286 minipool_pad = 4;
18287
18288 if (dump_file)
18289 {
18290 fprintf (dump_file,
18291 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18292 GET_MODE_NAME (mode),
18293 INSN_UID (insn), (unsigned long) address,
18294 -1 * (long)fix->backwards, (long)fix->forwards);
18295 arm_print_value (dump_file, fix->value);
18296 fprintf (dump_file, "\n");
18297 }
18298
18299 /* Add it to the chain of fixes. */
18300 fix->next = NULL;
18301
18302 if (minipool_fix_head != NULL)
18303 minipool_fix_tail->next = fix;
18304 else
18305 minipool_fix_head = fix;
18306
18307 minipool_fix_tail = fix;
18308 }
18309
18310 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
18311 constant inline; constants whose synthesis cost exceeds this limit are
18312 loaded from the literal pool instead. */
18313 int
18314 arm_max_const_double_inline_cost ()
18315 {
18316 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18317 }
18318
18319 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18320 Returns the number of insns needed, or 99 if we don't know how to
18321 do it. */
18322 int
18323 arm_const_double_inline_cost (rtx val)
18324 {
18325 rtx lowpart, highpart;
18326 machine_mode mode;
18327
18328 mode = GET_MODE (val);
18329
18330 if (mode == VOIDmode)
18331 mode = DImode;
18332
18333 gcc_assert (GET_MODE_SIZE (mode) == 8);
18334
18335 lowpart = gen_lowpart (SImode, val);
18336 highpart = gen_highpart_mode (SImode, mode, val);
18337
18338 gcc_assert (CONST_INT_P (lowpart));
18339 gcc_assert (CONST_INT_P (highpart));
18340
18341 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18342 NULL_RTX, NULL_RTX, 0, 0)
18343 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18344 NULL_RTX, NULL_RTX, 0, 0));
18345 }
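
/* Editorial example: for the 64-bit constant 0x0000000100000005 both the low
   part (5) and the high part (1) are single-instruction immediates, so the
   inline cost is 2, which is within the limit returned by
   arm_max_const_double_inline_cost; a value whose halves each need several
   MOV/ORR steps would instead stay in the literal pool.  */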
18346
18347 /* Cost of loading a SImode constant. */
18348 static inline int
18349 arm_const_inline_cost (enum rtx_code code, rtx val)
18350 {
18351 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18352 NULL_RTX, NULL_RTX, 1, 0);
18353 }
18354
18355 /* Return true if it is worthwhile to split a 64-bit constant into two
18356 32-bit operations. This is the case if optimizing for size, or
18357 if we have load delay slots, or if one 32-bit part can be done with
18358 a single data operation. */
18359 bool
18360 arm_const_double_by_parts (rtx val)
18361 {
18362 machine_mode mode = GET_MODE (val);
18363 rtx part;
18364
18365 if (optimize_size || arm_ld_sched)
18366 return true;
18367
18368 if (mode == VOIDmode)
18369 mode = DImode;
18370
18371 part = gen_highpart_mode (SImode, mode, val);
18372
18373 gcc_assert (CONST_INT_P (part));
18374
18375 if (const_ok_for_arm (INTVAL (part))
18376 || const_ok_for_arm (~INTVAL (part)))
18377 return true;
18378
18379 part = gen_lowpart (SImode, val);
18380
18381 gcc_assert (CONST_INT_P (part));
18382
18383 if (const_ok_for_arm (INTVAL (part))
18384 || const_ok_for_arm (~INTVAL (part)))
18385 return true;
18386
18387 return false;
18388 }
18389
18390 /* Return true if it is possible to inline both the high and low parts
18391 of a 64-bit constant into 32-bit data processing instructions. */
18392 bool
18393 arm_const_double_by_immediates (rtx val)
18394 {
18395 machine_mode mode = GET_MODE (val);
18396 rtx part;
18397
18398 if (mode == VOIDmode)
18399 mode = DImode;
18400
18401 part = gen_highpart_mode (SImode, mode, val);
18402
18403 gcc_assert (CONST_INT_P (part));
18404
18405 if (!const_ok_for_arm (INTVAL (part)))
18406 return false;
18407
18408 part = gen_lowpart (SImode, val);
18409
18410 gcc_assert (CONST_INT_P (part));
18411
18412 if (!const_ok_for_arm (INTVAL (part)))
18413 return false;
18414
18415 return true;
18416 }
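
/* Editorial example: 0x0000000500000001 satisfies the test above because
   both 5 and 1 are valid ARM immediates, while 0x123456789abcdef0 does not,
   since neither 32-bit half fits the 8-bit-rotated immediate encoding.  */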
18417
18418 /* Scan INSN and note any of its operands that need fixing.
18419 If DO_PUSHES is false we do not actually push any of the fixups
18420 needed. */
18421 static void
18422 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18423 {
18424 int opno;
18425
18426 extract_constrain_insn (insn);
18427
18428 if (recog_data.n_alternatives == 0)
18429 return;
18430
18431 /* Fill in recog_op_alt with information about the constraints of
18432 this insn. */
18433 preprocess_constraints (insn);
18434
18435 const operand_alternative *op_alt = which_op_alt ();
18436 for (opno = 0; opno < recog_data.n_operands; opno++)
18437 {
18438 /* Things we need to fix can only occur in inputs. */
18439 if (recog_data.operand_type[opno] != OP_IN)
18440 continue;
18441
18442 /* If this alternative is a memory reference, then any mention
18443 of constants in this alternative is really to fool reload
18444 into allowing us to accept one there. We need to fix them up
18445 now so that we output the right code. */
18446 if (op_alt[opno].memory_ok)
18447 {
18448 rtx op = recog_data.operand[opno];
18449
18450 if (CONSTANT_P (op))
18451 {
18452 if (do_pushes)
18453 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18454 recog_data.operand_mode[opno], op);
18455 }
18456 else if (MEM_P (op)
18457 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18458 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18459 {
18460 if (do_pushes)
18461 {
18462 rtx cop = avoid_constant_pool_reference (op);
18463
18464 /* Casting the address of something to a mode narrower
18465 than a word can cause avoid_constant_pool_reference()
18466 to return the pool reference itself. That's no good to
18467 us here. Let's just hope that we can use the
18468 constant pool value directly. */
18469 if (op == cop)
18470 cop = get_pool_constant (XEXP (op, 0));
18471
18472 push_minipool_fix (insn, address,
18473 recog_data.operand_loc[opno],
18474 recog_data.operand_mode[opno], cop);
18475 }
18476
18477 }
18478 }
18479 }
18480
18481 return;
18482 }
18483
18484 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18485 and unions in the context of ARMv8-M Security Extensions. It is used as a
18486 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18487 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18488 or four masks, depending on whether it is being computed for a
18489 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18490 respectively. The tree for the type of the argument or a field within an
18491 argument is passed in ARG_TYPE, the current register this argument or field
18492 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18493 argument or field starts at is passed in STARTING_BIT and the last used bit
18494 is kept in LAST_USED_BIT which is also updated accordingly. */
18495
18496 static unsigned HOST_WIDE_INT
18497 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18498 uint32_t * padding_bits_to_clear,
18499 unsigned starting_bit, int * last_used_bit)
18500
18501 {
18502 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18503
18504 if (TREE_CODE (arg_type) == RECORD_TYPE)
18505 {
18506 unsigned current_bit = starting_bit;
18507 tree field;
18508 long int offset, size;
18509
18510
18511 field = TYPE_FIELDS (arg_type);
18512 while (field)
18513 {
18514 /* The offset within a structure is always an offset from
18515 the start of that structure. Make sure we take that into account in
18516 the calculation of the register-based offset that we use here. */
18517 offset = starting_bit;
18518 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18519 offset %= 32;
18520
18521 /* This is the actual size of the field, for bitfields this is the
18522 bitfield width and not the container size. */
18523 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18524
18525 if (*last_used_bit != offset)
18526 {
18527 if (offset < *last_used_bit)
18528 {
18529 /* This field's offset is before the 'last_used_bit', that
18530 means this field goes on the next register. So we need to
18531 pad the rest of the current register and increase the
18532 register number. */
18533 uint32_t mask;
18534 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18535 mask++;
18536
18537 padding_bits_to_clear[*regno] |= mask;
18538 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18539 (*regno)++;
18540 }
18541 else
18542 {
18543 /* Otherwise we pad the bits between the last field's end and
18544 the start of the new field. */
18545 uint32_t mask;
18546
18547 mask = ((uint32_t)-1) >> (32 - offset);
18548 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18549 padding_bits_to_clear[*regno] |= mask;
18550 }
18551 current_bit = offset;
18552 }
18553
18554 /* Calculate further padding bits for inner structs/unions too. */
18555 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18556 {
18557 *last_used_bit = current_bit;
18558 not_to_clear_reg_mask
18559 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18560 padding_bits_to_clear, offset,
18561 last_used_bit);
18562 }
18563 else
18564 {
18565 /* Update 'current_bit' with this field's size. If the
18566 'current_bit' lies in a subsequent register, update 'regno' and
18567 reset 'current_bit' to point to the current bit in that new
18568 register. */
18569 current_bit += size;
18570 while (current_bit >= 32)
18571 {
18572 current_bit-=32;
18573 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18574 (*regno)++;
18575 }
18576 *last_used_bit = current_bit;
18577 }
18578
18579 field = TREE_CHAIN (field);
18580 }
18581 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18582 }
18583 else if (TREE_CODE (arg_type) == UNION_TYPE)
18584 {
18585 tree field, field_t;
18586 int i, regno_t, field_size;
18587 int max_reg = -1;
18588 int max_bit = -1;
18589 uint32_t mask;
18590 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18591 = {-1, -1, -1, -1};
18592
18593 /* To compute the padding bits in a union we only consider bits as
18594 padding bits if they are always either a padding bit or fall outside a
18595 field's size for all fields in the union. */
18596 field = TYPE_FIELDS (arg_type);
18597 while (field)
18598 {
18599 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18600 = {0U, 0U, 0U, 0U};
18601 int last_used_bit_t = *last_used_bit;
18602 regno_t = *regno;
18603 field_t = TREE_TYPE (field);
18604
18605 /* If the field's type is either a record or a union make sure to
18606 compute their padding bits too. */
18607 if (RECORD_OR_UNION_TYPE_P (field_t))
18608 not_to_clear_reg_mask
18609 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18610 &padding_bits_to_clear_t[0],
18611 starting_bit, &last_used_bit_t);
18612 else
18613 {
18614 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18615 regno_t = (field_size / 32) + *regno;
18616 last_used_bit_t = (starting_bit + field_size) % 32;
18617 }
18618
18619 for (i = *regno; i < regno_t; i++)
18620 {
18621 /* For all but the last register used by this field only keep the
18622 padding bits that were padding bits in this field. */
18623 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18624 }
18625
18626 /* For the last register, keep all padding bits that were padding
18627 bits in this field and any padding bits that are still valid
18628 as padding bits but fall outside of this field's size. */
18629 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18630 padding_bits_to_clear_res[regno_t]
18631 &= padding_bits_to_clear_t[regno_t] | mask;
18632
18633 /* Update the maximum size of the fields in terms of registers used
18634 ('max_reg') and the 'last_used_bit' in said register. */
18635 if (max_reg < regno_t)
18636 {
18637 max_reg = regno_t;
18638 max_bit = last_used_bit_t;
18639 }
18640 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18641 max_bit = last_used_bit_t;
18642
18643 field = TREE_CHAIN (field);
18644 }
18645
18646 /* Update the current padding_bits_to_clear using the intersection of the
18647 padding bits of all the fields. */
18648 for (i=*regno; i < max_reg; i++)
18649 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18650
18651 /* Do not keep trailing padding bits, we do not know yet whether this
18652 is the end of the argument. */
18653 mask = ((uint32_t) 1 << max_bit) - 1;
18654 padding_bits_to_clear[max_reg]
18655 |= padding_bits_to_clear_res[max_reg] & mask;
18656
18657 *regno = max_reg;
18658 *last_used_bit = max_bit;
18659 }
18660 else
18661 /* This function should only be used for structs and unions. */
18662 gcc_unreachable ();
18663
18664 return not_to_clear_reg_mask;
18665 }
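
/* Editorial example, assuming the usual AAPCS layout: for
     struct { uint8_t a; uint16_t b; }
   passed in r0, field a uses bits 0-7 and field b starts at bit 16, so the
   gap 8-15 is padding: mask = (0xffffffff >> (32 - 16)) - ((1 << 8) - 1)
   = 0x0000ff00 is ORed into padding_bits_to_clear[0], and, after the
   trailing-bit handling in compute_not_to_clear_mask below, only r0 remains
   in the not-to-clear mask.  */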
18666
18667 /* In the context of ARMv8-M Security Extensions, this function is used for both
18668 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18669 registers are used when returning or passing arguments, which is then
18670 returned as a mask. It will also compute a mask to indicate padding/unused
18671 bits for each of these registers, and passes this through the
18672 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18673 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18674 the starting register used to pass this argument or return value is passed
18675 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18676 for struct and union types. */
18677
18678 static unsigned HOST_WIDE_INT
18679 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18680 uint32_t * padding_bits_to_clear)
18681
18682 {
18683 int last_used_bit = 0;
18684 unsigned HOST_WIDE_INT not_to_clear_mask;
18685
18686 if (RECORD_OR_UNION_TYPE_P (arg_type))
18687 {
18688 not_to_clear_mask
18689 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18690 padding_bits_to_clear, 0,
18691 &last_used_bit);
18692
18693
18694 /* If the 'last_used_bit' is not zero, that means we are still using a
18695 part of the last 'regno'. In such cases we must clear the trailing
18696 bits. Otherwise we are not using regno and we should mark it as
18697 needing to be cleared. */
18698 if (last_used_bit != 0)
18699 padding_bits_to_clear[regno]
18700 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18701 else
18702 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18703 }
18704 else
18705 {
18706 not_to_clear_mask = 0;
18707 /* We are not dealing with structs nor unions. So these arguments may be
18708 passed in floating point registers too. In some cases a BLKmode is
18709 used when returning or passing arguments in multiple VFP registers. */
18710 if (GET_MODE (arg_rtx) == BLKmode)
18711 {
18712 int i, arg_regs;
18713 rtx reg;
18714
18715 /* This should really only occur when dealing with the hard-float
18716 ABI. */
18717 gcc_assert (TARGET_HARD_FLOAT_ABI);
18718
18719 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18720 {
18721 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18722 gcc_assert (REG_P (reg));
18723
18724 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18725
18726 /* If we are dealing with DF mode, make sure we don't
18727 clear either of the registers it addresses. */
18728 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18729 if (arg_regs > 1)
18730 {
18731 unsigned HOST_WIDE_INT mask;
18732 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18733 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18734 not_to_clear_mask |= mask;
18735 }
18736 }
18737 }
18738 else
18739 {
18740 /* Otherwise we can rely on the MODE to determine how many registers
18741 are being used by this argument. */
18742 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18743 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18744 if (arg_regs > 1)
18745 {
18746 unsigned HOST_WIDE_INT
18747 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18748 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18749 not_to_clear_mask |= mask;
18750 }
18751 }
18752 }
18753
18754 return not_to_clear_mask;
18755 }
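
/* Editorial example: a double passed in d0 under the hard-float ABI has
   ARM_NUM_REGS (DFmode) == 2, so the mask returned above keeps both s0 and
   s1 (the two single-precision registers overlapping d0) from being
   cleared.  */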
18756
18757 /* Clear secret register contents before doing a cmse_nonsecure_call or returning from
18758 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18759 are to be fully cleared, using the value in register CLEARING_REG if more
18760 efficient. The PADDING_BITS_LEN-entry array PADDING_BITS_TO_CLEAR gives
18761 the bits that need to be cleared in caller-saved core registers, with
18762 SCRATCH_REG used as a scratch register for that clearing.
18763
18764 NOTE: one of the three following conditions must hold:
18765 - SCRATCH_REG is a low register
18766 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18767 in TO_CLEAR_BITMAP)
18768 - CLEARING_REG is a low register. */
18769
18770 static void
18771 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18772 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18773 {
18774 bool saved_clearing = false;
18775 rtx saved_clearing_reg = NULL_RTX;
18776 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18777
18778 gcc_assert (arm_arch_cmse);
18779
18780 if (!bitmap_empty_p (to_clear_bitmap))
18781 {
18782 minregno = bitmap_first_set_bit (to_clear_bitmap);
18783 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18784 }
18785 clearing_regno = REGNO (clearing_reg);
18786
18787 /* Clear padding bits. */
18788 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18789 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18790 {
18791 uint64_t mask;
18792 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18793
18794 if (padding_bits_to_clear[i] == 0)
18795 continue;
18796
18797 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18798 CLEARING_REG as scratch. */
18799 if (TARGET_THUMB1
18800 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18801 {
18802 /* clearing_reg is not to be cleared; copy its value into scratch_reg
18803 such that we can use clearing_reg to clear the unused bits in the
18804 arguments. */
18805 if ((clearing_regno > maxregno
18806 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18807 && !saved_clearing)
18808 {
18809 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18810 emit_move_insn (scratch_reg, clearing_reg);
18811 saved_clearing = true;
18812 saved_clearing_reg = scratch_reg;
18813 }
18814 scratch_reg = clearing_reg;
18815 }
18816
18817 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18818 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18819 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18820
18821 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18822 mask = (~padding_bits_to_clear[i]) >> 16;
18823 rtx16 = gen_int_mode (16, SImode);
18824 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18825 if (mask)
18826 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18827
18828 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18829 }
18830 if (saved_clearing)
18831 emit_move_insn (clearing_reg, saved_clearing_reg);
18832
18833
18834 /* Clear full registers. */
18835
18836 if (TARGET_HAVE_FPCXT_CMSE)
18837 {
18838 rtvec vunspec_vec;
18839 int i, j, k, nb_regs;
18840 rtx use_seq, par, reg, set, vunspec;
18841 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18842 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18843 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18844
18845 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18846 {
18847 /* Find next register to clear and exit if none. */
18848 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18849 if (i > maxregno)
18850 break;
18851
18852 /* Compute number of consecutive registers to clear. */
18853 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18854 j++);
18855 nb_regs = j - i;
18856
18857 /* Create VSCCLRM RTX pattern. */
18858 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18859 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18860 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18861 VUNSPEC_VSCCLRM_VPR);
18862 XVECEXP (par, 0, 0) = vunspec;
18863
18864 /* Insert VFP register clearing RTX in the pattern. */
18865 start_sequence ();
18866 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18867 {
18868 if (!bitmap_bit_p (to_clear_bitmap, j))
18869 continue;
18870
18871 reg = gen_rtx_REG (SFmode, j);
18872 set = gen_rtx_SET (reg, const0_rtx);
18873 XVECEXP (par, 0, k++) = set;
18874 emit_use (reg);
18875 }
18876 use_seq = get_insns ();
18877 end_sequence ();
18878
18879 emit_insn_after (use_seq, emit_insn (par));
18880 }
18881
18882 /* Get set of core registers to clear. */
18883 bitmap_clear (core_regs_bitmap);
18884 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18885 IP_REGNUM - R0_REGNUM + 1);
18886 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18887 core_regs_bitmap);
18888 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18889
18890 if (bitmap_empty_p (to_clear_core_bitmap))
18891 return;
18892
18893 /* Create clrm RTX pattern. */
18894 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18895 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18896
18897 /* Insert core register clearing RTX in the pattern. */
18898 start_sequence ();
18899 for (j = 0, i = minregno; j < nb_regs; i++)
18900 {
18901 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18902 continue;
18903
18904 reg = gen_rtx_REG (SImode, i);
18905 set = gen_rtx_SET (reg, const0_rtx);
18906 XVECEXP (par, 0, j++) = set;
18907 emit_use (reg);
18908 }
18909
18910 /* Insert APSR register clearing RTX in the pattern
18911 * along with clobbering CC. */
18912 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18913 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18914 VUNSPEC_CLRM_APSR);
18915
18916 XVECEXP (par, 0, j++) = vunspec;
18917
18918 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18919 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18920 XVECEXP (par, 0, j) = clobber;
18921
18922 use_seq = get_insns ();
18923 end_sequence ();
18924
18925 emit_insn_after (use_seq, emit_insn (par));
18926 }
18927 else
18928 {
18929 /* If not marked for clearing, clearing_reg already does not contain
18930 any secret. */
18931 if (clearing_regno <= maxregno
18932 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18933 {
18934 emit_move_insn (clearing_reg, const0_rtx);
18935 emit_use (clearing_reg);
18936 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18937 }
18938
18939 for (regno = minregno; regno <= maxregno; regno++)
18940 {
18941 if (!bitmap_bit_p (to_clear_bitmap, regno))
18942 continue;
18943
18944 if (IS_VFP_REGNUM (regno))
18945 {
18946 /* If regno is an even vfp register and its successor is also to
18947 be cleared, use vmov. */
18948 if (TARGET_VFP_DOUBLE
18949 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18950 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18951 {
18952 emit_move_insn (gen_rtx_REG (DFmode, regno),
18953 CONST1_RTX (DFmode));
18954 emit_use (gen_rtx_REG (DFmode, regno));
18955 regno++;
18956 }
18957 else
18958 {
18959 emit_move_insn (gen_rtx_REG (SFmode, regno),
18960 CONST1_RTX (SFmode));
18961 emit_use (gen_rtx_REG (SFmode, regno));
18962 }
18963 }
18964 else
18965 {
18966 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18967 emit_use (gen_rtx_REG (SImode, regno));
18968 }
18969 }
18970 }
18971 }
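
/* Editorial note: the padding-bit clearing above builds
   ~padding_bits_to_clear in SCRATCH_REG in two 16-bit halves and then ANDs
   it into the argument register.  A stand-alone sketch of the mask
   arithmetic, using standard C types only (an illustration, not part of the
   build):  */
#if 0
#include <stdint.h>

static uint32_t
clear_padding_sketch (uint32_t reg_value, uint32_t padding_bits_to_clear)
{
  uint32_t lo = (~padding_bits_to_clear) & 0xffff;	/* lower half  */
  uint32_t hi = (~padding_bits_to_clear) >> 16;		/* upper half  */
  uint32_t scratch = (hi << 16) | lo;
  return reg_value & scratch;		/* ands rN, rN, scratch  */
}

/* With padding_bits_to_clear == 0x0000ff00 this clears exactly bits 8-15 of
   the register, leaving the argument bits intact.  */
#endif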
18972
18973 /* Clear core and caller-saved VFP registers not used to pass arguments before
18974 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18975 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18976 libgcc/config/arm/cmse_nonsecure_call.S. */
18977
18978 static void
18979 cmse_nonsecure_call_inline_register_clear (void)
18980 {
18981 basic_block bb;
18982
18983 FOR_EACH_BB_FN (bb, cfun)
18984 {
18985 rtx_insn *insn;
18986
18987 FOR_BB_INSNS (bb, insn)
18988 {
18989 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18990 /* frame = VFP regs + FPSCR + VPR. */
18991 unsigned lazy_store_stack_frame_size
18992 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18993 unsigned long callee_saved_mask
18994 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18995 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18996 unsigned address_regnum, regno;
18997 unsigned max_int_regno
18998 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18999 unsigned max_fp_regno
19000 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19001 unsigned maxregno
19002 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19003 auto_sbitmap to_clear_bitmap (maxregno + 1);
19004 rtx_insn *seq;
19005 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19006 rtx address;
19007 CUMULATIVE_ARGS args_so_far_v;
19008 cumulative_args_t args_so_far;
19009 tree arg_type, fntype;
19010 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19011 function_args_iterator args_iter;
19012 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19013
19014 if (!NONDEBUG_INSN_P (insn))
19015 continue;
19016
19017 if (!CALL_P (insn))
19018 continue;
19019
19020 pat = PATTERN (insn);
19021 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19022 call = XVECEXP (pat, 0, 0);
19023
19024 /* Get the real call RTX if the insn sets a value, ie. returns. */
19025 if (GET_CODE (call) == SET)
19026 call = SET_SRC (call);
19027
19028 /* Check if it is a cmse_nonsecure_call. */
19029 unspec = XEXP (call, 0);
19030 if (GET_CODE (unspec) != UNSPEC
19031 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19032 continue;
19033
19034 /* Mark registers that need to be cleared. Those that hold a
19035 parameter are removed from the set further below. */
19036 bitmap_clear (to_clear_bitmap);
19037 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19038 max_int_regno - R0_REGNUM + 1);
19039
19040 /* Only look at the caller-saved floating point registers in case of
19041 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19042 lazy store and loads which clear both caller- and callee-saved
19043 registers. */
19044 if (!lazy_fpclear)
19045 {
19046 auto_sbitmap float_bitmap (maxregno + 1);
19047
19048 bitmap_clear (float_bitmap);
19049 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19050 max_fp_regno - FIRST_VFP_REGNUM + 1);
19051 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19052 }
19053
19054 /* Make sure the register used to hold the function address is not
19055 cleared. */
19056 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19057 gcc_assert (MEM_P (address));
19058 gcc_assert (REG_P (XEXP (address, 0)));
19059 address_regnum = REGNO (XEXP (address, 0));
19060 if (address_regnum <= max_int_regno)
19061 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19062
19063 /* Set basic block of call insn so that df rescan is performed on
19064 insns inserted here. */
19065 set_block_for_insn (insn, bb);
19066 df_set_flags (DF_DEFER_INSN_RESCAN);
19067 start_sequence ();
19068
19069 /* Make sure the scheduler doesn't schedule other insns beyond
19070 here. */
19071 emit_insn (gen_blockage ());
19072
19073 /* Walk through all arguments and clear registers
19074 appropriately. */
19075 fntype = TREE_TYPE (MEM_EXPR (address));
19076 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19077 NULL_TREE);
19078 args_so_far = pack_cumulative_args (&args_so_far_v);
19079 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19080 {
19081 rtx arg_rtx;
19082 uint64_t to_clear_args_mask;
19083
19084 if (VOID_TYPE_P (arg_type))
19085 continue;
19086
19087 function_arg_info arg (arg_type, /*named=*/true);
19088 if (!first_param)
19089 /* ??? We should advance after processing the argument and pass
19090 the argument we're advancing past. */
19091 arm_function_arg_advance (args_so_far, arg);
19092
19093 arg_rtx = arm_function_arg (args_so_far, arg);
19094 gcc_assert (REG_P (arg_rtx));
19095 to_clear_args_mask
19096 = compute_not_to_clear_mask (arg_type, arg_rtx,
19097 REGNO (arg_rtx),
19098 &padding_bits_to_clear[0]);
19099 if (to_clear_args_mask)
19100 {
19101 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19102 {
19103 if (to_clear_args_mask & (1ULL << regno))
19104 bitmap_clear_bit (to_clear_bitmap, regno);
19105 }
19106 }
19107
19108 first_param = false;
19109 }
19110
19111 /* We use right shift and left shift to clear the LSB of the address
19112 we jump to instead of using bic, to avoid having to use an extra
19113 register on Thumb-1. */
19114 clearing_reg = XEXP (address, 0);
19115 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19116 emit_insn (gen_rtx_SET (clearing_reg, shift));
19117 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19118 emit_insn (gen_rtx_SET (clearing_reg, shift));
19119
19120 if (clear_callee_saved)
19121 {
19122 rtx push_insn =
19123 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19124 /* Disable frame debug info in push because it needs to be
19125 disabled for pop (see below). */
19126 RTX_FRAME_RELATED_P (push_insn) = 0;
19127
19128 /* Lazy store multiple. */
19129 if (lazy_fpclear)
19130 {
19131 rtx imm;
19132 rtx_insn *add_insn;
19133
19134 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19135 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19136 stack_pointer_rtx, imm));
19137 /* If we have the frame pointer, then it will be the
19138 CFA reg. Otherwise, the stack pointer is the CFA
19139 reg, so we need to emit a CFA adjust. */
19140 if (!frame_pointer_needed)
19141 arm_add_cfa_adjust_cfa_note (add_insn,
19142 - lazy_store_stack_frame_size,
19143 stack_pointer_rtx,
19144 stack_pointer_rtx);
19145 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19146 }
19147 /* Save VFP callee-saved registers. */
19148 else
19149 {
19150 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19151 (max_fp_regno - D7_VFP_REGNUM) / 2);
19152 /* Disable frame debug info in push because it needs to be
19153 disabled for vpop (see below). */
19154 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19155 }
19156 }
19157
19158 /* Clear caller-saved registers that leak before doing a non-secure
19159 call. */
19160 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19161 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19162 NUM_ARG_REGS, ip_reg, clearing_reg);
19163
19164 seq = get_insns ();
19165 end_sequence ();
19166 emit_insn_before (seq, insn);
19167
19168 if (TARGET_HAVE_FPCXT_CMSE)
19169 {
19170 rtx_insn *last, *pop_insn, *after = insn;
19171
19172 start_sequence ();
19173
19174 /* Lazy load multiple done as part of libcall in Armv8-M. */
19175 if (lazy_fpclear)
19176 {
19177 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19178 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19179 rtx_insn *add_insn =
19180 emit_insn (gen_addsi3 (stack_pointer_rtx,
19181 stack_pointer_rtx, imm));
19182 if (!frame_pointer_needed)
19183 arm_add_cfa_adjust_cfa_note (add_insn,
19184 lazy_store_stack_frame_size,
19185 stack_pointer_rtx,
19186 stack_pointer_rtx);
19187 }
19188 /* Restore VFP callee-saved registers. */
19189 else
19190 {
19191 int nb_callee_saved_vfp_regs =
19192 (max_fp_regno - D7_VFP_REGNUM) / 2;
19193 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19194 nb_callee_saved_vfp_regs,
19195 stack_pointer_rtx);
19196 /* Disable frame debug info in vpop because the SP adjustment
19197 is made using a CFA adjustment note while the CFA register used is
19198 sometimes R7. This then causes an assert failure in the
19199 CFI note creation code. */
19200 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19201 }
19202
19203 arm_emit_multi_reg_pop (callee_saved_mask);
19204 pop_insn = get_last_insn ();
19205
19206 /* Disable frame debug info in pop because the restore notes reset the state
19207 of popped registers to what it was at the beginning of the
19208 function, before the prologue. This leads to incorrect state
19209 when doing the pop after the nonsecure call for registers that
19210 are pushed both in prologue and before the nonsecure call.
19211
19212 It also occasionally triggers an assert failure in CFI note
19213 creation code when there are two codepaths to the epilogue,
19214 one of which does not go through the nonsecure call.
19215 Obviously this means that debugging between the push and pop is
19216 not reliable. */
19217 RTX_FRAME_RELATED_P (pop_insn) = 0;
19218
19219 seq = get_insns ();
19220 last = get_last_insn ();
19221 end_sequence ();
19222
19223 emit_insn_after (seq, after);
19224
19225 /* Skip the pop we have just inserted after the nonsecure call; we know
19226 it does not contain a nonsecure call. */
19227 insn = last;
19228 }
19229 }
19230 }
19231 }
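
/* Editorial note: at the source level, the calls instrumented above are
   calls through pointers declared with the cmse_nonsecure_call attribute
   when compiling with -mcmse, e.g. (an illustrative sketch, not part of the
   build):  */
#if 0
typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);

int
call_nonsecure (ns_fn_t *fn, int x)
{
  /* Before the transition to the non-secure function, argument registers
     that do not carry parameters (and, depending on the FP ABI, the
     caller-saved VFP registers) are cleared by the code above.  */
  return fn (x);
}
#endif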
19232
19233 /* Rewrite move insn into subtract of 0 if the condition codes will
19234 be useful in the next conditional jump insn. */
19235
19236 static void
19237 thumb1_reorg (void)
19238 {
19239 basic_block bb;
19240
19241 FOR_EACH_BB_FN (bb, cfun)
19242 {
19243 rtx dest, src;
19244 rtx cmp, op0, op1, set = NULL;
19245 rtx_insn *prev, *insn = BB_END (bb);
19246 bool insn_clobbered = false;
19247
19248 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19249 insn = PREV_INSN (insn);
19250
19251 /* Find the last cbranchsi4_insn in basic block BB. */
19252 if (insn == BB_HEAD (bb)
19253 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19254 continue;
19255
19256 /* Get the register with which we are comparing. */
19257 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19258 op0 = XEXP (cmp, 0);
19259 op1 = XEXP (cmp, 1);
19260
19261 /* Check that comparison is against ZERO. */
19262 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19263 continue;
19264
19265 /* Find the first flag setting insn before INSN in basic block BB. */
19266 gcc_assert (insn != BB_HEAD (bb));
19267 for (prev = PREV_INSN (insn);
19268 (!insn_clobbered
19269 && prev != BB_HEAD (bb)
19270 && (NOTE_P (prev)
19271 || DEBUG_INSN_P (prev)
19272 || ((set = single_set (prev)) != NULL
19273 && get_attr_conds (prev) == CONDS_NOCOND)));
19274 prev = PREV_INSN (prev))
19275 {
19276 if (reg_set_p (op0, prev))
19277 insn_clobbered = true;
19278 }
19279
19280 /* Skip if op0 is clobbered by insn other than prev. */
19281 if (insn_clobbered)
19282 continue;
19283
19284 if (!set)
19285 continue;
19286
19287 dest = SET_DEST (set);
19288 src = SET_SRC (set);
19289 if (!low_register_operand (dest, SImode)
19290 || !low_register_operand (src, SImode))
19291 continue;
19292
19293 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19294 in INSN. Both src and dest of the move insn are checked. */
19295 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19296 {
19297 dest = copy_rtx (dest);
19298 src = copy_rtx (src);
19299 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19300 PATTERN (prev) = gen_rtx_SET (dest, src);
19301 INSN_CODE (prev) = -1;
19302 /* Set test register in INSN to dest. */
19303 XEXP (cmp, 0) = copy_rtx (dest);
19304 INSN_CODE (insn) = -1;
19305 }
19306 }
19307 }
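
/* Editorial example: for a Thumb-1 sequence along the lines of
         movs  r2, r1
         ...
         cmp   r2, #0
         beq   .L1
   the transformation above rewrites the move as a subtract of zero
   (e.g. "subs r2, r1, #0"), whose condition codes make the separate
   comparison against zero removable by later passes.  */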
19308
19309 /* Convert instructions to their cc-clobbering variant if possible, since
19310 that allows us to use smaller encodings. */
19311
19312 static void
19313 thumb2_reorg (void)
19314 {
19315 basic_block bb;
19316 regset_head live;
19317
19318 INIT_REG_SET (&live);
19319
19320 /* We are freeing block_for_insn in the toplev to keep compatibility
19321 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19322 compute_bb_for_insn ();
19323 df_analyze ();
19324
19325 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19326
19327 FOR_EACH_BB_FN (bb, cfun)
19328 {
19329 if ((current_tune->disparage_flag_setting_t16_encodings
19330 == tune_params::DISPARAGE_FLAGS_ALL)
19331 && optimize_bb_for_speed_p (bb))
19332 continue;
19333
19334 rtx_insn *insn;
19335 Convert_Action action = SKIP;
19336 Convert_Action action_for_partial_flag_setting
19337 = ((current_tune->disparage_flag_setting_t16_encodings
19338 != tune_params::DISPARAGE_FLAGS_NEITHER)
19339 && optimize_bb_for_speed_p (bb))
19340 ? SKIP : CONV;
19341
19342 COPY_REG_SET (&live, DF_LR_OUT (bb));
19343 df_simulate_initialize_backwards (bb, &live);
19344 FOR_BB_INSNS_REVERSE (bb, insn)
19345 {
19346 if (NONJUMP_INSN_P (insn)
19347 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19348 && GET_CODE (PATTERN (insn)) == SET)
19349 {
19350 action = SKIP;
19351 rtx pat = PATTERN (insn);
19352 rtx dst = XEXP (pat, 0);
19353 rtx src = XEXP (pat, 1);
19354 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19355
19356 if (UNARY_P (src) || BINARY_P (src))
19357 op0 = XEXP (src, 0);
19358
19359 if (BINARY_P (src))
19360 op1 = XEXP (src, 1);
19361
19362 if (low_register_operand (dst, SImode))
19363 {
19364 switch (GET_CODE (src))
19365 {
19366 case PLUS:
19367 /* Adding two registers and storing the result
19368 in the first source is already a 16-bit
19369 operation. */
19370 if (rtx_equal_p (dst, op0)
19371 && register_operand (op1, SImode))
19372 break;
19373
19374 if (low_register_operand (op0, SImode))
19375 {
19376 /* ADDS <Rd>,<Rn>,<Rm> */
19377 if (low_register_operand (op1, SImode))
19378 action = CONV;
19379 /* ADDS <Rdn>,#<imm8> */
19380 /* SUBS <Rdn>,#<imm8> */
19381 else if (rtx_equal_p (dst, op0)
19382 && CONST_INT_P (op1)
19383 && IN_RANGE (INTVAL (op1), -255, 255))
19384 action = CONV;
19385 /* ADDS <Rd>,<Rn>,#<imm3> */
19386 /* SUBS <Rd>,<Rn>,#<imm3> */
19387 else if (CONST_INT_P (op1)
19388 && IN_RANGE (INTVAL (op1), -7, 7))
19389 action = CONV;
19390 }
19391 /* ADCS <Rd>, <Rn> */
19392 else if (GET_CODE (XEXP (src, 0)) == PLUS
19393 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19394 && low_register_operand (XEXP (XEXP (src, 0), 1),
19395 SImode)
19396 && COMPARISON_P (op1)
19397 && cc_register (XEXP (op1, 0), VOIDmode)
19398 && maybe_get_arm_condition_code (op1) == ARM_CS
19399 && XEXP (op1, 1) == const0_rtx)
19400 action = CONV;
19401 break;
19402
19403 case MINUS:
19404 /* RSBS <Rd>,<Rn>,#0
19405 Not handled here: see NEG below. */
19406 /* SUBS <Rd>,<Rn>,#<imm3>
19407 SUBS <Rdn>,#<imm8>
19408 Not handled here: see PLUS above. */
19409 /* SUBS <Rd>,<Rn>,<Rm> */
19410 if (low_register_operand (op0, SImode)
19411 && low_register_operand (op1, SImode))
19412 action = CONV;
19413 break;
19414
19415 case MULT:
19416 /* MULS <Rdm>,<Rn>,<Rdm>
19417 As an exception to the rule, this is only used
19418 when optimizing for size since MULS is slow on all
19419 known implementations. We do not even want to use
19420 MULS in cold code, if optimizing for speed, so we
19421 test the global flag here. */
19422 if (!optimize_size)
19423 break;
19424 /* Fall through. */
19425 case AND:
19426 case IOR:
19427 case XOR:
19428 /* ANDS <Rdn>,<Rm> */
19429 if (rtx_equal_p (dst, op0)
19430 && low_register_operand (op1, SImode))
19431 action = action_for_partial_flag_setting;
19432 else if (rtx_equal_p (dst, op1)
19433 && low_register_operand (op0, SImode))
19434 action = action_for_partial_flag_setting == SKIP
19435 ? SKIP : SWAP_CONV;
19436 break;
19437
19438 case ASHIFTRT:
19439 case ASHIFT:
19440 case LSHIFTRT:
19441 /* ASRS <Rdn>,<Rm> */
19442 /* LSRS <Rdn>,<Rm> */
19443 /* LSLS <Rdn>,<Rm> */
19444 if (rtx_equal_p (dst, op0)
19445 && low_register_operand (op1, SImode))
19446 action = action_for_partial_flag_setting;
19447 /* ASRS <Rd>,<Rm>,#<imm5> */
19448 /* LSRS <Rd>,<Rm>,#<imm5> */
19449 /* LSLS <Rd>,<Rm>,#<imm5> */
19450 else if (low_register_operand (op0, SImode)
19451 && CONST_INT_P (op1)
19452 && IN_RANGE (INTVAL (op1), 0, 31))
19453 action = action_for_partial_flag_setting;
19454 break;
19455
19456 case ROTATERT:
19457 /* RORS <Rdn>,<Rm> */
19458 if (rtx_equal_p (dst, op0)
19459 && low_register_operand (op1, SImode))
19460 action = action_for_partial_flag_setting;
19461 break;
19462
19463 case NOT:
19464 /* MVNS <Rd>,<Rm> */
19465 if (low_register_operand (op0, SImode))
19466 action = action_for_partial_flag_setting;
19467 break;
19468
19469 case NEG:
19470 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19471 if (low_register_operand (op0, SImode))
19472 action = CONV;
19473 break;
19474
19475 case CONST_INT:
19476 /* MOVS <Rd>,#<imm8> */
19477 if (CONST_INT_P (src)
19478 && IN_RANGE (INTVAL (src), 0, 255))
19479 action = action_for_partial_flag_setting;
19480 break;
19481
19482 case REG:
19483 /* MOVS and MOV<c> with registers have different
19484 encodings, so are not relevant here. */
19485 break;
19486
19487 default:
19488 break;
19489 }
19490 }
19491
19492 if (action != SKIP)
19493 {
19494 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19495 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19496 rtvec vec;
19497
19498 if (action == SWAP_CONV)
19499 {
19500 src = copy_rtx (src);
19501 XEXP (src, 0) = op1;
19502 XEXP (src, 1) = op0;
19503 pat = gen_rtx_SET (dst, src);
19504 vec = gen_rtvec (2, pat, clobber);
19505 }
19506 else /* action == CONV */
19507 vec = gen_rtvec (2, pat, clobber);
19508
19509 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19510 INSN_CODE (insn) = -1;
19511 }
19512 }
19513
19514 if (NONDEBUG_INSN_P (insn))
19515 df_simulate_one_insn_backwards (bb, insn, &live);
19516 }
19517 }
19518
19519 CLEAR_REG_SET (&live);
19520 }
19521
19522 /* GCC puts the pool in the wrong place for ARM, since pc-relative
19523 loads can only reach a limited distance either side of the pc. We do
19524 some special munging to move the constant pool values to the correct
19525 point in the code. */
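/* For example, a load of an arbitrary 32-bit constant is emitted as a
   pc-relative "ldr rN, .LCx", with .LCx placed in a nearby minipool; such
   a literal load only reaches roughly +/-4KB in ARM state and about 1KB
   (forwards only) for the 16-bit Thumb encoding, so pools must be dropped
   into the instruction stream close to their uses, branching around them
   where necessary.  */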
19526 static void
19527 arm_reorg (void)
19528 {
19529 rtx_insn *insn;
19530 HOST_WIDE_INT address = 0;
19531 Mfix * fix;
19532
19533 if (use_cmse)
19534 cmse_nonsecure_call_inline_register_clear ();
19535
19536 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19537 if (cfun->is_thunk)
19538 ;
19539 else if (TARGET_THUMB1)
19540 thumb1_reorg ();
19541 else if (TARGET_THUMB2)
19542 thumb2_reorg ();
19543
19544 /* Ensure all insns that must be split have been split at this point.
19545 Otherwise, the pool placement code below may compute incorrect
19546 insn lengths. Note that when optimizing, all insns have already
19547 been split at this point. */
19548 if (!optimize)
19549 split_all_insns_noflow ();
19550
19551 /* When literal pools are disabled it should no longer be necessary to
19552 create any; make sure we do not even attempt to create one. */
19553 if (arm_disable_literal_pool)
19554 return;
19555
19556 minipool_fix_head = minipool_fix_tail = NULL;
19557
19558 /* The first insn must always be a note, or the code below won't
19559 scan it properly. */
19560 insn = get_insns ();
19561 gcc_assert (NOTE_P (insn));
19562 minipool_pad = 0;
19563
19564 /* Scan all the insns and record the operands that will need fixing. */
19565 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19566 {
19567 if (BARRIER_P (insn))
19568 push_minipool_barrier (insn, address);
19569 else if (INSN_P (insn))
19570 {
19571 rtx_jump_table_data *table;
19572
19573 note_invalid_constants (insn, address, true);
19574 address += get_attr_length (insn);
19575
19576 /* If the insn is a vector jump, add the size of the table
19577 and skip the table. */
19578 if (tablejump_p (insn, NULL, &table))
19579 {
19580 address += get_jump_table_size (table);
19581 insn = table;
19582 }
19583 }
19584 else if (LABEL_P (insn))
19585 /* Add the worst-case padding due to alignment. We don't add
19586 the _current_ padding because the minipool insertions
19587 themselves might change it. */
19588 address += get_label_padding (insn);
19589 }
19590
19591 fix = minipool_fix_head;
19592
19593 /* Now scan the fixups and perform the required changes. */
19594 while (fix)
19595 {
19596 Mfix * ftmp;
19597 Mfix * fdel;
19598 Mfix * last_added_fix;
19599 Mfix * last_barrier = NULL;
19600 Mfix * this_fix;
19601
19602 /* Skip any further barriers before the next fix. */
19603 while (fix && BARRIER_P (fix->insn))
19604 fix = fix->next;
19605
19606 /* No more fixes. */
19607 if (fix == NULL)
19608 break;
19609
19610 last_added_fix = NULL;
19611
19612 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19613 {
19614 if (BARRIER_P (ftmp->insn))
19615 {
19616 if (ftmp->address >= minipool_vector_head->max_address)
19617 break;
19618
19619 last_barrier = ftmp;
19620 }
19621 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19622 break;
19623
19624 last_added_fix = ftmp; /* Keep track of the last fix added. */
19625 }
19626
19627 /* If we found a barrier, drop back to that; any fixes that we
19628 could have reached but come after the barrier will now go in
19629 the next mini-pool. */
19630 if (last_barrier != NULL)
19631 {
19632 /* Reduce the refcount for those fixes that won't go into this
19633 pool after all. */
19634 for (fdel = last_barrier->next;
19635 fdel && fdel != ftmp;
19636 fdel = fdel->next)
19637 {
19638 fdel->minipool->refcount--;
19639 fdel->minipool = NULL;
19640 }
19641
19642 ftmp = last_barrier;
19643 }
19644 else
19645 {
19646 /* ftmp is the first fix that we can't fit into this pool and
19647 there are no natural barriers that we could use. Insert a
19648 new barrier in the code somewhere between the previous
19649 fix and this one, and arrange to jump around it. */
19650 HOST_WIDE_INT max_address;
19651
19652 /* The last item on the list of fixes must be a barrier, so
19653 we can never run off the end of the list of fixes without
19654 last_barrier being set. */
19655 gcc_assert (ftmp);
19656
19657 max_address = minipool_vector_head->max_address;
19658 /* Check that there isn't another fix that is in range that
19659 we couldn't fit into this pool because the pool was
19660 already too large: we need to put the pool before such an
19661 instruction. The pool itself may come just after the
19662 fix because create_fix_barrier also allows space for a
19663 jump instruction. */
19664 if (ftmp->address < max_address)
19665 max_address = ftmp->address + 1;
19666
19667 last_barrier = create_fix_barrier (last_added_fix, max_address);
19668 }
19669
19670 assign_minipool_offsets (last_barrier);
19671
19672 while (ftmp)
19673 {
19674 if (!BARRIER_P (ftmp->insn)
19675 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19676 == NULL))
19677 break;
19678
19679 ftmp = ftmp->next;
19680 }
19681
19682 /* Scan over the fixes we have identified for this pool, fixing them
19683 up and adding the constants to the pool itself. */
19684 for (this_fix = fix; this_fix && ftmp != this_fix;
19685 this_fix = this_fix->next)
19686 if (!BARRIER_P (this_fix->insn))
19687 {
19688 rtx addr
19689 = plus_constant (Pmode,
19690 gen_rtx_LABEL_REF (VOIDmode,
19691 minipool_vector_label),
19692 this_fix->minipool->offset);
19693 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19694 }
19695
19696 dump_minipool (last_barrier->insn);
19697 fix = ftmp;
19698 }
19699
19700 /* From now on we must synthesize any constants that we can't handle
19701 directly. This can happen if the RTL gets split during final
19702 instruction generation. */
19703 cfun->machine->after_arm_reorg = 1;
19704
19705 /* Free the minipool memory. */
19706 obstack_free (&minipool_obstack, minipool_startobj);
19707 }
19708 \f
19709 /* Routines to output assembly language. */
19710
19711 /* Return string representation of passed in real value. */
19712 static const char *
19713 fp_const_from_val (REAL_VALUE_TYPE *r)
19714 {
19715 if (!fp_consts_inited)
19716 init_fp_table ();
19717
19718 gcc_assert (real_equal (r, &value_fp0));
19719 return "0";
19720 }
19721
19722 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19723 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19724 insn is in the list, and UPDATE is true iff the list contains an
19725 explicit update of the base register. */
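/* For example, popping {r4, r5, pc} through SP with writeback is printed
   as "pop {r4, r5, pc}", whereas the same operation when returning from an
   interrupt handler is printed as "ldmfd sp!, {r4, r5, pc}^", where the
   trailing "^" restores SPSR into CPSR.  */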
19726 void
19727 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19728 bool update)
19729 {
19730 int i;
19731 char pattern[100];
19732 int offset;
19733 const char *conditional;
19734 int num_saves = XVECLEN (operands[0], 0);
19735 unsigned int regno;
19736 unsigned int regno_base = REGNO (operands[1]);
19737 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19738
19739 offset = 0;
19740 offset += update ? 1 : 0;
19741 offset += return_pc ? 1 : 0;
19742
19743 /* Is the base register in the list? */
19744 for (i = offset; i < num_saves; i++)
19745 {
19746 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19747 /* If SP is in the list, then the base register must be SP. */
19748 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19749 /* If base register is in the list, there must be no explicit update. */
19750 if (regno == regno_base)
19751 gcc_assert (!update);
19752 }
19753
19754 conditional = reverse ? "%?%D0" : "%?%d0";
19755 /* Can't use POP if returning from an interrupt. */
19756 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19757 sprintf (pattern, "pop%s\t{", conditional);
19758 else
19759 {
19760 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19761 It's just a convention; their semantics are identical. */
19762 if (regno_base == SP_REGNUM)
19763 sprintf (pattern, "ldmfd%s\t", conditional);
19764 else if (update)
19765 sprintf (pattern, "ldmia%s\t", conditional);
19766 else
19767 sprintf (pattern, "ldm%s\t", conditional);
19768
19769 strcat (pattern, reg_names[regno_base]);
19770 if (update)
19771 strcat (pattern, "!, {");
19772 else
19773 strcat (pattern, ", {");
19774 }
19775
19776 /* Output the first destination register. */
19777 strcat (pattern,
19778 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19779
19780 /* Output the rest of the destination registers. */
19781 for (i = offset + 1; i < num_saves; i++)
19782 {
19783 strcat (pattern, ", ");
19784 strcat (pattern,
19785 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19786 }
19787
19788 strcat (pattern, "}");
19789
19790 if (interrupt_p && return_pc)
19791 strcat (pattern, "^");
19792
19793 output_asm_insn (pattern, &cond);
19794 }
19795
19796
19797 /* Output the assembly for a store multiple. */
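/* For example, a store of two consecutive double registers starting at d8
   through the stack pointer comes out as something like "vpush.64 {d8, d9}",
   while the same store through another base register uses the
   "vstmdb rN!, {...}" form.  */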
19798
19799 const char *
19800 vfp_output_vstmd (rtx * operands)
19801 {
19802 char pattern[100];
19803 int p;
19804 int base;
19805 int i;
19806 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19807 ? XEXP (operands[0], 0)
19808 : XEXP (XEXP (operands[0], 0), 0);
19809 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19810
19811 if (push_p)
19812 strcpy (pattern, "vpush%?.64\t{%P1");
19813 else
19814 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19815
19816 p = strlen (pattern);
19817
19818 gcc_assert (REG_P (operands[1]));
19819
19820 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19821 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19822 {
19823 p += sprintf (&pattern[p], ", d%d", base + i);
19824 }
19825 strcpy (&pattern[p], "}");
19826
19827 output_asm_insn (pattern, operands);
19828 return "";
19829 }
19830
19831
19832 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19833 number of bytes pushed. */
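/* For example, saving d8-d15 is a single call with COUNT == 8 that returns
   64; the attached REG_FRAME_RELATED_EXPR note describes the stack
   adjustment and each individual double-word store so that the DWARF
   unwinder sees the effect of the combined push.  */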
19834
19835 static int
19836 vfp_emit_fstmd (int base_reg, int count)
19837 {
19838 rtx par;
19839 rtx dwarf;
19840 rtx tmp, reg;
19841 int i;
19842
19843 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19844 register pairs are stored by a store multiple insn. We avoid this
19845 by pushing an extra pair. */
19846 if (count == 2 && !arm_arch6)
19847 {
19848 if (base_reg == LAST_VFP_REGNUM - 3)
19849 base_reg -= 2;
19850 count++;
19851 }
19852
19853 /* FSTMD may not store more than 16 doubleword registers at once. Split
19854 larger stores into multiple parts (up to a maximum of two, in
19855 practice). */
19856 if (count > 16)
19857 {
19858 int saved;
19859 /* NOTE: base_reg is an internal register number, so each D register
19860 counts as 2. */
19861 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19862 saved += vfp_emit_fstmd (base_reg, 16);
19863 return saved;
19864 }
19865
19866 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19867 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19868
19869 reg = gen_rtx_REG (DFmode, base_reg);
19870 base_reg += 2;
19871
19872 XVECEXP (par, 0, 0)
19873 = gen_rtx_SET (gen_frame_mem
19874 (BLKmode,
19875 gen_rtx_PRE_MODIFY (Pmode,
19876 stack_pointer_rtx,
19877 plus_constant
19878 (Pmode, stack_pointer_rtx,
19879 - (count * 8)))
19880 ),
19881 gen_rtx_UNSPEC (BLKmode,
19882 gen_rtvec (1, reg),
19883 UNSPEC_PUSH_MULT));
19884
19885 tmp = gen_rtx_SET (stack_pointer_rtx,
19886 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19887 RTX_FRAME_RELATED_P (tmp) = 1;
19888 XVECEXP (dwarf, 0, 0) = tmp;
19889
19890 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19891 RTX_FRAME_RELATED_P (tmp) = 1;
19892 XVECEXP (dwarf, 0, 1) = tmp;
19893
19894 for (i = 1; i < count; i++)
19895 {
19896 reg = gen_rtx_REG (DFmode, base_reg);
19897 base_reg += 2;
19898 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19899
19900 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19901 plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 i * 8)),
19904 reg);
19905 RTX_FRAME_RELATED_P (tmp) = 1;
19906 XVECEXP (dwarf, 0, i + 1) = tmp;
19907 }
19908
19909 par = emit_insn (par);
19910 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19911 RTX_FRAME_RELATED_P (par) = 1;
19912
19913 return count * 8;
19914 }
19915
19916 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19917 has the cmse_nonsecure_call attribute; return false otherwise. */
19918
19919 bool
19920 detect_cmse_nonsecure_call (tree addr)
19921 {
19922 if (!addr)
19923 return false;
19924
19925 tree fntype = TREE_TYPE (addr);
19926 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19927 TYPE_ATTRIBUTES (fntype)))
19928 return true;
19929 return false;
19930 }
19931
19932
19933 /* Emit a call instruction with pattern PAT. ADDR is the address of
19934 the call target. */
19935
19936 void
19937 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19938 {
19939 rtx insn;
19940
19941 insn = emit_call_insn (pat);
19942
19943 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19944 If the call might use such an entry, add a use of the PIC register
19945 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19946 if (TARGET_VXWORKS_RTP
19947 && flag_pic
19948 && !sibcall
19949 && SYMBOL_REF_P (addr)
19950 && (SYMBOL_REF_DECL (addr)
19951 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19952 : !SYMBOL_REF_LOCAL_P (addr)))
19953 {
19954 require_pic_register (NULL_RTX, false /*compute_now*/);
19955 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19956 }
19957
19958 if (TARGET_FDPIC)
19959 {
19960 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19961 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19962 }
19963
19964 if (TARGET_AAPCS_BASED)
19965 {
19966 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19967 linker. We need to add an IP clobber to allow setting
19968 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19969 is not needed since it's a fixed register. */
19970 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19971 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19972 }
19973 }
19974
19975 /* Output a 'call' insn. */
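/* For example, an indirect call through r2 on an ARMv4T or interworking
   target is printed as "mov lr, pc" followed by "bx r2"; on older cores
   without bx the final instruction is "mov pc, r2" instead.  */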
19976 const char *
19977 output_call (rtx *operands)
19978 {
19979 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19980
19981 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19982 if (REGNO (operands[0]) == LR_REGNUM)
19983 {
19984 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19985 output_asm_insn ("mov%?\t%0, %|lr", operands);
19986 }
19987
19988 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19989
19990 if (TARGET_INTERWORK || arm_arch4t)
19991 output_asm_insn ("bx%?\t%0", operands);
19992 else
19993 output_asm_insn ("mov%?\t%|pc, %0", operands);
19994
19995 return "";
19996 }
19997
19998 /* Output a move from ARM registers to ARM registers of a long double.
19999 OPERANDS[0] is the destination.
20000 OPERANDS[1] is the source. */
20001 const char *
20002 output_mov_long_double_arm_from_arm (rtx *operands)
20003 {
20004 /* We have to be careful here because the two might overlap. */
20005 int dest_start = REGNO (operands[0]);
20006 int src_start = REGNO (operands[1]);
20007 rtx ops[2];
20008 int i;
20009
20010 if (dest_start < src_start)
20011 {
20012 for (i = 0; i < 3; i++)
20013 {
20014 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20015 ops[1] = gen_rtx_REG (SImode, src_start + i);
20016 output_asm_insn ("mov%?\t%0, %1", ops);
20017 }
20018 }
20019 else
20020 {
20021 for (i = 2; i >= 0; i--)
20022 {
20023 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20024 ops[1] = gen_rtx_REG (SImode, src_start + i);
20025 output_asm_insn ("mov%?\t%0, %1", ops);
20026 }
20027 }
20028
20029 return "";
20030 }
20031
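/* Load SRC into DEST using a pair of insns.  A constant is split into a
   SET of the low 16 bits followed, when the upper half is nonzero, by a
   ZERO_EXTRACT SET of the high 16 bits (typically matching the movw/movt
   patterns); anything else is emitted as a HIGH/LO_SUM pair.  A REG_EQUAL
   note on the last insn records the original value where appropriate.  */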
20032 void
20033 arm_emit_movpair (rtx dest, rtx src)
20034 {
20035 /* If the src is an immediate, simplify it. */
20036 if (CONST_INT_P (src))
20037 {
20038 HOST_WIDE_INT val = INTVAL (src);
20039 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20040 if ((val >> 16) & 0x0000ffff)
20041 {
20042 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20043 GEN_INT (16)),
20044 GEN_INT ((val >> 16) & 0x0000ffff));
20045 rtx_insn *insn = get_last_insn ();
20046 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20047 }
20048 return;
20049 }
20050 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20051 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20052 rtx_insn *insn = get_last_insn ();
20053 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20054 }
20055
20056 /* Output a move between double words. It must be REG<-MEM
20057 or MEM<-REG. */
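/* For example, a DImode load of {r0, r1} from [r2] is normally printed as
   "ldrd r0, [r2]" when LDRD is available (and, in ARM state, r0 is even),
   and as "ldmia r2, {r0, r1}" otherwise; the more complex addressing modes
   below may need a pair of single-word loads or stores instead.  */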
20058 const char *
20059 output_move_double (rtx *operands, bool emit, int *count)
20060 {
20061 enum rtx_code code0 = GET_CODE (operands[0]);
20062 enum rtx_code code1 = GET_CODE (operands[1]);
20063 rtx otherops[3];
20064 if (count)
20065 *count = 1;
20066
20067 /* The only case when this might happen is when
20068 you are looking at the length of a DImode instruction
20069 that has an invalid constant in it. */
20070 if (code0 == REG && code1 != MEM)
20071 {
20072 gcc_assert (!emit);
20073 *count = 2;
20074 return "";
20075 }
20076
20077 if (code0 == REG)
20078 {
20079 unsigned int reg0 = REGNO (operands[0]);
20080 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20081
20082 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20083
20084 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20085
20086 switch (GET_CODE (XEXP (operands[1], 0)))
20087 {
20088 case REG:
20089
20090 if (emit)
20091 {
20092 if (can_ldrd
20093 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
20094 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20095 else
20096 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20097 }
20098 break;
20099
20100 case PRE_INC:
20101 gcc_assert (can_ldrd);
20102 if (emit)
20103 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20104 break;
20105
20106 case PRE_DEC:
20107 if (emit)
20108 {
20109 if (can_ldrd)
20110 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20111 else
20112 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20113 }
20114 break;
20115
20116 case POST_INC:
20117 if (emit)
20118 {
20119 if (can_ldrd)
20120 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20121 else
20122 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20123 }
20124 break;
20125
20126 case POST_DEC:
20127 gcc_assert (can_ldrd);
20128 if (emit)
20129 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20130 break;
20131
20132 case PRE_MODIFY:
20133 case POST_MODIFY:
20134 /* Autoincrement addressing modes should never have overlapping
20135 base and destination registers, and overlapping index registers
20136 are already prohibited, so this doesn't need to worry about
20137 fix_cm3_ldrd. */
20138 otherops[0] = operands[0];
20139 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20140 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20141
20142 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20143 {
20144 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20145 {
20146 /* Registers overlap so split out the increment. */
20147 if (emit)
20148 {
20149 gcc_assert (can_ldrd);
20150 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20151 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20152 }
20153 if (count)
20154 *count = 2;
20155 }
20156 else
20157 {
20158 /* Use a single insn if we can.
20159 FIXME: IWMMXT allows offsets larger than ldrd can
20160 handle, fix these up with a pair of ldr. */
20161 if (can_ldrd
20162 && (TARGET_THUMB2
20163 || !CONST_INT_P (otherops[2])
20164 || (INTVAL (otherops[2]) > -256
20165 && INTVAL (otherops[2]) < 256)))
20166 {
20167 if (emit)
20168 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20169 }
20170 else
20171 {
20172 if (emit)
20173 {
20174 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20175 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20176 }
20177 if (count)
20178 *count = 2;
20179
20180 }
20181 }
20182 }
20183 else
20184 {
20185 /* Use a single insn if we can.
20186 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20187 fix these up with a pair of ldr. */
20188 if (can_ldrd
20189 && (TARGET_THUMB2
20190 || !CONST_INT_P (otherops[2])
20191 || (INTVAL (otherops[2]) > -256
20192 && INTVAL (otherops[2]) < 256)))
20193 {
20194 if (emit)
20195 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20196 }
20197 else
20198 {
20199 if (emit)
20200 {
20201 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20202 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20203 }
20204 if (count)
20205 *count = 2;
20206 }
20207 }
20208 break;
20209
20210 case LABEL_REF:
20211 case CONST:
20212 /* We might be able to use ldrd %0, %1 here. However, the range is
20213 different from that of ldr/adr, and it is broken on some ARMv7-M
20214 implementations. */
20215 /* Use the second register of the pair to avoid problematic
20216 overlap. */
20217 otherops[1] = operands[1];
20218 if (emit)
20219 output_asm_insn ("adr%?\t%0, %1", otherops);
20220 operands[1] = otherops[0];
20221 if (emit)
20222 {
20223 if (can_ldrd)
20224 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20225 else
20226 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20227 }
20228
20229 if (count)
20230 *count = 2;
20231 break;
20232
20233 /* ??? This needs checking for thumb2. */
20234 default:
20235 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20236 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20237 {
20238 otherops[0] = operands[0];
20239 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20240 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20241
20242 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20243 {
20244 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20245 {
20246 switch ((int) INTVAL (otherops[2]))
20247 {
20248 case -8:
20249 if (emit)
20250 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20251 return "";
20252 case -4:
20253 if (TARGET_THUMB2)
20254 break;
20255 if (emit)
20256 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20257 return "";
20258 case 4:
20259 if (TARGET_THUMB2)
20260 break;
20261 if (emit)
20262 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20263 return "";
20264 }
20265 }
20266 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
20267 operands[1] = otherops[0];
20268 if (can_ldrd
20269 && (REG_P (otherops[2])
20270 || TARGET_THUMB2
20271 || (CONST_INT_P (otherops[2])
20272 && INTVAL (otherops[2]) > -256
20273 && INTVAL (otherops[2]) < 256)))
20274 {
20275 if (reg_overlap_mentioned_p (operands[0],
20276 otherops[2]))
20277 {
20278 /* Swap base and index registers over to
20279 avoid a conflict. */
20280 std::swap (otherops[1], otherops[2]);
20281 }
20282 /* If both registers conflict, it will usually
20283 have been fixed by a splitter. */
20284 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20285 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20286 {
20287 if (emit)
20288 {
20289 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20290 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20291 }
20292 if (count)
20293 *count = 2;
20294 }
20295 else
20296 {
20297 otherops[0] = operands[0];
20298 if (emit)
20299 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20300 }
20301 return "";
20302 }
20303
20304 if (CONST_INT_P (otherops[2]))
20305 {
20306 if (emit)
20307 {
20308 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20309 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20310 else
20311 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20312 }
20313 }
20314 else
20315 {
20316 if (emit)
20317 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20318 }
20319 }
20320 else
20321 {
20322 if (emit)
20323 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20324 }
20325
20326 if (count)
20327 *count = 2;
20328
20329 if (can_ldrd)
20330 return "ldrd%?\t%0, [%1]";
20331
20332 return "ldmia%?\t%1, %M0";
20333 }
20334 else
20335 {
20336 otherops[1] = adjust_address (operands[1], SImode, 4);
20337 /* Take care of overlapping base/data reg. */
20338 if (reg_mentioned_p (operands[0], operands[1]))
20339 {
20340 if (emit)
20341 {
20342 output_asm_insn ("ldr%?\t%0, %1", otherops);
20343 output_asm_insn ("ldr%?\t%0, %1", operands);
20344 }
20345 if (count)
20346 *count = 2;
20347
20348 }
20349 else
20350 {
20351 if (emit)
20352 {
20353 output_asm_insn ("ldr%?\t%0, %1", operands);
20354 output_asm_insn ("ldr%?\t%0, %1", otherops);
20355 }
20356 if (count)
20357 *count = 2;
20358 }
20359 }
20360 }
20361 }
20362 else
20363 {
20364 /* Constraints should ensure this. */
20365 gcc_assert (code0 == MEM && code1 == REG);
20366 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20367 || (TARGET_ARM && TARGET_LDRD));
20368
20369 /* For TARGET_ARM the first source register of an STRD
20370 must be even. This is usually the case for double-word
20371 values but user assembly constraints can force an odd
20372 starting register. */
20373 bool allow_strd = TARGET_LDRD
20374 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20375 switch (GET_CODE (XEXP (operands[0], 0)))
20376 {
20377 case REG:
20378 if (emit)
20379 {
20380 if (allow_strd)
20381 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20382 else
20383 output_asm_insn ("stm%?\t%m0, %M1", operands);
20384 }
20385 break;
20386
20387 case PRE_INC:
20388 gcc_assert (allow_strd);
20389 if (emit)
20390 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20391 break;
20392
20393 case PRE_DEC:
20394 if (emit)
20395 {
20396 if (allow_strd)
20397 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20398 else
20399 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20400 }
20401 break;
20402
20403 case POST_INC:
20404 if (emit)
20405 {
20406 if (allow_strd)
20407 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20408 else
20409 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20410 }
20411 break;
20412
20413 case POST_DEC:
20414 gcc_assert (allow_strd);
20415 if (emit)
20416 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20417 break;
20418
20419 case PRE_MODIFY:
20420 case POST_MODIFY:
20421 otherops[0] = operands[1];
20422 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20423 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20424
20425 /* IWMMXT allows offsets larger than strd can handle,
20426 fix these up with a pair of str. */
20427 if (!TARGET_THUMB2
20428 && CONST_INT_P (otherops[2])
20429 && (INTVAL (otherops[2]) <= -256
20430 || INTVAL (otherops[2]) >= 256))
20431 {
20432 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20433 {
20434 if (emit)
20435 {
20436 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20437 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20438 }
20439 if (count)
20440 *count = 2;
20441 }
20442 else
20443 {
20444 if (emit)
20445 {
20446 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20447 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20448 }
20449 if (count)
20450 *count = 2;
20451 }
20452 }
20453 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20454 {
20455 if (emit)
20456 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20457 }
20458 else
20459 {
20460 if (emit)
20461 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20462 }
20463 break;
20464
20465 case PLUS:
20466 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20467 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20468 {
20469 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20470 {
20471 case -8:
20472 if (emit)
20473 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20474 return "";
20475
20476 case -4:
20477 if (TARGET_THUMB2)
20478 break;
20479 if (emit)
20480 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20481 return "";
20482
20483 case 4:
20484 if (TARGET_THUMB2)
20485 break;
20486 if (emit)
20487 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20488 return "";
20489 }
20490 }
20491 if (allow_strd
20492 && (REG_P (otherops[2])
20493 || TARGET_THUMB2
20494 || (CONST_INT_P (otherops[2])
20495 && INTVAL (otherops[2]) > -256
20496 && INTVAL (otherops[2]) < 256)))
20497 {
20498 otherops[0] = operands[1];
20499 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20500 if (emit)
20501 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20502 return "";
20503 }
20504 /* Fall through */
20505
20506 default:
20507 otherops[0] = adjust_address (operands[0], SImode, 4);
20508 otherops[1] = operands[1];
20509 if (emit)
20510 {
20511 output_asm_insn ("str%?\t%1, %0", operands);
20512 output_asm_insn ("str%?\t%H1, %0", otherops);
20513 }
20514 if (count)
20515 *count = 2;
20516 }
20517 }
20518
20519 return "";
20520 }
20521
20522 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20523 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20524
20525 const char *
20526 output_move_quad (rtx *operands)
20527 {
20528 if (REG_P (operands[0]))
20529 {
20530 /* Load, or reg->reg move. */
20531
20532 if (MEM_P (operands[1]))
20533 {
20534 switch (GET_CODE (XEXP (operands[1], 0)))
20535 {
20536 case REG:
20537 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20538 break;
20539
20540 case LABEL_REF:
20541 case CONST:
20542 output_asm_insn ("adr%?\t%0, %1", operands);
20543 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20544 break;
20545
20546 default:
20547 gcc_unreachable ();
20548 }
20549 }
20550 else
20551 {
20552 rtx ops[2];
20553 int dest, src, i;
20554
20555 gcc_assert (REG_P (operands[1]));
20556
20557 dest = REGNO (operands[0]);
20558 src = REGNO (operands[1]);
20559
20560 /* This seems pretty dumb, but hopefully GCC won't try to do it
20561 very often. */
20562 if (dest < src)
20563 for (i = 0; i < 4; i++)
20564 {
20565 ops[0] = gen_rtx_REG (SImode, dest + i);
20566 ops[1] = gen_rtx_REG (SImode, src + i);
20567 output_asm_insn ("mov%?\t%0, %1", ops);
20568 }
20569 else
20570 for (i = 3; i >= 0; i--)
20571 {
20572 ops[0] = gen_rtx_REG (SImode, dest + i);
20573 ops[1] = gen_rtx_REG (SImode, src + i);
20574 output_asm_insn ("mov%?\t%0, %1", ops);
20575 }
20576 }
20577 }
20578 else
20579 {
20580 gcc_assert (MEM_P (operands[0]));
20581 gcc_assert (REG_P (operands[1]));
20582 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20583
20584 switch (GET_CODE (XEXP (operands[0], 0)))
20585 {
20586 case REG:
20587 output_asm_insn ("stm%?\t%m0, %M1", operands);
20588 break;
20589
20590 default:
20591 gcc_unreachable ();
20592 }
20593 }
20594
20595 return "";
20596 }
20597
20598 /* Output a VFP load or store instruction. */
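/* For example, an SFmode load from [r3] is printed as "vldr.32 sN, [r3]"
   and a DFmode load as "vldr.64 dN, [r3]", while the pre-decrement and
   post-increment address forms use the vldmdb/vldmia (or vstmdb/vstmia)
   encodings with base-register writeback.  */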
20599
20600 const char *
20601 output_move_vfp (rtx *operands)
20602 {
20603 rtx reg, mem, addr, ops[2];
20604 int load = REG_P (operands[0]);
20605 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20606 int sp = (!TARGET_VFP_FP16INST
20607 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20608 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20609 const char *templ;
20610 char buff[50];
20611 machine_mode mode;
20612
20613 reg = operands[!load];
20614 mem = operands[load];
20615
20616 mode = GET_MODE (reg);
20617
20618 gcc_assert (REG_P (reg));
20619 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20620 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20621 || mode == SFmode
20622 || mode == DFmode
20623 || mode == HImode
20624 || mode == SImode
20625 || mode == DImode
20626 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20627 gcc_assert (MEM_P (mem));
20628
20629 addr = XEXP (mem, 0);
20630
20631 switch (GET_CODE (addr))
20632 {
20633 case PRE_DEC:
20634 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20635 ops[0] = XEXP (addr, 0);
20636 ops[1] = reg;
20637 break;
20638
20639 case POST_INC:
20640 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20641 ops[0] = XEXP (addr, 0);
20642 ops[1] = reg;
20643 break;
20644
20645 default:
20646 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20647 ops[0] = reg;
20648 ops[1] = mem;
20649 break;
20650 }
20651
20652 sprintf (buff, templ,
20653 load ? "ld" : "st",
20654 dp ? "64" : sp ? "32" : "16",
20655 dp ? "P" : "",
20656 integer_p ? "\t%@ int" : "");
20657 output_asm_insn (buff, ops);
20658
20659 return "";
20660 }
20661
20662 /* Output a Neon double-word or quad-word load or store, or a load
20663 or store for larger structure modes.
20664
20665 WARNING: The ordering of elements is weird in big-endian mode,
20666 because the EABI requires that vectors stored in memory appear
20667 as though they were stored by a VSTM instruction.
20668 GCC RTL defines element ordering based on in-memory order.
20669 This can be different from the architectural ordering of elements
20670 within a NEON register. The intrinsics defined in arm_neon.h use the
20671 NEON register element ordering, not the GCC RTL element ordering.
20672
20673 For example, the in-memory ordering of a big-endian quadword
20674 vector with 16-bit elements when stored from register pair {d0,d1}
20675 will be (lowest address first, d0[N] is NEON register element N):
20676
20677 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20678
20679 When necessary, quadword registers (dN, dN+1) are moved to ARM
20680 registers from rN in the order:
20681
20682 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20683
20684 So that STM/LDM can be used on vectors in ARM registers, and the
20685 same memory layout will result as if VSTM/VLDM were used.
20686
20687 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20688 possible, which allows use of appropriate alignment tags.
20689 Note that the choice of "64" is independent of the actual vector
20690 element size; this size simply ensures that the behavior is
20691 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20692
20693 Due to limitations of those instructions, use of VST1.64/VLD1.64
20694 is not possible if:
20695 - the address contains PRE_DEC, or
20696 - the mode refers to more than 4 double-word registers
20697
20698 In those cases, it would be possible to replace VSTM/VLDM by a
20699 sequence of instructions; this is not currently implemented since
20700 this is not certain to actually improve performance. */
20701
20702 const char *
20703 output_move_neon (rtx *operands)
20704 {
20705 rtx reg, mem, addr, ops[2];
20706 int regno, nregs, load = REG_P (operands[0]);
20707 const char *templ;
20708 char buff[50];
20709 machine_mode mode;
20710
20711 reg = operands[!load];
20712 mem = operands[load];
20713
20714 mode = GET_MODE (reg);
20715
20716 gcc_assert (REG_P (reg));
20717 regno = REGNO (reg);
20718 nregs = REG_NREGS (reg) / 2;
20719 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20720 || NEON_REGNO_OK_FOR_QUAD (regno));
20721 gcc_assert (VALID_NEON_DREG_MODE (mode)
20722 || VALID_NEON_QREG_MODE (mode)
20723 || VALID_NEON_STRUCT_MODE (mode));
20724 gcc_assert (MEM_P (mem));
20725
20726 addr = XEXP (mem, 0);
20727
20728 /* Strip off const from addresses like (const (plus (...))). */
20729 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20730 addr = XEXP (addr, 0);
20731
20732 switch (GET_CODE (addr))
20733 {
20734 case POST_INC:
20735 /* We have to use vldm / vstm for too-large modes. */
20736 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20737 {
20738 templ = "v%smia%%?\t%%0!, %%h1";
20739 ops[0] = XEXP (addr, 0);
20740 }
20741 else
20742 {
20743 templ = "v%s1.64\t%%h1, %%A0";
20744 ops[0] = mem;
20745 }
20746 ops[1] = reg;
20747 break;
20748
20749 case PRE_DEC:
20750 /* We have to use vldm / vstm in this case, since there is no
20751 pre-decrement form of the vld1 / vst1 instructions. */
20752 templ = "v%smdb%%?\t%%0!, %%h1";
20753 ops[0] = XEXP (addr, 0);
20754 ops[1] = reg;
20755 break;
20756
20757 case POST_MODIFY:
20758 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20759 gcc_unreachable ();
20760
20761 case REG:
20762 /* We have to use vldm / vstm for too-large modes. */
20763 if (nregs > 1)
20764 {
20765 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20766 templ = "v%smia%%?\t%%m0, %%h1";
20767 else
20768 templ = "v%s1.64\t%%h1, %%A0";
20769
20770 ops[0] = mem;
20771 ops[1] = reg;
20772 break;
20773 }
20774 /* Fall through. */
20775 case PLUS:
20776 if (GET_CODE (addr) == PLUS)
20777 addr = XEXP (addr, 0);
20778 /* Fall through. */
20779 case LABEL_REF:
20780 {
20781 int i;
20782 int overlap = -1;
20783 for (i = 0; i < nregs; i++)
20784 {
20785 /* We're only using DImode here because it's a convenient
20786 size. */
20787 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20788 ops[1] = adjust_address (mem, DImode, 8 * i);
20789 if (reg_overlap_mentioned_p (ops[0], mem))
20790 {
20791 gcc_assert (overlap == -1);
20792 overlap = i;
20793 }
20794 else
20795 {
20796 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20797 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20798 else
20799 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20800 output_asm_insn (buff, ops);
20801 }
20802 }
20803 if (overlap != -1)
20804 {
20805 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20806 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20807 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20808 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20809 else
20810 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20811 output_asm_insn (buff, ops);
20812 }
20813
20814 return "";
20815 }
20816
20817 default:
20818 gcc_unreachable ();
20819 }
20820
20821 sprintf (buff, templ, load ? "ld" : "st");
20822 output_asm_insn (buff, ops);
20823
20824 return "";
20825 }
20826
20827 /* Compute and return the length of neon_mov<mode>, where <mode> is
20828 one of VSTRUCT modes: EI, OI, CI or XI. */
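/* For example, a register-to-register move of an OImode value is 8 bytes
   (two instructions) and an XImode move is 16, while a load or store whose
   address is a label or reg+offset takes 4 bytes per D-register pair.  */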
20829 int
20830 arm_attr_length_move_neon (rtx_insn *insn)
20831 {
20832 rtx reg, mem, addr;
20833 int load;
20834 machine_mode mode;
20835
20836 extract_insn_cached (insn);
20837
20838 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20839 {
20840 mode = GET_MODE (recog_data.operand[0]);
20841 switch (mode)
20842 {
20843 case E_EImode:
20844 case E_OImode:
20845 return 8;
20846 case E_CImode:
20847 return 12;
20848 case E_XImode:
20849 return 16;
20850 default:
20851 gcc_unreachable ();
20852 }
20853 }
20854
20855 load = REG_P (recog_data.operand[0]);
20856 reg = recog_data.operand[!load];
20857 mem = recog_data.operand[load];
20858
20859 gcc_assert (MEM_P (mem));
20860
20861 addr = XEXP (mem, 0);
20862
20863 /* Strip off const from addresses like (const (plus (...))). */
20864 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20865 addr = XEXP (addr, 0);
20866
20867 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20868 {
20869 int insns = REG_NREGS (reg) / 2;
20870 return insns * 4;
20871 }
20872 else
20873 return 4;
20874 }
20875
20876 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20877 return zero. */
20878
20879 int
20880 arm_address_offset_is_imm (rtx_insn *insn)
20881 {
20882 rtx mem, addr;
20883
20884 extract_insn_cached (insn);
20885
20886 if (REG_P (recog_data.operand[0]))
20887 return 0;
20888
20889 mem = recog_data.operand[0];
20890
20891 gcc_assert (MEM_P (mem));
20892
20893 addr = XEXP (mem, 0);
20894
20895 if (REG_P (addr)
20896 || (GET_CODE (addr) == PLUS
20897 && REG_P (XEXP (addr, 0))
20898 && CONST_INT_P (XEXP (addr, 1))))
20899 return 1;
20900 else
20901 return 0;
20902 }
20903
20904 /* Output an ADD r, s, #n where n may be too big for one instruction.
20905 If the addend is zero and the source and destination are the same register, output nothing. */
20906 const char *
20907 output_add_immediate (rtx *operands)
20908 {
20909 HOST_WIDE_INT n = INTVAL (operands[2]);
20910
20911 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20912 {
20913 if (n < 0)
20914 output_multi_immediate (operands,
20915 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20916 -n);
20917 else
20918 output_multi_immediate (operands,
20919 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20920 n);
20921 }
20922
20923 return "";
20924 }
20925
20926 /* Output a multiple immediate operation.
20927 OPERANDS is the vector of operands referred to in the output patterns.
20928 INSTR1 is the output pattern to use for the first constant.
20929 INSTR2 is the output pattern to use for subsequent constants.
20930 IMMED_OP is the index of the constant slot in OPERANDS.
20931 N is the constant value. */
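/* For example, adding the constant 0x10001 cannot be done with a single
   ARM immediate, so output_add_immediate splits it into
   "add rD, rS, #1" followed by "add rD, rD, #65536"; each chunk
   N & (255 << i) is an 8-bit value at an even bit position and therefore
   always a valid rotated ARM immediate.  */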
20932 static const char *
20933 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20934 int immed_op, HOST_WIDE_INT n)
20935 {
20936 #if HOST_BITS_PER_WIDE_INT > 32
20937 n &= 0xffffffff;
20938 #endif
20939
20940 if (n == 0)
20941 {
20942 /* Quick and easy output. */
20943 operands[immed_op] = const0_rtx;
20944 output_asm_insn (instr1, operands);
20945 }
20946 else
20947 {
20948 int i;
20949 const char * instr = instr1;
20950
20951 /* Note that n is never zero here (which would give no output). */
20952 for (i = 0; i < 32; i += 2)
20953 {
20954 if (n & (3 << i))
20955 {
20956 operands[immed_op] = GEN_INT (n & (255 << i));
20957 output_asm_insn (instr, operands);
20958 instr = instr2;
20959 i += 6;
20960 }
20961 }
20962 }
20963
20964 return "";
20965 }
20966
20967 /* Return the name of a shifter operation. */
20968 static const char *
20969 arm_shift_nmem (enum rtx_code code)
20970 {
20971 switch (code)
20972 {
20973 case ASHIFT:
20974 return ARM_LSL_NAME;
20975
20976 case ASHIFTRT:
20977 return "asr";
20978
20979 case LSHIFTRT:
20980 return "lsr";
20981
20982 case ROTATERT:
20983 return "ror";
20984
20985 default:
20986 abort ();
20987 }
20988 }
20989
20990 /* Return the appropriate ARM instruction for the operation code.
20991 The returned result should not be overwritten. OP is the rtx of the
20992 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20993 was shifted. */
20994 const char *
20995 arithmetic_instr (rtx op, int shift_first_arg)
20996 {
20997 switch (GET_CODE (op))
20998 {
20999 case PLUS:
21000 return "add";
21001
21002 case MINUS:
21003 return shift_first_arg ? "rsb" : "sub";
21004
21005 case IOR:
21006 return "orr";
21007
21008 case XOR:
21009 return "eor";
21010
21011 case AND:
21012 return "and";
21013
21014 case ASHIFT:
21015 case ASHIFTRT:
21016 case LSHIFTRT:
21017 case ROTATERT:
21018 return arm_shift_nmem (GET_CODE (op));
21019
21020 default:
21021 gcc_unreachable ();
21022 }
21023 }
21024
21025 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21026 for the operation code. The returned result should not be overwritten.
21027 OP is the rtx of the shift.
21028 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
21029 constant shift amount otherwise. */
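/* For example, (ashift x 5) returns the lsl mnemonic with *AMOUNTP set to
   5, (rotate x 10) is converted into a rotate right by 22, and (mult x 8)
   is printed as a left shift by 3.  */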
21030 static const char *
21031 shift_op (rtx op, HOST_WIDE_INT *amountp)
21032 {
21033 const char * mnem;
21034 enum rtx_code code = GET_CODE (op);
21035
21036 switch (code)
21037 {
21038 case ROTATE:
21039 if (!CONST_INT_P (XEXP (op, 1)))
21040 {
21041 output_operand_lossage ("invalid shift operand");
21042 return NULL;
21043 }
21044
21045 code = ROTATERT;
21046 *amountp = 32 - INTVAL (XEXP (op, 1));
21047 mnem = "ror";
21048 break;
21049
21050 case ASHIFT:
21051 case ASHIFTRT:
21052 case LSHIFTRT:
21053 case ROTATERT:
21054 mnem = arm_shift_nmem (code);
21055 if (CONST_INT_P (XEXP (op, 1)))
21056 {
21057 *amountp = INTVAL (XEXP (op, 1));
21058 }
21059 else if (REG_P (XEXP (op, 1)))
21060 {
21061 *amountp = -1;
21062 return mnem;
21063 }
21064 else
21065 {
21066 output_operand_lossage ("invalid shift operand");
21067 return NULL;
21068 }
21069 break;
21070
21071 case MULT:
21072 /* We never have to worry about the amount being other than a
21073 power of 2, since this case can never be reloaded from a reg. */
21074 if (!CONST_INT_P (XEXP (op, 1)))
21075 {
21076 output_operand_lossage ("invalid shift operand");
21077 return NULL;
21078 }
21079
21080 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21081
21082 /* Amount must be a power of two. */
21083 if (*amountp & (*amountp - 1))
21084 {
21085 output_operand_lossage ("invalid shift operand");
21086 return NULL;
21087 }
21088
21089 *amountp = exact_log2 (*amountp);
21090 gcc_assert (IN_RANGE (*amountp, 0, 31));
21091 return ARM_LSL_NAME;
21092
21093 default:
21094 output_operand_lossage ("invalid shift operand");
21095 return NULL;
21096 }
21097
21098 /* This is not 100% correct, but follows from the desire to merge
21099 multiplication by a power of 2 with the recognizer for a
21100 shift. >=32 is not a valid shift for "lsl", so we must try to
21101 output a shift that produces the correct arithmetical result.
21102 Using lsr #32 is identical except that the carry bit
21103 is not set correctly if we set the flags; but we never use the
21104 carry bit from such an operation, so we can ignore that. */
21105 if (code == ROTATERT)
21106 /* Rotate is just modulo 32. */
21107 *amountp &= 31;
21108 else if (*amountp != (*amountp & 31))
21109 {
21110 if (code == ASHIFT)
21111 mnem = "lsr";
21112 *amountp = 32;
21113 }
21114
21115 /* Shifts of 0 are no-ops. */
21116 if (*amountp == 0)
21117 return NULL;
21118
21119 return mnem;
21120 }
21121
21122 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21123 because /bin/as is horribly restrictive. The judgement about
21124 whether each character is 'printable' (and can be output as
21125 is) or not (and must be printed with an octal escape) must be made
21126 with reference to the *host* character set -- the situation is
21127 similar to that discussed in the comments above pp_c_char in
21128 c-pretty-print.cc. */
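/* For example, the bytes 'a', '"', 'b', '\n' are emitted as
   .ascii "a\"b\012"
   and a fresh .ascii directive is started whenever the current one reaches
   MAX_ASCII_LEN characters.  */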
21129
21130 #define MAX_ASCII_LEN 51
21131
21132 void
21133 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21134 {
21135 int i;
21136 int len_so_far = 0;
21137
21138 fputs ("\t.ascii\t\"", stream);
21139
21140 for (i = 0; i < len; i++)
21141 {
21142 int c = p[i];
21143
21144 if (len_so_far >= MAX_ASCII_LEN)
21145 {
21146 fputs ("\"\n\t.ascii\t\"", stream);
21147 len_so_far = 0;
21148 }
21149
21150 if (ISPRINT (c))
21151 {
21152 if (c == '\\' || c == '\"')
21153 {
21154 putc ('\\', stream);
21155 len_so_far++;
21156 }
21157 putc (c, stream);
21158 len_so_far++;
21159 }
21160 else
21161 {
21162 fprintf (stream, "\\%03o", c);
21163 len_so_far += 4;
21164 }
21165 }
21166
21167 fputs ("\"\n", stream);
21168 }
21169 \f
21170
21171 /* Compute the register save mask for registers 0 through 12
21172 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21173
21174 static unsigned long
21175 arm_compute_save_reg0_reg12_mask (void)
21176 {
21177 unsigned long func_type = arm_current_func_type ();
21178 unsigned long save_reg_mask = 0;
21179 unsigned int reg;
21180
21181 if (IS_INTERRUPT (func_type))
21182 {
21183 unsigned int max_reg;
21184 /* Interrupt functions must not corrupt any registers,
21185 even call clobbered ones. If this is a leaf function
21186 we can just examine the registers used by the RTL, but
21187 otherwise we have to assume that whatever function is
21188 called might clobber anything, and so we have to save
21189 all the call-clobbered registers as well. */
21190 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21191 /* FIQ handlers have registers r8 - r12 banked, so
21192 we only need to check r0 - r7; normal ISRs only
21193 bank r14 and r15, so we must check up to r12.
21194 r13 is the stack pointer, which is always preserved,
21195 so we do not need to consider it here. */
21196 max_reg = 7;
21197 else
21198 max_reg = 12;
21199
21200 for (reg = 0; reg <= max_reg; reg++)
21201 if (reg_needs_saving_p (reg))
21202 save_reg_mask |= (1 << reg);
21203
21204 /* Also save the pic base register if necessary. */
21205 if (PIC_REGISTER_MAY_NEED_SAVING
21206 && crtl->uses_pic_offset_table)
21207 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21208 }
21209 else if (IS_VOLATILE (func_type))
21210 {
21211 /* For noreturn functions we historically omitted register saves
21212 altogether. However, this really messes up debugging. As a
21213 compromise save just the frame pointers. Combined with the link
21214 register saved elsewhere this should be sufficient to get
21215 a backtrace. */
21216 if (frame_pointer_needed)
21217 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21218 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21219 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21220 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21221 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21222 }
21223 else
21224 {
21225 /* In the normal case we only need to save those registers
21226 which are call saved and which are used by this function. */
21227 for (reg = 0; reg <= 11; reg++)
21228 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21229 save_reg_mask |= (1 << reg);
21230
21231 /* Handle the frame pointer as a special case. */
21232 if (frame_pointer_needed)
21233 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21234
21235 /* If we aren't loading the PIC register,
21236 don't stack it even though it may be live. */
21237 if (PIC_REGISTER_MAY_NEED_SAVING
21238 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21239 || crtl->uses_pic_offset_table))
21240 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21241
21242 /* The prologue will copy SP into R0, so save it. */
21243 if (IS_STACKALIGN (func_type))
21244 save_reg_mask |= 1;
21245 }
21246
21247 /* Save registers so the exception handler can modify them. */
21248 if (crtl->calls_eh_return)
21249 {
21250 unsigned int i;
21251
21252 for (i = 0; ; i++)
21253 {
21254 reg = EH_RETURN_DATA_REGNO (i);
21255 if (reg == INVALID_REGNUM)
21256 break;
21257 save_reg_mask |= 1 << reg;
21258 }
21259 }
21260
21261 return save_reg_mask;
21262 }
21263
21264 /* Return true if r3 is live at the start of the function. */
21265
21266 static bool
21267 arm_r3_live_at_start_p (void)
21268 {
21269 /* Just look at cfg info, which is still close enough to correct at this
21270 point. This gives false positives for broken functions that might use
21271 uninitialized data that happens to be allocated in r3, but who cares? */
21272 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21273 }
21274
21275 /* Compute the number of bytes used to store the static chain register on the
21276 stack, above the stack frame. We need to know this accurately to get the
21277 alignment of the rest of the stack frame correct. */
21278
21279 static int
21280 arm_compute_static_chain_stack_bytes (void)
21281 {
21282 /* Once the value is updated from the init value of -1, do not
21283 re-compute. */
21284 if (cfun->machine->static_chain_stack_bytes != -1)
21285 return cfun->machine->static_chain_stack_bytes;
21286
21287 /* See the defining assertion in arm_expand_prologue. */
21288 if (IS_NESTED (arm_current_func_type ())
21289 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21290 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21291 || flag_stack_clash_protection)
21292 && !df_regs_ever_live_p (LR_REGNUM)))
21293 && arm_r3_live_at_start_p ()
21294 && crtl->args.pretend_args_size == 0)
21295 return 4;
21296
21297 return 0;
21298 }
21299
21300 /* Compute a bit mask of which core registers need to be
21301 saved on the stack for the current function.
21302 This is used by arm_compute_frame_layout, which may add extra registers. */
21303
21304 static unsigned long
21305 arm_compute_save_core_reg_mask (void)
21306 {
21307 unsigned int save_reg_mask = 0;
21308 unsigned long func_type = arm_current_func_type ();
21309 unsigned int reg;
21310
21311 if (IS_NAKED (func_type))
21312 /* This should never really happen. */
21313 return 0;
21314
21315 /* If we are creating a stack frame, then we must save the frame pointer,
21316 IP (which will hold the old stack pointer), LR and the PC. */
21317 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21318 save_reg_mask |=
21319 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21320 | (1 << IP_REGNUM)
21321 | (1 << LR_REGNUM)
21322 | (1 << PC_REGNUM);
21323
21324 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21325
21326 if (arm_current_function_pac_enabled_p ())
21327 save_reg_mask |= 1 << IP_REGNUM;
21328
21329 /* Decide if we need to save the link register.
21330 Interrupt routines have their own banked link register,
21331 so they never need to save it.
21332 Otherwise, if we do not use the link register, we do not need to save
21333 it. If we are pushing other registers onto the stack, however, we
21334 can save an instruction in the epilogue by pushing the link register
21335 now and then popping it back into the PC. This incurs extra memory
21336 accesses though, so we only do it when optimizing for size, and only
21337 if we know that we will not need a fancy return sequence. */
21338 if (df_regs_ever_live_p (LR_REGNUM)
21339 || (save_reg_mask
21340 && optimize_size
21341 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21342 && !crtl->tail_call_emit
21343 && !crtl->calls_eh_return))
21344 save_reg_mask |= 1 << LR_REGNUM;
21345
21346 if (cfun->machine->lr_save_eliminated)
21347 save_reg_mask &= ~ (1 << LR_REGNUM);
21348
21349 if (TARGET_REALLY_IWMMXT
21350 && ((bit_count (save_reg_mask)
21351 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21352 arm_compute_static_chain_stack_bytes ()
21353 ) % 2) != 0)
21354 {
21355 /* The total number of registers that are going to be pushed
21356 onto the stack is odd. We need to ensure that the stack
21357 is 64-bit aligned before we start to save iWMMXt registers,
21358 and also before we start to create locals. (A local variable
21359 might be a double or long long which we will load/store using
21360 an iWMMXt instruction). Therefore we need to push another
21361 ARM register, so that the stack will be 64-bit aligned. We
21362 try to avoid using the arg registers (r0 - r3) as they might be
21363 used to pass values in a tail call. */
21364 for (reg = 4; reg <= 12; reg++)
21365 if ((save_reg_mask & (1 << reg)) == 0)
21366 break;
21367
21368 if (reg <= 12)
21369 save_reg_mask |= (1 << reg);
21370 else
21371 {
21372 cfun->machine->sibcall_blocked = 1;
21373 save_reg_mask |= (1 << 3);
21374 }
21375 }
21376
21377 /* We may need to push an additional register for use initializing the
21378 PIC base register. */
21379 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21380 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21381 {
21382 reg = thumb_find_work_register (1 << 4);
21383 if (!call_used_or_fixed_reg_p (reg))
21384 save_reg_mask |= (1 << reg);
21385 }
21386
21387 return save_reg_mask;
21388 }
21389
21390 /* Compute a bit mask of which core registers need to be
21391 saved on the stack for the current function. */
21392 static unsigned long
21393 thumb1_compute_save_core_reg_mask (void)
21394 {
21395 unsigned long mask;
21396 unsigned reg;
21397
21398 mask = 0;
21399 for (reg = 0; reg < 12; reg ++)
21400 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21401 mask |= 1 << reg;
21402
21403 /* Handle the frame pointer as a special case. */
21404 if (frame_pointer_needed)
21405 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21406
21407 if (flag_pic
21408 && !TARGET_SINGLE_PIC_BASE
21409 && arm_pic_register != INVALID_REGNUM
21410 && crtl->uses_pic_offset_table)
21411 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21412
21413 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21414 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21415 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21416
21417 /* LR will also be pushed if any lo regs are pushed. */
21418 if (mask & 0xff || thumb_force_lr_save ())
21419 mask |= (1 << LR_REGNUM);
21420
21421 bool call_clobbered_scratch
21422 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21423 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21424
21425 /* Make sure we have a low work register if we need one. We will
21426 need one if we are going to push a high register, but we are not
21427 currently intending to push a low register. However if both the
21428 prologue and epilogue have a spare call-clobbered low register,
21429 then we won't need to find an additional work register. It does
21430 not need to be the same register in the prologue and
21431 epilogue. */
21432 if ((mask & 0xff) == 0
21433 && !call_clobbered_scratch
21434 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21435 {
21436 /* Use thumb_find_work_register to choose which register
21437 we will use. If the register is live then we will
21438 have to push it. Use LAST_LO_REGNUM as our fallback
21439 choice for the register to select. */
21440 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21441 /* Make sure the register returned by thumb_find_work_register is
21442 not part of the return value. */
21443 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21444 reg = LAST_LO_REGNUM;
21445
21446 if (callee_saved_reg_p (reg))
21447 mask |= 1 << reg;
21448 }
21449
21450 /* The 504 below is 8 bytes less than 512 because there are two possible
21451 alignment words. We can't tell here if they will be present or not so we
21452 have to play it safe and assume that they are. */
21453 if ((CALLER_INTERWORKING_SLOT_SIZE +
21454 ROUND_UP_WORD (get_frame_size ()) +
21455 crtl->outgoing_args_size) >= 504)
21456 {
21457 /* This is the same as the code in thumb1_expand_prologue() which
21458 determines which register to use for stack decrement. */
21459 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21460 if (mask & (1 << reg))
21461 break;
21462
21463 if (reg > LAST_LO_REGNUM)
21464 {
21465 /* Make sure we have a register available for stack decrement. */
21466 mask |= 1 << LAST_LO_REGNUM;
21467 }
21468 }
21469
21470 return mask;
21471 }
21472
21473 /* Return the number of bytes required to save VFP registers. */
21474 static int
21475 arm_get_vfp_saved_size (void)
21476 {
21477 unsigned int regno;
21478 int count;
21479 int saved;
21480
21481 saved = 0;
21482 /* Space for saved VFP registers. */
21483 if (TARGET_VFP_BASE)
21484 {
21485 count = 0;
21486 for (regno = FIRST_VFP_REGNUM;
21487 regno < LAST_VFP_REGNUM;
21488 regno += 2)
21489 {
21490 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21491 {
21492 if (count > 0)
21493 {
21494 /* Workaround ARM10 VFPr1 bug. */
21495 if (count == 2 && !arm_arch6)
21496 count++;
21497 saved += count * 8;
21498 }
21499 count = 0;
21500 }
21501 else
21502 count++;
21503 }
21504 if (count > 0)
21505 {
21506 if (count == 2 && !arm_arch6)
21507 count++;
21508 saved += count * 8;
21509 }
21510 }
21511 return saved;
21512 }
21513
21514
21515 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21516 everything bar the final return instruction. If simple_return is true,
21517 then do not output epilogue, because it has already been emitted in RTL.
21518
21519 Note: do not forget to update length attribute of corresponding insn pattern
21520 when changing assembly output (eg. length attribute of
21521 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21522 register clearing sequences). */
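/* As a purely illustrative example, for a typical non-interworking ARM
   function whose saved_regs_mask is {r4, r5, lr} this emits a single
   "pop {r4, r5, pc}"; interrupt handlers and CMSE entry functions take
   the more involved paths below.  */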
21523 const char *
21524 output_return_instruction (rtx operand, bool really_return, bool reverse,
21525 bool simple_return)
21526 {
21527 char conditional[10];
21528 char instr[100];
21529 unsigned reg;
21530 unsigned long live_regs_mask;
21531 unsigned long func_type;
21532 arm_stack_offsets *offsets;
21533
21534 func_type = arm_current_func_type ();
21535
21536 if (IS_NAKED (func_type))
21537 return "";
21538
21539 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21540 {
21541 /* If this function was declared non-returning, and we have
21542 found a tail call, then we have to trust that the called
21543 function won't return. */
21544 if (really_return)
21545 {
21546 rtx ops[2];
21547
21548 /* Otherwise, trap an attempted return by aborting. */
21549 ops[0] = operand;
21550 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21551 : "abort");
21552 assemble_external_libcall (ops[1]);
21553 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21554 }
21555
21556 return "";
21557 }
21558
21559 gcc_assert (!cfun->calls_alloca || really_return);
21560
21561 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21562
21563 cfun->machine->return_used_this_function = 1;
21564
21565 offsets = arm_get_frame_offsets ();
21566 live_regs_mask = offsets->saved_regs_mask;
21567
21568 if (!simple_return && live_regs_mask)
21569 {
21570 const char * return_reg;
21571
21572 /* If we do not have any special requirements for function exit
21573 (e.g. interworking) then we can load the return address
21574 directly into the PC. Otherwise we must load it into LR. */
21575 if (really_return
21576 && !IS_CMSE_ENTRY (func_type)
21577 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21578 return_reg = reg_names[PC_REGNUM];
21579 else
21580 return_reg = reg_names[LR_REGNUM];
21581
21582 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21583 {
21584 /* There are three possible reasons for the IP register
21585 being saved. 1) a stack frame was created, in which case
21586 IP contains the old stack pointer, or 2) an ISR routine
21587 corrupted it, or 3) it was saved to align the stack on
21588 iWMMXt. In case 1, restore IP into SP, otherwise just
21589 restore IP. */
21590 if (frame_pointer_needed)
21591 {
21592 live_regs_mask &= ~ (1 << IP_REGNUM);
21593 live_regs_mask |= (1 << SP_REGNUM);
21594 }
21595 else
21596 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21597 }
21598
21599 /* On some ARM architectures it is faster to use LDR rather than
21600 LDM to load a single register. On other architectures, the
21601 cost is the same. In 26 bit mode, or for exception handlers,
21602 we have to use LDM to load the PC so that the CPSR is also
21603 restored. */
21604 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21605 if (live_regs_mask == (1U << reg))
21606 break;
21607
21608 if (reg <= LAST_ARM_REGNUM
21609 && (reg != LR_REGNUM
21610 || ! really_return
21611 || ! IS_INTERRUPT (func_type)))
21612 {
21613 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21614 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21615 }
21616 else
21617 {
21618 char *p;
21619 int first = 1;
21620
21621 /* Generate the load multiple instruction to restore the
21622 registers. Note we can get here, even if
21623 frame_pointer_needed is true, but only if sp already
21624 points to the base of the saved core registers. */
21625 if (live_regs_mask & (1 << SP_REGNUM))
21626 {
21627 unsigned HOST_WIDE_INT stack_adjust;
21628
21629 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21630 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21631
21632 if (stack_adjust && arm_arch5t && TARGET_ARM)
21633 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21634 else
21635 {
21636 /* If we can't use ldmib (SA110 bug),
21637 then try to pop r3 instead. */
21638 if (stack_adjust)
21639 live_regs_mask |= 1 << 3;
21640
21641 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21642 }
21643 }
21644 /* For interrupt returns we have to use an LDM rather than
21645 a POP so that we can use the exception return variant. */
21646 else if (IS_INTERRUPT (func_type))
21647 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21648 else
21649 sprintf (instr, "pop%s\t{", conditional);
21650
21651 p = instr + strlen (instr);
21652
21653 for (reg = 0; reg <= SP_REGNUM; reg++)
21654 if (live_regs_mask & (1 << reg))
21655 {
21656 int l = strlen (reg_names[reg]);
21657
21658 if (first)
21659 first = 0;
21660 else
21661 {
21662 memcpy (p, ", ", 2);
21663 p += 2;
21664 }
21665
21666 memcpy (p, "%|", 2);
21667 memcpy (p + 2, reg_names[reg], l);
21668 p += l + 2;
21669 }
21670
21671 if (live_regs_mask & (1 << LR_REGNUM))
21672 {
21673 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21674 /* If returning from an interrupt, restore the CPSR. */
21675 if (IS_INTERRUPT (func_type))
21676 strcat (p, "^");
21677 }
21678 else
21679 strcpy (p, "}");
21680 }
21681
21682 output_asm_insn (instr, & operand);
21683
21684 /* See if we need to generate an extra instruction to
21685 perform the actual function return. */
21686 if (really_return
21687 && func_type != ARM_FT_INTERWORKED
21688 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21689 {
21690 /* The return has already been handled
21691 by loading the LR into the PC. */
21692 return "";
21693 }
21694 }
21695
21696 if (really_return)
21697 {
21698 switch ((int) ARM_FUNC_TYPE (func_type))
21699 {
21700 case ARM_FT_ISR:
21701 case ARM_FT_FIQ:
21702 /* ??? This is wrong for unified assembly syntax. */
21703 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21704 break;
21705
21706 case ARM_FT_INTERWORKED:
21707 gcc_assert (arm_arch5t || arm_arch4t);
21708 sprintf (instr, "bx%s\t%%|lr", conditional);
21709 break;
21710
21711 case ARM_FT_EXCEPTION:
21712 /* ??? This is wrong for unified assembly syntax. */
21713 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21714 break;
21715
21716 default:
21717 if (IS_CMSE_ENTRY (func_type))
21718 {
21719 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21720 emitted by cmse_nonsecure_entry_clear_before_return () and the
21721 VSTR/VLDR instructions in the prologue and epilogue. */
21722 if (!TARGET_HAVE_FPCXT_CMSE)
21723 {
21724 /* Check if we have to clear the 'GE bits' which is only used if
21725 parallel add and subtraction instructions are available. */
21726 if (TARGET_INT_SIMD)
21727 snprintf (instr, sizeof (instr),
21728 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21729 else
21730 snprintf (instr, sizeof (instr),
21731 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21732
21733 output_asm_insn (instr, & operand);
21734 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21735 care of it. */
21736 if (TARGET_HARD_FLOAT)
21737 {
21738 /* Clear the cumulative exception-status bits (0-4,7) and
21739 the condition code bits (28-31) of the FPSCR. We need
21740 to remember to clear the first scratch register used
21741 (IP) and save and restore the second (r4).
21742
21743 Important note: the length of the
21744 thumb2_cmse_entry_return insn pattern must account for
21745 the size of the below instructions. */
21746 output_asm_insn ("push\t{%|r4}", & operand);
21747 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21748 output_asm_insn ("movw\t%|r4, #65376", & operand);
21749 output_asm_insn ("movt\t%|r4, #4095", & operand);
21750 output_asm_insn ("and\t%|ip, %|r4", & operand);
21751 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21752 output_asm_insn ("pop\t{%|r4}", & operand);
21753 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21754 }
21755 }
21756 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21757 }
21758 /* Use bx if it's available. */
21759 else if (arm_arch5t || arm_arch4t)
21760 sprintf (instr, "bx%s\t%%|lr", conditional);
21761 else
21762 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21763 break;
21764 }
21765
21766 output_asm_insn (instr, & operand);
21767 }
21768
21769 return "";
21770 }
21771
21772 /* Output in FILE asm statements needed to declare the NAME of the function
21773 defined by its DECL node. */
21774
21775 void
21776 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21777 {
21778 size_t cmse_name_len;
21779 char *cmse_name = 0;
21780 char cmse_prefix[] = "__acle_se_";
21781
21782 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21783 extra function label for each function with the 'cmse_nonsecure_entry'
21784 attribute. This extra function label should be prepended with
21785 '__acle_se_', telling the linker that it needs to create secure gateway
21786 veneers for this function. */
21787 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21788 DECL_ATTRIBUTES (decl)))
21789 {
21790 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21791 cmse_name = XALLOCAVEC (char, cmse_name_len);
21792 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21793 targetm.asm_out.globalize_label (file, cmse_name);
21794
21795 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21796 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21797 }
21798
21799 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21800 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21801 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21802 ASM_OUTPUT_LABEL (file, name);
21803
21804 if (cmse_name)
21805 ASM_OUTPUT_LABEL (file, cmse_name);
21806
21807 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21808 }
21809
21810 /* Write the function name into the code section, directly preceding
21811 the function prologue.
21812
21813 Code will be output similar to this:
21814 t0
21815 .ascii "arm_poke_function_name", 0
21816 .align
21817 t1
21818 .word 0xff000000 + (t1 - t0)
21819 arm_poke_function_name
21820 mov ip, sp
21821 stmfd sp!, {fp, ip, lr, pc}
21822 sub fp, ip, #4
21823
21824 When performing a stack backtrace, code can inspect the value
21825 of 'pc' stored at 'fp' + 0. If the trace function then looks
21826 at location pc - 12 and the top 8 bits are set, then we know
21827 that there is a function name embedded immediately preceding this
21828 location, whose length is given by ((pc[-3]) & ~0xff000000).
21829
21830 We assume that pc is declared as a pointer to an unsigned long.
21831
21832 It is of no benefit to output the function name if we are assembling
21833 a leaf function. These function types will not contain a stack
21834 backtrace structure, therefore it is not possible to determine the
21835 function name. */
21836 void
21837 arm_poke_function_name (FILE *stream, const char *name)
21838 {
21839 unsigned long alignlength;
21840 unsigned long length;
21841 rtx x;
21842
21843 length = strlen (name) + 1;
21844 alignlength = ROUND_UP_WORD (length);
21845
21846 ASM_OUTPUT_ASCII (stream, name, length);
21847 ASM_OUTPUT_ALIGN (stream, 2);
21848 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21849 assemble_aligned_integer (UNITS_PER_WORD, x);
21850 }
21851
21852 /* Place some comments into the assembler stream
21853 describing the current function. */
21854 static void
21855 arm_output_function_prologue (FILE *f)
21856 {
21857 unsigned long func_type;
21858
21859 /* Sanity check. */
21860 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21861
21862 func_type = arm_current_func_type ();
21863
21864 switch ((int) ARM_FUNC_TYPE (func_type))
21865 {
21866 default:
21867 case ARM_FT_NORMAL:
21868 break;
21869 case ARM_FT_INTERWORKED:
21870 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21871 break;
21872 case ARM_FT_ISR:
21873 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21874 break;
21875 case ARM_FT_FIQ:
21876 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21877 break;
21878 case ARM_FT_EXCEPTION:
21879 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21880 break;
21881 }
21882
21883 if (IS_NAKED (func_type))
21884 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21885
21886 if (IS_VOLATILE (func_type))
21887 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21888
21889 if (IS_NESTED (func_type))
21890 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21891 if (IS_STACKALIGN (func_type))
21892 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21893 if (IS_CMSE_ENTRY (func_type))
21894 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21895
21896 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21897 (HOST_WIDE_INT) crtl->args.size,
21898 crtl->args.pretend_args_size,
21899 (HOST_WIDE_INT) get_frame_size ());
21900
21901 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21902 frame_pointer_needed,
21903 cfun->machine->uses_anonymous_args);
21904
21905 if (cfun->machine->lr_save_eliminated)
21906 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21907
21908 if (crtl->calls_eh_return)
21909 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21910
21911 }
21912
21913 static void
21914 arm_output_function_epilogue (FILE *)
21915 {
21916 arm_stack_offsets *offsets;
21917
21918 if (TARGET_THUMB1)
21919 {
21920 int regno;
21921
21922 /* Emit any call-via-reg trampolines that are needed for v4t support
21923 of call_reg and call_value_reg type insns. */
21924 for (regno = 0; regno < LR_REGNUM; regno++)
21925 {
21926 rtx label = cfun->machine->call_via[regno];
21927
21928 if (label != NULL)
21929 {
21930 switch_to_section (function_section (current_function_decl));
21931 targetm.asm_out.internal_label (asm_out_file, "L",
21932 CODE_LABEL_NUMBER (label));
21933 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21934 }
21935 }
21936
21937 /* ??? Probably not safe to set this here, since it assumes that a
21938 function will be emitted as assembly immediately after we generate
21939 RTL for it. This does not happen for inline functions. */
21940 cfun->machine->return_used_this_function = 0;
21941 }
21942 else /* TARGET_32BIT */
21943 {
21944 /* We need to take into account any stack-frame rounding. */
21945 offsets = arm_get_frame_offsets ();
21946
21947 gcc_assert (!use_return_insn (FALSE, NULL)
21948 || (cfun->machine->return_used_this_function != 0)
21949 || offsets->saved_regs == offsets->outgoing_args
21950 || frame_pointer_needed);
21951 }
21952 }
21953
21954 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21955 STR and STRD. If an even number of registers is being pushed, an
21956 STRD pattern is created for each register pair. If an
21957 odd number of registers is pushed, emit an initial STR followed by
21958 as many STRD instructions as are needed. This works best when the
21959 stack is initially 64-bit aligned (the normal case), since it
21960 ensures that each STRD is also 64-bit aligned. */
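/* Illustrative example only (the exact assembly depends on which patterns
   match the generated RTL): a mask of {r4, r5, r6, r7} becomes roughly
	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]
   while an odd count such as {r4, r5, r6} starts with a single store:
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */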
21961 static void
21962 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21963 {
21964 int num_regs = 0;
21965 int i;
21966 int regno;
21967 rtx par = NULL_RTX;
21968 rtx dwarf = NULL_RTX;
21969 rtx tmp;
21970 bool first = true;
21971
21972 num_regs = bit_count (saved_regs_mask);
21973
21974 /* Must be at least one register to save, and can't save SP or PC. */
21975 gcc_assert (num_regs > 0 && num_regs <= 14);
21976 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21977 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21978
21979 /* Create sequence for DWARF info. All the frame-related data for
21980 debugging is held in this wrapper. */
21981 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21982
21983 /* Describe the stack adjustment. */
21984 tmp = gen_rtx_SET (stack_pointer_rtx,
21985 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21986 RTX_FRAME_RELATED_P (tmp) = 1;
21987 XVECEXP (dwarf, 0, 0) = tmp;
21988
21989 /* Find the first register. */
21990 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21991 ;
21992
21993 i = 0;
21994
21995 /* If there's an odd number of registers to push, start off by
21996 pushing a single register. This ensures that subsequent strd
21997 operations are dword aligned (assuming that SP was originally
21998 64-bit aligned). */
21999 if ((num_regs & 1) != 0)
22000 {
22001 rtx reg, mem, insn;
22002
22003 reg = gen_rtx_REG (SImode, regno);
22004 if (num_regs == 1)
22005 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22006 stack_pointer_rtx));
22007 else
22008 mem = gen_frame_mem (Pmode,
22009 gen_rtx_PRE_MODIFY
22010 (Pmode, stack_pointer_rtx,
22011 plus_constant (Pmode, stack_pointer_rtx,
22012 -4 * num_regs)));
22013
22014 tmp = gen_rtx_SET (mem, reg);
22015 RTX_FRAME_RELATED_P (tmp) = 1;
22016 insn = emit_insn (tmp);
22017 RTX_FRAME_RELATED_P (insn) = 1;
22018 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22019 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22020 RTX_FRAME_RELATED_P (tmp) = 1;
22021 i++;
22022 regno++;
22023 XVECEXP (dwarf, 0, i) = tmp;
22024 first = false;
22025 }
22026
22027 while (i < num_regs)
22028 if (saved_regs_mask & (1 << regno))
22029 {
22030 rtx reg1, reg2, mem1, mem2;
22031 rtx tmp0, tmp1, tmp2;
22032 int regno2;
22033
22034 /* Find the register to pair with this one. */
22035 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22036 regno2++)
22037 ;
22038
22039 reg1 = gen_rtx_REG (SImode, regno);
22040 reg2 = gen_rtx_REG (SImode, regno2);
22041
22042 if (first)
22043 {
22044 rtx insn;
22045
22046 first = false;
22047 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22048 stack_pointer_rtx,
22049 -4 * num_regs));
22050 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22051 stack_pointer_rtx,
22052 -4 * (num_regs - 1)));
22053 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22054 plus_constant (Pmode, stack_pointer_rtx,
22055 -4 * (num_regs)));
22056 tmp1 = gen_rtx_SET (mem1, reg1);
22057 tmp2 = gen_rtx_SET (mem2, reg2);
22058 RTX_FRAME_RELATED_P (tmp0) = 1;
22059 RTX_FRAME_RELATED_P (tmp1) = 1;
22060 RTX_FRAME_RELATED_P (tmp2) = 1;
22061 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22062 XVECEXP (par, 0, 0) = tmp0;
22063 XVECEXP (par, 0, 1) = tmp1;
22064 XVECEXP (par, 0, 2) = tmp2;
22065 insn = emit_insn (par);
22066 RTX_FRAME_RELATED_P (insn) = 1;
22067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22068 }
22069 else
22070 {
22071 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22072 stack_pointer_rtx,
22073 4 * i));
22074 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22075 stack_pointer_rtx,
22076 4 * (i + 1)));
22077 tmp1 = gen_rtx_SET (mem1, reg1);
22078 tmp2 = gen_rtx_SET (mem2, reg2);
22079 RTX_FRAME_RELATED_P (tmp1) = 1;
22080 RTX_FRAME_RELATED_P (tmp2) = 1;
22081 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22082 XVECEXP (par, 0, 0) = tmp1;
22083 XVECEXP (par, 0, 1) = tmp2;
22084 emit_insn (par);
22085 }
22086
22087 /* Create unwind information. This is an approximation. */
22088 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22089 plus_constant (Pmode,
22090 stack_pointer_rtx,
22091 4 * i)),
22092 reg1);
22093 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22094 plus_constant (Pmode,
22095 stack_pointer_rtx,
22096 4 * (i + 1))),
22097 reg2);
22098
22099 RTX_FRAME_RELATED_P (tmp1) = 1;
22100 RTX_FRAME_RELATED_P (tmp2) = 1;
22101 XVECEXP (dwarf, 0, i + 1) = tmp1;
22102 XVECEXP (dwarf, 0, i + 2) = tmp2;
22103 i += 2;
22104 regno = regno2 + 1;
22105 }
22106 else
22107 regno++;
22108
22109 return;
22110 }
22111
22112 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22113 whenever possible, otherwise it emits single-word stores. The first store
22114 also allocates stack space for all saved registers, using writeback with
22115 pre-decrement addressing. All other stores use offset addressing. If no STRD
22116 can be emitted, this function emits a sequence of single-word stores,
22117 and not an STM as before, because single-word stores provide more
22118 scheduling freedom and can be turned into an STM by peephole optimizations. */
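/* Illustrative example only (assuming the RTL is matched as STRD/STR): a
   mask of {r4, r5, r6, r7, r8} becomes roughly
	strd	r4, r5, [sp, #-20]!
	strd	r6, r7, [sp, #8]
	str	r8, [sp, #16]  */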
22119 static void
22120 arm_emit_strd_push (unsigned long saved_regs_mask)
22121 {
22122 int num_regs = 0;
22123 int i, j, dwarf_index = 0;
22124 int offset = 0;
22125 rtx dwarf = NULL_RTX;
22126 rtx insn = NULL_RTX;
22127 rtx tmp, mem;
22128
22129 /* TODO: More efficient code can be emitted by changing the
22130 layout, e.g., first push all pairs that can use STRD to keep the
22131 stack aligned, and then push all other registers. */
22132 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22133 if (saved_regs_mask & (1 << i))
22134 num_regs++;
22135
22136 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22137 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22138 gcc_assert (num_regs > 0);
22139
22140 /* Create sequence for DWARF info. */
22141 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22142
22143 /* For dwarf info, we generate explicit stack update. */
22144 tmp = gen_rtx_SET (stack_pointer_rtx,
22145 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22146 RTX_FRAME_RELATED_P (tmp) = 1;
22147 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22148
22149 /* Save registers. */
22150 offset = - 4 * num_regs;
22151 j = 0;
22152 while (j <= LAST_ARM_REGNUM)
22153 if (saved_regs_mask & (1 << j))
22154 {
22155 if ((j % 2 == 0)
22156 && (saved_regs_mask & (1 << (j + 1))))
22157 {
22158 /* The current register and the next register form a register pair
22159 for which STRD can be generated. */
22160 if (offset < 0)
22161 {
22162 /* Allocate stack space for all saved registers. */
22163 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22164 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22165 mem = gen_frame_mem (DImode, tmp);
22166 offset = 0;
22167 }
22168 else if (offset > 0)
22169 mem = gen_frame_mem (DImode,
22170 plus_constant (Pmode,
22171 stack_pointer_rtx,
22172 offset));
22173 else
22174 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22175
22176 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22177 RTX_FRAME_RELATED_P (tmp) = 1;
22178 tmp = emit_insn (tmp);
22179
22180 /* Record the first store insn. */
22181 if (dwarf_index == 1)
22182 insn = tmp;
22183
22184 /* Generate dwarf info. */
22185 mem = gen_frame_mem (SImode,
22186 plus_constant (Pmode,
22187 stack_pointer_rtx,
22188 offset));
22189 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22190 RTX_FRAME_RELATED_P (tmp) = 1;
22191 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22192
22193 mem = gen_frame_mem (SImode,
22194 plus_constant (Pmode,
22195 stack_pointer_rtx,
22196 offset + 4));
22197 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22198 RTX_FRAME_RELATED_P (tmp) = 1;
22199 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22200
22201 offset += 8;
22202 j += 2;
22203 }
22204 else
22205 {
22206 /* Emit a single word store. */
22207 if (offset < 0)
22208 {
22209 /* Allocate stack space for all saved registers. */
22210 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22211 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22212 mem = gen_frame_mem (SImode, tmp);
22213 offset = 0;
22214 }
22215 else if (offset > 0)
22216 mem = gen_frame_mem (SImode,
22217 plus_constant (Pmode,
22218 stack_pointer_rtx,
22219 offset));
22220 else
22221 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22222
22223 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22224 RTX_FRAME_RELATED_P (tmp) = 1;
22225 tmp = emit_insn (tmp);
22226
22227 /* Record the first store insn. */
22228 if (dwarf_index == 1)
22229 insn = tmp;
22230
22231 /* Generate dwarf info. */
22232 mem = gen_frame_mem (SImode,
22233 plus_constant(Pmode,
22234 stack_pointer_rtx,
22235 offset));
22236 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22237 RTX_FRAME_RELATED_P (tmp) = 1;
22238 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22239
22240 offset += 4;
22241 j += 1;
22242 }
22243 }
22244 else
22245 j++;
22246
22247 /* Attach dwarf info to the first insn we generate. */
22248 gcc_assert (insn != NULL_RTX);
22249 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22250 RTX_FRAME_RELATED_P (insn) = 1;
22251 }
22252
22253 /* Generate and emit an insn that we will recognize as a push_multi.
22254 Unfortunately, since this insn does not reflect very well the actual
22255 semantics of the operation, we need to annotate the insn for the benefit
22256 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22257 MASK for registers that should be annotated for DWARF2 frame unwind
22258 information. */
22259 static rtx
22260 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22261 {
22262 int num_regs = 0;
22263 int num_dwarf_regs = 0;
22264 int i, j;
22265 rtx par;
22266 rtx dwarf;
22267 int dwarf_par_index;
22268 rtx tmp, reg;
22269
22270 /* We don't record the PC in the dwarf frame information. */
22271 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22272
22273 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22274 {
22275 if (mask & (1 << i))
22276 num_regs++;
22277 if (dwarf_regs_mask & (1 << i))
22278 num_dwarf_regs++;
22279 }
22280
22281 gcc_assert (num_regs && num_regs <= 16);
22282 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22283
22284 /* For the body of the insn we are going to generate an UNSPEC in
22285 parallel with several USEs. This allows the insn to be recognized
22286 by the push_multi pattern in the arm.md file.
22287
22288 The body of the insn looks something like this:
22289
22290 (parallel [
22291 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22292 (const_int:SI <num>)))
22293 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22294 (use (reg:SI XX))
22295 (use (reg:SI YY))
22296 ...
22297 ])
22298
22299 For the frame note however, we try to be more explicit and actually
22300 show each register being stored into the stack frame, plus a (single)
22301 decrement of the stack pointer. We do it this way in order to be
22302 friendly to the stack unwinding code, which only wants to see a single
22303 stack decrement per instruction. The RTL we generate for the note looks
22304 something like this:
22305
22306 (sequence [
22307 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22308 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22309 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22310 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22311 ...
22312 ])
22313
22314 FIXME: In an ideal world the PRE_MODIFY would not exist and
22315 instead we'd have a parallel expression detailing all
22316 the stores to the various memory addresses so that debug
22317 information is more up-to-date. Remember however while writing
22318 this to take care of the constraints with the push instruction.
22319
22320 Note also that this has to be taken care of for the VFP registers.
22321
22322 For more see PR43399. */
22323
22324 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22325 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22326 dwarf_par_index = 1;
22327
22328 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22329 {
22330 if (mask & (1 << i))
22331 {
22332 /* NOTE: The DWARF code emitter handles reg-reg copies correctly; in the
22333 following example the reg-reg copy of SP to the IP register is handled
22334 through the .cfi_def_cfa_register directive, and the .cfi_offset
22335 directive for the IP register is skipped by the DWARF code emitter.
22336 Example:
22337 mov ip, sp
22338 .cfi_def_cfa_register 12
22339 push {fp, ip, lr, pc}
22340 .cfi_offset 11, -16
22341 .cfi_offset 13, -12
22342 .cfi_offset 14, -8
22343
22344 Whereas the Arm-specific .save directive handling differs from that
22345 of the DWARF code emitter and does not consider reg-reg copies while
22346 updating the register list. When PACBTI is enabled we manually
22347 update the .save directive register list to use "ra_auth_code"
22348 (pseudo register 143) instead of the IP register, as shown in the
22349 following pseudo code.
22350 Example:
22351 pacbti ip, lr, sp
22352 .cfi_register 143, 12
22353 push {r3, r7, ip, lr}
22354 .save {r3, r7, ra_auth_code, lr}
22355 */
22356 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22357 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22358 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22359
22360 XVECEXP (par, 0, 0)
22361 = gen_rtx_SET (gen_frame_mem
22362 (BLKmode,
22363 gen_rtx_PRE_MODIFY (Pmode,
22364 stack_pointer_rtx,
22365 plus_constant
22366 (Pmode, stack_pointer_rtx,
22367 -4 * num_regs))
22368 ),
22369 gen_rtx_UNSPEC (BLKmode,
22370 gen_rtvec (1, reg),
22371 UNSPEC_PUSH_MULT));
22372
22373 if (dwarf_regs_mask & (1 << i))
22374 {
22375 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22376 dwarf_reg);
22377 RTX_FRAME_RELATED_P (tmp) = 1;
22378 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22379 }
22380
22381 break;
22382 }
22383 }
22384
22385 for (j = 1, i++; j < num_regs; i++)
22386 {
22387 if (mask & (1 << i))
22388 {
22389 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22390 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22391 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22392
22393 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22394
22395 if (dwarf_regs_mask & (1 << i))
22396 {
22397 tmp
22398 = gen_rtx_SET (gen_frame_mem
22399 (SImode,
22400 plus_constant (Pmode, stack_pointer_rtx,
22401 4 * j)),
22402 dwarf_reg);
22403 RTX_FRAME_RELATED_P (tmp) = 1;
22404 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22405 }
22406
22407 j++;
22408 }
22409 }
22410
22411 par = emit_insn (par);
22412
22413 tmp = gen_rtx_SET (stack_pointer_rtx,
22414 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22415 RTX_FRAME_RELATED_P (tmp) = 1;
22416 XVECEXP (dwarf, 0, 0) = tmp;
22417
22418 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22419
22420 return par;
22421 }
22422
22423 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22424 SIZE is the offset to be adjusted.
22425 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22426 static void
22427 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22428 {
22429 rtx dwarf;
22430
22431 RTX_FRAME_RELATED_P (insn) = 1;
22432 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22433 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22434 }
22435
22436 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22437 SAVED_REGS_MASK shows which registers need to be restored.
22438
22439 Unfortunately, since this insn does not reflect very well the actual
22440 semantics of the operation, we need to annotate the insn for the benefit
22441 of DWARF2 frame unwind information. */
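/* For instance, a SAVED_REGS_MASK of {r4, r5, r6, pc} produces a parallel
   that is matched as a pop with return, i.e. roughly "pop {r4, r5, r6, pc}"
   (or the equivalent LDMFD), with the stack update folded into the same
   instruction.  */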
22442 static void
22443 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22444 {
22445 int num_regs = 0;
22446 int i, j;
22447 rtx par;
22448 rtx dwarf = NULL_RTX;
22449 rtx tmp, reg;
22450 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22451 int offset_adj;
22452 int emit_update;
22453
22454 offset_adj = return_in_pc ? 1 : 0;
22455 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22456 if (saved_regs_mask & (1 << i))
22457 num_regs++;
22458
22459 gcc_assert (num_regs && num_regs <= 16);
22460
22461 /* If SP is in reglist, then we don't emit SP update insn. */
22462 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22463
22464 /* The parallel needs to hold num_regs SETs
22465 and one SET for the stack update. */
22466 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22467
22468 if (return_in_pc)
22469 XVECEXP (par, 0, 0) = ret_rtx;
22470
22471 if (emit_update)
22472 {
22473 /* Increment the stack pointer, based on there being
22474 num_regs 4-byte registers to restore. */
22475 tmp = gen_rtx_SET (stack_pointer_rtx,
22476 plus_constant (Pmode,
22477 stack_pointer_rtx,
22478 4 * num_regs));
22479 RTX_FRAME_RELATED_P (tmp) = 1;
22480 XVECEXP (par, 0, offset_adj) = tmp;
22481 }
22482
22483 /* Now restore every reg, which may include PC. */
22484 for (j = 0, i = 0; j < num_regs; i++)
22485 if (saved_regs_mask & (1 << i))
22486 {
22487 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22488 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22489 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22490 if ((num_regs == 1) && emit_update && !return_in_pc)
22491 {
22492 /* Emit single load with writeback. */
22493 tmp = gen_frame_mem (SImode,
22494 gen_rtx_POST_INC (Pmode,
22495 stack_pointer_rtx));
22496 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22497 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22498 dwarf);
22499 return;
22500 }
22501
22502 tmp = gen_rtx_SET (reg,
22503 gen_frame_mem
22504 (SImode,
22505 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22506 RTX_FRAME_RELATED_P (tmp) = 1;
22507 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22508
22509 /* We need to maintain a sequence for DWARF info too. As dwarf info
22510 should not have PC, skip PC. */
22511 if (i != PC_REGNUM)
22512 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22513
22514 j++;
22515 }
22516
22517 if (return_in_pc)
22518 par = emit_jump_insn (par);
22519 else
22520 par = emit_insn (par);
22521
22522 REG_NOTES (par) = dwarf;
22523 if (!return_in_pc)
22524 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22525 stack_pointer_rtx, stack_pointer_rtx);
22526 }
22527
22528 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22529 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22530
22531 Unfortunately, since this insn does not reflect very well the actual
22532 semantics of the operation, we need to annotate the insn for the benefit
22533 of DWARF2 frame unwind information. */
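/* For example, restoring d8-d11 relative to the stack pointer produces a
   parallel that is typically emitted as "vldm sp!, {d8-d11}", with the
   base register update included in the same instruction.  */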
22534 static void
22535 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22536 {
22537 int i, j;
22538 rtx par;
22539 rtx dwarf = NULL_RTX;
22540 rtx tmp, reg;
22541
22542 gcc_assert (num_regs && num_regs <= 32);
22543
22544 /* Workaround ARM10 VFPr1 bug. */
22545 if (num_regs == 2 && !arm_arch6)
22546 {
22547 if (first_reg == 15)
22548 first_reg--;
22549
22550 num_regs++;
22551 }
22552
22553 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22554 there could be up to 32 D-registers to restore.
22555 If there are more than 16 D-registers, make two recursive calls,
22556 each of which emits one pop_multi instruction. */
22557 if (num_regs > 16)
22558 {
22559 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22560 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22561 return;
22562 }
22563
22564 /* The parallel needs to hold num_regs SETs
22565 and one SET for the stack update. */
22566 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22567
22568 /* Increment the stack pointer, based on there being
22569 num_regs 8-byte registers to restore. */
22570 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22571 RTX_FRAME_RELATED_P (tmp) = 1;
22572 XVECEXP (par, 0, 0) = tmp;
22573
22574 /* Now show every reg that will be restored, using a SET for each. */
22575 for (j = 0, i=first_reg; j < num_regs; i += 2)
22576 {
22577 reg = gen_rtx_REG (DFmode, i);
22578
22579 tmp = gen_rtx_SET (reg,
22580 gen_frame_mem
22581 (DFmode,
22582 plus_constant (Pmode, base_reg, 8 * j)));
22583 RTX_FRAME_RELATED_P (tmp) = 1;
22584 XVECEXP (par, 0, j + 1) = tmp;
22585
22586 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22587
22588 j++;
22589 }
22590
22591 par = emit_insn (par);
22592 REG_NOTES (par) = dwarf;
22593
22594 /* Make sure the CFA doesn't stay on IP_REGNUM, to allow unwinding from FP. */
22595 if (REGNO (base_reg) == IP_REGNUM)
22596 {
22597 RTX_FRAME_RELATED_P (par) = 1;
22598 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22599 }
22600 else
22601 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22602 base_reg, base_reg);
22603 }
22604
22605 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22606 an even number of registers is being popped, multiple LDRD patterns are
22607 created for all register pairs. If an odd number of registers is popped,
22608 the last register is loaded using an LDR pattern. */
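/* Illustrative example: a SAVED_REGS_MASK of {r4, r5, r6, r7} results in
   roughly
	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
   with the PC, when present, handled by the final LDR after the stack
   update.  */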
22609 static void
22610 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22611 {
22612 int num_regs = 0;
22613 int i, j;
22614 rtx par = NULL_RTX;
22615 rtx dwarf = NULL_RTX;
22616 rtx tmp, reg, tmp1;
22617 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22618
22619 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22620 if (saved_regs_mask & (1 << i))
22621 num_regs++;
22622
22623 gcc_assert (num_regs && num_regs <= 16);
22624
22625 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
22626 popped. If num_regs was even it now becomes odd and we can generate
22627 a pop with PC; if it was odd it becomes even and an LDR with return
22628 can be generated for PC. */
22629 if (return_in_pc)
22630 num_regs--;
22631
22632 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22633
22634 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
22635 the index of the saved registers in the stack frame. A PARALLEL RTX of
22636 a register pair is created here, so that the pattern for LDRD can be
22637 matched. As PC is always the last register to be popped, and we have
22638 already decremented num_regs if PC is in the mask, we don't have to
22639 worry about PC in this loop. */
22640 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22641 if (saved_regs_mask & (1 << j))
22642 {
22643 /* Create RTX for memory load. */
22644 reg = gen_rtx_REG (SImode, j);
22645 tmp = gen_rtx_SET (reg,
22646 gen_frame_mem (SImode,
22647 plus_constant (Pmode,
22648 stack_pointer_rtx, 4 * i)));
22649 RTX_FRAME_RELATED_P (tmp) = 1;
22650
22651 if (i % 2 == 0)
22652 {
22653 /* When saved-register index (i) is even, the RTX to be emitted is
22654 yet to be created. Hence create it first. The LDRD pattern we
22655 are generating is :
22656 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22657 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22658 where target registers need not be consecutive. */
22659 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22660 dwarf = NULL_RTX;
22661 }
22662
22663 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22664 added as 0th element and if i is odd, reg_i is added as 1st element
22665 of LDRD pattern shown above. */
22666 XVECEXP (par, 0, (i % 2)) = tmp;
22667 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22668
22669 if ((i % 2) == 1)
22670 {
22671 /* When saved-register index (i) is odd, RTXs for both the registers
22672 to be loaded are generated in above given LDRD pattern, and the
22673 pattern can be emitted now. */
22674 par = emit_insn (par);
22675 REG_NOTES (par) = dwarf;
22676 RTX_FRAME_RELATED_P (par) = 1;
22677 }
22678
22679 i++;
22680 }
22681
22682 /* If the number of registers pushed is odd and return_in_pc is false, or
22683 the number of registers is even and return_in_pc is true, the last
22684 register is popped using LDR. It can be PC as well. Hence, adjust the
22685 stack first and then use LDR with post-increment. */
22686
22687 /* Increment the stack pointer, based on there being
22688 num_regs 4-byte registers to restore. */
22689 tmp = gen_rtx_SET (stack_pointer_rtx,
22690 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22691 RTX_FRAME_RELATED_P (tmp) = 1;
22692 tmp = emit_insn (tmp);
22693 if (!return_in_pc)
22694 {
22695 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22696 stack_pointer_rtx, stack_pointer_rtx);
22697 }
22698
22699 dwarf = NULL_RTX;
22700
22701 if (((num_regs % 2) == 1 && !return_in_pc)
22702 || ((num_regs % 2) == 0 && return_in_pc))
22703 {
22704 /* Scan for the single register to be popped. Skip until the saved
22705 register is found. */
22706 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22707
22708 /* Gen LDR with post increment here. */
22709 tmp1 = gen_rtx_MEM (SImode,
22710 gen_rtx_POST_INC (SImode,
22711 stack_pointer_rtx));
22712 set_mem_alias_set (tmp1, get_frame_alias_set ());
22713
22714 reg = gen_rtx_REG (SImode, j);
22715 tmp = gen_rtx_SET (reg, tmp1);
22716 RTX_FRAME_RELATED_P (tmp) = 1;
22717 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22718
22719 if (return_in_pc)
22720 {
22721 /* If return_in_pc, j must be PC_REGNUM. */
22722 gcc_assert (j == PC_REGNUM);
22723 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22724 XVECEXP (par, 0, 0) = ret_rtx;
22725 XVECEXP (par, 0, 1) = tmp;
22726 par = emit_jump_insn (par);
22727 }
22728 else
22729 {
22730 par = emit_insn (tmp);
22731 REG_NOTES (par) = dwarf;
22732 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22733 stack_pointer_rtx, stack_pointer_rtx);
22734 }
22735
22736 }
22737 else if ((num_regs % 2) == 1 && return_in_pc)
22738 {
22739 /* There are 2 registers to be popped. So, generate the pattern
22740 pop_multiple_with_stack_update_and_return to pop in PC. */
22741 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22742 }
22743
22744 return;
22745 }
22746
22747 /* LDRD in ARM mode needs consecutive registers as operands. This function
22748 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22749 offset addressing and then generates one separate stack update. This provides
22750 more scheduling freedom, compared to writeback on every load. However,
22751 if the function returns using load into PC directly
22752 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22753 before the last load. TODO: Add a peephole optimization to recognize
22754 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22755 peephole optimization to merge the load at stack-offset zero
22756 with the stack update instruction using load with writeback
22757 in post-index addressing mode. */
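/* Illustrative example: a SAVED_REGS_MASK of {r4, r5, r6} results in
   roughly
	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
   and, if PC is also in the mask, a final "ldr pc, [sp], #4" performs the
   return.  */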
22758 static void
22759 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22760 {
22761 int j = 0;
22762 int offset = 0;
22763 rtx par = NULL_RTX;
22764 rtx dwarf = NULL_RTX;
22765 rtx tmp, mem;
22766
22767 /* Restore saved registers. */
22768 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22769 j = 0;
22770 while (j <= LAST_ARM_REGNUM)
22771 if (saved_regs_mask & (1 << j))
22772 {
22773 if ((j % 2) == 0
22774 && (saved_regs_mask & (1 << (j + 1)))
22775 && (j + 1) != PC_REGNUM)
22776 {
22777 /* Current register and next register form register pair for which
22778 LDRD can be generated. PC is always the last register popped, and
22779 we handle it separately. */
22780 if (offset > 0)
22781 mem = gen_frame_mem (DImode,
22782 plus_constant (Pmode,
22783 stack_pointer_rtx,
22784 offset));
22785 else
22786 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22787
22788 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22789 tmp = emit_insn (tmp);
22790 RTX_FRAME_RELATED_P (tmp) = 1;
22791
22792 /* Generate dwarf info. */
22793
22794 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22795 gen_rtx_REG (SImode, j),
22796 NULL_RTX);
22797 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22798 gen_rtx_REG (SImode, j + 1),
22799 dwarf);
22800
22801 REG_NOTES (tmp) = dwarf;
22802
22803 offset += 8;
22804 j += 2;
22805 }
22806 else if (j != PC_REGNUM)
22807 {
22808 /* Emit a single word load. */
22809 if (offset > 0)
22810 mem = gen_frame_mem (SImode,
22811 plus_constant (Pmode,
22812 stack_pointer_rtx,
22813 offset));
22814 else
22815 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22816
22817 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22818 tmp = emit_insn (tmp);
22819 RTX_FRAME_RELATED_P (tmp) = 1;
22820
22821 /* Generate dwarf info. */
22822 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22823 gen_rtx_REG (SImode, j),
22824 NULL_RTX);
22825
22826 offset += 4;
22827 j += 1;
22828 }
22829 else /* j == PC_REGNUM */
22830 j++;
22831 }
22832 else
22833 j++;
22834
22835 /* Update the stack. */
22836 if (offset > 0)
22837 {
22838 tmp = gen_rtx_SET (stack_pointer_rtx,
22839 plus_constant (Pmode,
22840 stack_pointer_rtx,
22841 offset));
22842 tmp = emit_insn (tmp);
22843 arm_add_cfa_adjust_cfa_note (tmp, offset,
22844 stack_pointer_rtx, stack_pointer_rtx);
22845 offset = 0;
22846 }
22847
22848 if (saved_regs_mask & (1 << PC_REGNUM))
22849 {
22850 /* Only PC is to be popped. */
22851 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22852 XVECEXP (par, 0, 0) = ret_rtx;
22853 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22854 gen_frame_mem (SImode,
22855 gen_rtx_POST_INC (SImode,
22856 stack_pointer_rtx)));
22857 RTX_FRAME_RELATED_P (tmp) = 1;
22858 XVECEXP (par, 0, 1) = tmp;
22859 par = emit_jump_insn (par);
22860
22861 /* Generate dwarf info. */
22862 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22863 gen_rtx_REG (SImode, PC_REGNUM),
22864 NULL_RTX);
22865 REG_NOTES (par) = dwarf;
22866 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22867 stack_pointer_rtx, stack_pointer_rtx);
22868 }
22869 }
22870
22871 /* Calculate the size of the return value that is passed in registers. */
22872 static unsigned
22873 arm_size_return_regs (void)
22874 {
22875 machine_mode mode;
22876
22877 if (crtl->return_rtx != 0)
22878 mode = GET_MODE (crtl->return_rtx);
22879 else
22880 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22881
22882 return GET_MODE_SIZE (mode);
22883 }
22884
22885 /* Return true if the current function needs to save/restore LR. */
22886 static bool
22887 thumb_force_lr_save (void)
22888 {
22889 return !cfun->machine->lr_save_eliminated
22890 && (!crtl->is_leaf
22891 || thumb_far_jump_used_p ()
22892 || df_regs_ever_live_p (LR_REGNUM));
22893 }
22894
22895 /* We do not know whether r3 will be available, because an
22896 indirect tail call is happening in this particular
22897 case. */
22898 static bool
22899 is_indirect_tailcall_p (rtx call)
22900 {
22901 rtx pat = PATTERN (call);
22902
22903 /* Indirect tail call. */
22904 pat = XVECEXP (pat, 0, 0);
22905 if (GET_CODE (pat) == SET)
22906 pat = SET_SRC (pat);
22907
22908 pat = XEXP (XEXP (pat, 0), 0);
22909 return REG_P (pat);
22910 }
22911
22912 /* Return true if r3 is used by any of the tail call insns in the
22913 current function. */
22914 static bool
22915 any_sibcall_could_use_r3 (void)
22916 {
22917 edge_iterator ei;
22918 edge e;
22919
22920 if (!crtl->tail_call_emit)
22921 return false;
22922 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22923 if (e->flags & EDGE_SIBCALL)
22924 {
22925 rtx_insn *call = BB_END (e->src);
22926 if (!CALL_P (call))
22927 call = prev_nonnote_nondebug_insn (call);
22928 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22929 if (find_regno_fusage (call, USE, 3)
22930 || is_indirect_tailcall_p (call))
22931 return true;
22932 }
22933 return false;
22934 }
22935
22936
22937 /* Compute the distance from register FROM to register TO.
22938 These can be the arg pointer (26), the soft frame pointer (25),
22939 the stack pointer (13) or the hard frame pointer (11).
22940 In thumb mode r7 is used as the soft frame pointer, if needed.
22941 Typical stack layout looks like this:
22942
22943 old stack pointer -> | |
22944 ----
22945 | | \
22946 | | saved arguments for
22947 | | vararg functions
22948 | | /
22949 --
22950 hard FP & arg pointer -> | | \
22951 | | stack
22952 | | frame
22953 | | /
22954 --
22955 | | \
22956 | | call saved
22957 | | registers
22958 soft frame pointer -> | | /
22959 --
22960 | | \
22961 | | local
22962 | | variables
22963 locals base pointer -> | | /
22964 --
22965 | | \
22966 | | outgoing
22967 | | arguments
22968 current stack pointer -> | | /
22969 --
22970
22971 For a given function some or all of these stack components
22972 may not be needed, giving rise to the possibility of
22973 eliminating some of the registers.
22974
22975 The values returned by this function must reflect the behavior
22976 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22977
22978 The sign of the number returned reflects the direction of stack
22979 growth, so the values are positive for all eliminations except
22980 from the soft frame pointer to the hard frame pointer.
22981
22982 SFP may point just inside the local variables block to ensure correct
22983 alignment. */
22984
22985
22986 /* Return cached stack offsets. */
22987
22988 static arm_stack_offsets *
22989 arm_get_frame_offsets (void)
22990 {
22991 struct arm_stack_offsets *offsets;
22992
22993 offsets = &cfun->machine->stack_offsets;
22994
22995 return offsets;
22996 }
22997
22998
22999 /* Calculate stack offsets. These are used to calculate register elimination
23000 offsets and in prologue/epilogue code. Also calculates which registers
23001 should be saved. */
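/* In the common case the offsets computed below are monotonically
   non-decreasing: saved_args <= frame <= saved_regs <= soft_frame
   <= locals_base <= outgoing_args, matching the stack layout diagram
   above.  */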
23002
23003 static void
23004 arm_compute_frame_layout (void)
23005 {
23006 struct arm_stack_offsets *offsets;
23007 unsigned long func_type;
23008 int saved;
23009 int core_saved;
23010 HOST_WIDE_INT frame_size;
23011 int i;
23012
23013 offsets = &cfun->machine->stack_offsets;
23014
23015 /* Initially this is the size of the local variables. It will be translated
23016 into an offset once we have determined the size of the preceding data. */
23017 frame_size = ROUND_UP_WORD (get_frame_size ());
23018
23019 /* Space for variadic functions. */
23020 offsets->saved_args = crtl->args.pretend_args_size;
23021
23022 /* In Thumb mode this is incorrect, but never used. */
23023 offsets->frame
23024 = (offsets->saved_args
23025 + arm_compute_static_chain_stack_bytes ()
23026 + (frame_pointer_needed ? 4 : 0));
23027
23028 if (TARGET_32BIT)
23029 {
23030 unsigned int regno;
23031
23032 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23033 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23034 saved = core_saved;
23035
23036 /* We know that SP will be doubleword aligned on entry, and we must
23037 preserve that condition at any subroutine call. We also require the
23038 soft frame pointer to be doubleword aligned. */
23039
23040 if (TARGET_REALLY_IWMMXT)
23041 {
23042 /* Check for the call-saved iWMMXt registers. */
23043 for (regno = FIRST_IWMMXT_REGNUM;
23044 regno <= LAST_IWMMXT_REGNUM;
23045 regno++)
23046 if (reg_needs_saving_p (regno))
23047 saved += 8;
23048 }
23049
23050 func_type = arm_current_func_type ();
23051 /* Space for saved VFP registers. */
23052 if (! IS_VOLATILE (func_type)
23053 && TARGET_VFP_BASE)
23054 saved += arm_get_vfp_saved_size ();
23055
23056 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23057 nonsecure entry functions with VSTR/VLDR. */
23058 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23059 saved += 4;
23060 }
23061 else /* TARGET_THUMB1 */
23062 {
23063 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23064 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23065 saved = core_saved;
23066 if (TARGET_BACKTRACE)
23067 saved += 16;
23068 }
23069
23070 /* Saved registers include the stack frame. */
23071 offsets->saved_regs
23072 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23073 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23074
23075 /* A leaf function does not need any stack alignment if it has nothing
23076 on the stack. */
23077 if (crtl->is_leaf && frame_size == 0
23078 /* However if it calls alloca(), we have a dynamically allocated
23079 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23080 && ! cfun->calls_alloca)
23081 {
23082 offsets->outgoing_args = offsets->soft_frame;
23083 offsets->locals_base = offsets->soft_frame;
23084 return;
23085 }
23086
23087 /* Ensure SFP has the correct alignment. */
23088 if (ARM_DOUBLEWORD_ALIGN
23089 && (offsets->soft_frame & 7))
23090 {
23091 offsets->soft_frame += 4;
23092 /* Try to align stack by pushing an extra reg. Don't bother doing this
23093 when there is a stack frame as the alignment will be rolled into
23094 the normal stack adjustment. */
23095 if (frame_size + crtl->outgoing_args_size == 0)
23096 {
23097 int reg = -1;
23098
23099 /* Register r3 is caller-saved. Normally it does not need to be
23100 saved on entry by the prologue. However if we choose to save
23101 it for padding then we may confuse the compiler into thinking
23102 a prologue sequence is required when in fact it is not. This
23103 will occur when shrink-wrapping if r3 is used as a scratch
23104 register and there are no other callee-saved writes.
23105
23106 This situation can be avoided when other callee-saved registers
23107 are available and r3 is not mandatory: choosing a callee-saved
23108 register for the padding avoids the problem.
23109 bool prefer_callee_reg_p = false;
23110
23111 /* If it is safe to use r3, then do so. This sometimes
23112 generates better code on Thumb-2 by avoiding the need to
23113 use 32-bit push/pop instructions. */
23114 if (! any_sibcall_could_use_r3 ()
23115 && arm_size_return_regs () <= 12
23116 && (offsets->saved_regs_mask & (1 << 3)) == 0
23117 && (TARGET_THUMB2
23118 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23119 {
23120 reg = 3;
23121 if (!TARGET_THUMB2)
23122 prefer_callee_reg_p = true;
23123 }
23124 if (reg == -1
23125 || prefer_callee_reg_p)
23126 {
23127 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23128 {
23129 /* Avoid fixed registers; they may be changed at
23130 arbitrary times so it's unsafe to restore them
23131 during the epilogue. */
23132 if (!fixed_regs[i]
23133 && (offsets->saved_regs_mask & (1 << i)) == 0)
23134 {
23135 reg = i;
23136 break;
23137 }
23138 }
23139 }
23140
23141 if (reg != -1)
23142 {
23143 offsets->saved_regs += 4;
23144 offsets->saved_regs_mask |= (1 << reg);
23145 }
23146 }
23147 }
23148
23149 offsets->locals_base = offsets->soft_frame + frame_size;
23150 offsets->outgoing_args = (offsets->locals_base
23151 + crtl->outgoing_args_size);
23152
23153 if (ARM_DOUBLEWORD_ALIGN)
23154 {
23155 /* Ensure SP remains doubleword aligned. */
23156 if (offsets->outgoing_args & 7)
23157 offsets->outgoing_args += 4;
23158 gcc_assert (!(offsets->outgoing_args & 7));
23159 }
23160 }
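/* Worked example (illustrative only, not from the sources): a 32-bit function
   with no pretend args or static chain, {r4, r5, lr} saved, 16 bytes of locals
   and 8 bytes of outgoing arguments would get roughly
       saved_args    = 0
       saved_regs    = 0 + 12
       soft_frame    = saved_regs + CALLER_INTERWORKING_SLOT_SIZE
       locals_base   = soft_frame + 16
       outgoing_args = locals_base + 8, rounded up so SP stays 8-byte aligned
   (ignoring the extra padding register the code above may decide to push).  */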
23161
23162
23163 /* Calculate the relative offsets for the different stack pointers. Positive
23164 offsets are in the direction of stack growth. */
23165
23166 HOST_WIDE_INT
23167 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23168 {
23169 arm_stack_offsets *offsets;
23170
23171 offsets = arm_get_frame_offsets ();
23172
23173 /* OK, now we have enough information to compute the distances.
23174 There must be an entry in these switch tables for each pair
23175 of registers in ELIMINABLE_REGS, even if some of the entries
23176 seem to be redundant or useless. */
23177 switch (from)
23178 {
23179 case ARG_POINTER_REGNUM:
23180 switch (to)
23181 {
23182 case THUMB_HARD_FRAME_POINTER_REGNUM:
23183 return 0;
23184
23185 case FRAME_POINTER_REGNUM:
23186 /* This is the reverse of the soft frame pointer
23187 to hard frame pointer elimination below. */
23188 return offsets->soft_frame - offsets->saved_args;
23189
23190 case ARM_HARD_FRAME_POINTER_REGNUM:
23191 /* This is only non-zero in the case where the static chain register
23192 is stored above the frame. */
23193 return offsets->frame - offsets->saved_args - 4;
23194
23195 case STACK_POINTER_REGNUM:
23196 /* If nothing has been pushed on the stack at all
23197 then this will return -4. This *is* correct! */
23198 return offsets->outgoing_args - (offsets->saved_args + 4);
23199
23200 default:
23201 gcc_unreachable ();
23202 }
23203 gcc_unreachable ();
23204
23205 case FRAME_POINTER_REGNUM:
23206 switch (to)
23207 {
23208 case THUMB_HARD_FRAME_POINTER_REGNUM:
23209 return 0;
23210
23211 case ARM_HARD_FRAME_POINTER_REGNUM:
23212 /* The hard frame pointer points to the top entry in the
23213 stack frame. The soft frame pointer points to the bottom entry
23214 in the stack frame. If there is no stack frame at all,
23215 then they are identical. */
23216
23217 return offsets->frame - offsets->soft_frame;
23218
23219 case STACK_POINTER_REGNUM:
23220 return offsets->outgoing_args - offsets->soft_frame;
23221
23222 default:
23223 gcc_unreachable ();
23224 }
23225 gcc_unreachable ();
23226
23227 default:
23228 /* You cannot eliminate from the stack pointer.
23229 In theory you could eliminate from the hard frame
23230 pointer to the stack pointer, but this will never
23231 happen, since if a stack frame is not needed the
23232 hard frame pointer will never be used. */
23233 gcc_unreachable ();
23234 }
23235 }
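/* Worked example (illustrative figures): with saved_args == 0, soft_frame == 12
   and outgoing_args == 40, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 40 - (0 + 4) = 36, while eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM yields 40 - 12 = 28.  */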
23236
23237 /* Given FROM and TO register numbers, say whether this elimination is
23238 allowed. Frame pointer elimination is automatically handled.
23239
23240 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23241 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23242 pointer, we must eliminate FRAME_POINTER_REGNUM into
23243 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23244 ARG_POINTER_REGNUM. */
23245
23246 bool
23247 arm_can_eliminate (const int from, const int to)
23248 {
23249 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23250 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23251 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23252 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23253 true);
23254 }
23255
23256 /* Emit RTL to save coprocessor registers on function entry. Returns the
23257 number of bytes pushed. */
23258
23259 static int
23260 arm_save_coproc_regs(void)
23261 {
23262 int saved_size = 0;
23263 unsigned reg;
23264 unsigned start_reg;
23265 rtx insn;
23266
23267 if (TARGET_REALLY_IWMMXT)
23268 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23269 if (reg_needs_saving_p (reg))
23270 {
23271 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23272 insn = gen_rtx_MEM (V2SImode, insn);
23273 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23274 RTX_FRAME_RELATED_P (insn) = 1;
23275 saved_size += 8;
23276 }
23277
23278 if (TARGET_VFP_BASE)
23279 {
23280 start_reg = FIRST_VFP_REGNUM;
23281
23282 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23283 {
23284 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23285 {
23286 if (start_reg != reg)
23287 saved_size += vfp_emit_fstmd (start_reg,
23288 (reg - start_reg) / 2);
23289 start_reg = reg + 2;
23290 }
23291 }
23292 if (start_reg != reg)
23293 saved_size += vfp_emit_fstmd (start_reg,
23294 (reg - start_reg) / 2);
23295 }
23296 return saved_size;
23297 }
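/* A minimal sketch of what the routine above emits (illustrative only): each
   iWMMXt register that needs saving becomes a pre-decrement store of the form
       (set (mem:V2SI (pre_dec:SI sp)) (reg:V2SI wrN))
   costing 8 bytes of stack, while contiguous runs of call-saved VFP D
   registers (e.g. d8-d15) are saved by a single vfp_emit_fstmd call per run,
   again at 8 bytes per register.  */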
23298
23299
23300 /* Set the Thumb frame pointer from the stack pointer. */
23301
23302 static void
23303 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23304 {
23305 HOST_WIDE_INT amount;
23306 rtx insn, dwarf;
23307
23308 amount = offsets->outgoing_args - offsets->locals_base;
23309 if (amount < 1024)
23310 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23311 stack_pointer_rtx, GEN_INT (amount)));
23312 else
23313 {
23314 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23315 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23316 expects the first two operands to be the same. */
23317 if (TARGET_THUMB2)
23318 {
23319 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23320 stack_pointer_rtx,
23321 hard_frame_pointer_rtx));
23322 }
23323 else
23324 {
23325 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23326 hard_frame_pointer_rtx,
23327 stack_pointer_rtx));
23328 }
23329 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23330 plus_constant (Pmode, stack_pointer_rtx, amount));
23331 RTX_FRAME_RELATED_P (dwarf) = 1;
23332 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23333 }
23334
23335 RTX_FRAME_RELATED_P (insn) = 1;
23336 }
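/* Illustrative only (assuming r7 is the Thumb hard frame pointer): for a
   16-byte outgoing-argument area the routine above emits
       add     r7, sp, #16
   while for an offset of 1024 or more it first loads the constant and then
   adds SP, roughly
       mov     r7, #<amount>
       add     r7, sp, r7        @ Thumb-2 operand order
       add     r7, r7, sp        @ Thumb-1 operand order  */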
23337
23338 struct scratch_reg {
23339 rtx reg;
23340 bool saved;
23341 };
23342
23343 /* Return a short-lived scratch register for use as a 2nd scratch register on
23344 function entry after the registers are saved in the prologue. This register
23345 must be released by means of release_scratch_register_on_entry. IP is not
23346 considered since it is always used as the 1st scratch register if available.
23347
23348 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23349 mask of live registers. */
23350
23351 static void
23352 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23353 unsigned long live_regs)
23354 {
23355 int regno = -1;
23356
23357 sr->saved = false;
23358
23359 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23360 regno = LR_REGNUM;
23361 else
23362 {
23363 unsigned int i;
23364
23365 for (i = 4; i < 11; i++)
23366 if (regno1 != i && (live_regs & (1 << i)) != 0)
23367 {
23368 regno = i;
23369 break;
23370 }
23371
23372 if (regno < 0)
23373 {
23374 /* If IP is used as the 1st scratch register for a nested function,
23375 then either r3 wasn't available or is used to preserve IP. */
23376 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23377 regno1 = 3;
23378 regno = (regno1 == 3 ? 2 : 3);
23379 sr->saved
23380 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23381 regno);
23382 }
23383 }
23384
23385 sr->reg = gen_rtx_REG (SImode, regno);
23386 if (sr->saved)
23387 {
23388 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23389 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23390 rtx x = gen_rtx_SET (stack_pointer_rtx,
23391 plus_constant (Pmode, stack_pointer_rtx, -4));
23392 RTX_FRAME_RELATED_P (insn) = 1;
23393 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23394 }
23395 }
23396
23397 /* Release a scratch register obtained from the preceding function. */
23398
23399 static void
23400 release_scratch_register_on_entry (struct scratch_reg *sr)
23401 {
23402 if (sr->saved)
23403 {
23404 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23405 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23406 rtx x = gen_rtx_SET (stack_pointer_rtx,
23407 plus_constant (Pmode, stack_pointer_rtx, 4));
23408 RTX_FRAME_RELATED_P (insn) = 1;
23409 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23410 }
23411 }
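/* Illustrative sketch: when the chosen scratch register is live on entry, the
   pair of helpers above wrap its use with
       str     rN, [sp, #-4]!    @ get_scratch_register_on_entry
       ...
       ldr     rN, [sp], #4      @ release_scratch_register_on_entry
   where rN is whichever register was picked (LR, one of r4-r10, r2 or r3).  */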
23412
23413 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23414
23415 #if PROBE_INTERVAL > 4096
23416 #error Cannot use indexed addressing mode for stack probing
23417 #endif
23418
23419 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23420 inclusive. These are offsets from the current stack pointer. REGNO1
23421 is the index number of the 1st scratch register and LIVE_REGS is the
23422 mask of live registers. */
23423
23424 static void
23425 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23426 unsigned int regno1, unsigned long live_regs)
23427 {
23428 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23429
23430 /* See if we have a constant small number of probes to generate. If so,
23431 that's the easy case. */
23432 if (size <= PROBE_INTERVAL)
23433 {
23434 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23435 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23436 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23437 }
23438
23439 /* The run-time loop is made up of 10 insns in the generic case while the
23440 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
23441 else if (size <= 5 * PROBE_INTERVAL)
23442 {
23443 HOST_WIDE_INT i, rem;
23444
23445 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23446 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23447 emit_stack_probe (reg1);
23448
23449 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23450 it exceeds SIZE. If only two probes are needed, this will not
23451 generate any code. Then probe at FIRST + SIZE. */
23452 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23453 {
23454 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23455 emit_stack_probe (reg1);
23456 }
23457
23458 rem = size - (i - PROBE_INTERVAL);
23459 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23460 {
23461 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23462 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23463 }
23464 else
23465 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23466 }
23467
23468 /* Otherwise, do the same as above, but in a loop. Note that we must be
23469 extra careful with variables wrapping around because we might be at
23470 the very top (or the very bottom) of the address space and we have
23471 to be able to handle this case properly; in particular, we use an
23472 equality test for the loop condition. */
23473 else
23474 {
23475 HOST_WIDE_INT rounded_size;
23476 struct scratch_reg sr;
23477
23478 get_scratch_register_on_entry (&sr, regno1, live_regs);
23479
23480 emit_move_insn (reg1, GEN_INT (first));
23481
23482
23483 /* Step 1: round SIZE to the previous multiple of the interval. */
23484
23485 rounded_size = size & -PROBE_INTERVAL;
23486 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23487
23488
23489 /* Step 2: compute initial and final value of the loop counter. */
23490
23491 /* TEST_ADDR = SP + FIRST. */
23492 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23493
23494 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23495 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23496
23497
23498 /* Step 3: the loop
23499
23500 do
23501 {
23502 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23503 probe at TEST_ADDR
23504 }
23505 while (TEST_ADDR != LAST_ADDR)
23506
23507 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23508 until it is equal to ROUNDED_SIZE. */
23509
23510 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23511
23512
23513 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23514 that SIZE is equal to ROUNDED_SIZE. */
23515
23516 if (size != rounded_size)
23517 {
23518 HOST_WIDE_INT rem = size - rounded_size;
23519
23520 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23521 {
23522 emit_set_insn (sr.reg,
23523 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23524 emit_stack_probe (plus_constant (Pmode, sr.reg,
23525 PROBE_INTERVAL - rem));
23526 }
23527 else
23528 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23529 }
23530
23531 release_scratch_register_on_entry (&sr);
23532 }
23533
23534 /* Make sure nothing is scheduled before we are done. */
23535 emit_insn (gen_blockage ());
23536 }
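/* Worked example (assuming the default 4 kB PROBE_INTERVAL): for FIRST == 4 kB
   and SIZE == 10 kB the routine above emits probes at SP - 8 kB, SP - 12 kB and
   SP - 14 kB, i.e. at FIRST + 4 kB, FIRST + 8 kB and FIRST + SIZE.  */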
23537
23538 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23539 absolute addresses. */
23540
23541 const char *
23542 output_probe_stack_range (rtx reg1, rtx reg2)
23543 {
23544 static int labelno = 0;
23545 char loop_lab[32];
23546 rtx xops[2];
23547
23548 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23549
23550 /* Loop. */
23551 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23552
23553 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23554 xops[0] = reg1;
23555 xops[1] = GEN_INT (PROBE_INTERVAL);
23556 output_asm_insn ("sub\t%0, %0, %1", xops);
23557
23558 /* Probe at TEST_ADDR. */
23559 output_asm_insn ("str\tr0, [%0, #0]", xops);
23560
23561 /* Test if TEST_ADDR == LAST_ADDR. */
23562 xops[1] = reg2;
23563 output_asm_insn ("cmp\t%0, %1", xops);
23564
23565 /* Branch. */
23566 fputs ("\tbne\t", asm_out_file);
23567 assemble_name_raw (asm_out_file, loop_lab);
23568 fputc ('\n', asm_out_file);
23569
23570 return "";
23571 }
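/* A sketch of the loop printed above, assuming the default 4 kB probe interval
   and made-up register numbers:
       .LPSRL0:
               sub     r4, r4, #4096   @ TEST_ADDR -= PROBE_INTERVAL
               str     r0, [r4, #0]    @ probe at TEST_ADDR
               cmp     r4, r5          @ reached LAST_ADDR yet?
               bne     .LPSRL0  */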
23572
23573 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23574 function. */
23575 void
23576 arm_expand_prologue (void)
23577 {
23578 rtx amount;
23579 rtx insn;
23580 rtx ip_rtx;
23581 unsigned long live_regs_mask;
23582 unsigned long func_type;
23583 int fp_offset = 0;
23584 int saved_pretend_args = 0;
23585 int saved_regs = 0;
23586 unsigned HOST_WIDE_INT args_to_push;
23587 HOST_WIDE_INT size;
23588 arm_stack_offsets *offsets;
23589 bool clobber_ip;
23590
23591 func_type = arm_current_func_type ();
23592
23593 /* Naked functions don't have prologues. */
23594 if (IS_NAKED (func_type))
23595 {
23596 if (flag_stack_usage_info)
23597 current_function_static_stack_size = 0;
23598 return;
23599 }
23600
23601 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23602 args_to_push = crtl->args.pretend_args_size;
23603
23604 /* Compute which register we will have to save onto the stack. */
23605 offsets = arm_get_frame_offsets ();
23606 live_regs_mask = offsets->saved_regs_mask;
23607
23608 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23609
23610 if (IS_STACKALIGN (func_type))
23611 {
23612 rtx r0, r1;
23613
23614 /* Handle a word-aligned stack pointer. We generate the following:
23615
23616 mov r0, sp
23617 bic r1, r0, #7
23618 mov sp, r1
23619 <save and restore r0 in normal prologue/epilogue>
23620 mov sp, r0
23621 bx lr
23622
23623 The unwinder doesn't need to know about the stack realignment.
23624 Just tell it we saved SP in r0. */
23625 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23626
23627 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23628 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23629
23630 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23631 RTX_FRAME_RELATED_P (insn) = 1;
23632 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23633
23634 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23635
23636 /* ??? The CFA changes here, which may cause GDB to conclude that it
23637 has entered a different function. That said, the unwind info is
23638 correct, individually, before and after this instruction because
23639 we've described the save of SP, which will override the default
23640 handling of SP as restoring from the CFA. */
23641 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23642 }
23643
23644 /* Compute the static_chain_stack_bytes required and store it. At this
23645 point the value must still be -1, as set by arm_init_machine_status (). */
23646 cfun->machine->static_chain_stack_bytes
23647 = arm_compute_static_chain_stack_bytes ();
23648
23649 /* The static chain register is the same as the IP register. If it is
23650 clobbered when creating the frame, we need to save and restore it. */
23651 clobber_ip = (IS_NESTED (func_type)
23652 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23653 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23654 || flag_stack_clash_protection)
23655 && !df_regs_ever_live_p (LR_REGNUM)
23656 && arm_r3_live_at_start_p ()))
23657 || arm_current_function_pac_enabled_p ()));
23658
23659 /* Find somewhere to store IP whilst the frame is being created.
23660 We try the following places in order:
23661
23662 1. The last argument register r3 if it is available.
23663 2. A slot on the stack above the frame if there are no
23664 arguments to push onto the stack.
23665 3. Register r3 again, after pushing the argument registers
23666 onto the stack, if this is a varargs function.
23667 4. The last slot on the stack created for the arguments to
23668 push, if this isn't a varargs function.
23669
23670 Note - we only need to tell the dwarf2 backend about the SP
23671 adjustment in the second variant; the static chain register
23672 doesn't need to be unwound, as it doesn't contain a value
23673 inherited from the caller. */
23674 if (clobber_ip)
23675 {
23676 if (!arm_r3_live_at_start_p ())
23677 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23678 else if (args_to_push == 0)
23679 {
23680 rtx addr, dwarf;
23681
23682 saved_regs += 4;
23683
23684 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23685 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23686 fp_offset = 4;
23687
23688 /* Just tell the dwarf backend that we adjusted SP. */
23689 dwarf = gen_rtx_SET (stack_pointer_rtx,
23690 plus_constant (Pmode, stack_pointer_rtx,
23691 -fp_offset));
23692 RTX_FRAME_RELATED_P (insn) = 1;
23693 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23694 if (arm_current_function_pac_enabled_p ())
23695 cfun->machine->pacspval_needed = 1;
23696 }
23697 else
23698 {
23699 /* Store the args on the stack. */
23700 if (cfun->machine->uses_anonymous_args)
23701 {
23702 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23703 (0xf0 >> (args_to_push / 4)) & 0xf);
23704 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23705 saved_pretend_args = 1;
23706 }
23707 else
23708 {
23709 rtx addr, dwarf;
23710
23711 if (args_to_push == 4)
23712 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23713 else
23714 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23715 plus_constant (Pmode,
23716 stack_pointer_rtx,
23717 -args_to_push));
23718
23719 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23720
23721 /* Just tell the dwarf backend that we adjusted SP. */
23722 dwarf = gen_rtx_SET (stack_pointer_rtx,
23723 plus_constant (Pmode, stack_pointer_rtx,
23724 -args_to_push));
23725 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23726 }
23727
23728 RTX_FRAME_RELATED_P (insn) = 1;
23729 fp_offset = args_to_push;
23730 args_to_push = 0;
23731 if (arm_current_function_pac_enabled_p ())
23732 cfun->machine->pacspval_needed = 1;
23733 }
23734 }
23735
23736 if (arm_current_function_pac_enabled_p ())
23737 {
23738 /* If IP was clobbered we only emit a PAC instruction as the BTI
23739 one will be added before the push of the clobbered IP (if
23740 necessary) by the bti pass. */
23741 if (aarch_bti_enabled () && !clobber_ip)
23742 insn = emit_insn (gen_pacbti_nop ());
23743 else
23744 insn = emit_insn (gen_pac_nop ());
23745
23746 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23747 RTX_FRAME_RELATED_P (insn) = 1;
23748 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23749 }
23750
23751 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23752 {
23753 if (IS_INTERRUPT (func_type))
23754 {
23755 /* Interrupt functions must not corrupt any registers.
23756 Creating a frame pointer however, corrupts the IP
23757 register, so we must push it first. */
23758 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23759
23760 /* Do not set RTX_FRAME_RELATED_P on this insn.
23761 The dwarf stack unwinding code only wants to see one
23762 stack decrement per function, and this is not it. If
23763 this instruction is labeled as being part of the frame
23764 creation sequence then dwarf2out_frame_debug_expr will
23765 die when it encounters the assignment of IP to FP
23766 later on, since the use of SP here establishes SP as
23767 the CFA register and not IP.
23768
23769 Anyway this instruction is not really part of the stack
23770 frame creation although it is part of the prologue. */
23771 }
23772
23773 insn = emit_set_insn (ip_rtx,
23774 plus_constant (Pmode, stack_pointer_rtx,
23775 fp_offset));
23776 RTX_FRAME_RELATED_P (insn) = 1;
23777 }
23778
23779 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23780 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23781 {
23782 saved_regs += 4;
23783 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23784 GEN_INT (FPCXTNS_ENUM)));
23785 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23786 plus_constant (Pmode, stack_pointer_rtx, -4));
23787 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23788 RTX_FRAME_RELATED_P (insn) = 1;
23789 }
23790
23791 if (args_to_push)
23792 {
23793 /* Push the argument registers, or reserve space for them. */
23794 if (cfun->machine->uses_anonymous_args)
23795 insn = emit_multi_reg_push
23796 ((0xf0 >> (args_to_push / 4)) & 0xf,
23797 (0xf0 >> (args_to_push / 4)) & 0xf);
23798 else
23799 insn = emit_insn
23800 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23801 GEN_INT (- args_to_push)));
23802 RTX_FRAME_RELATED_P (insn) = 1;
23803 }
23804
23805 /* If this is an interrupt service routine, and the link register
23806 is going to be pushed, and we're not generating extra
23807 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
23808 subtracting four from LR now will mean that the function return
23809 can be done with a single instruction. */
23810 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23811 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23812 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23813 && TARGET_ARM)
23814 {
23815 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23816
23817 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23818 }
23819
23820 if (live_regs_mask)
23821 {
23822 unsigned long dwarf_regs_mask = live_regs_mask;
23823
23824 saved_regs += bit_count (live_regs_mask) * 4;
23825 if (optimize_size && !frame_pointer_needed
23826 && saved_regs == offsets->saved_regs - offsets->saved_args)
23827 {
23828 /* If no coprocessor registers are being pushed and we don't have
23829 to worry about a frame pointer then push extra registers to
23830 create the stack frame. This is done in a way that does not
23831 alter the frame layout, so is independent of the epilogue. */
23832 int n;
23833 int frame;
23834 n = 0;
23835 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23836 n++;
23837 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23838 if (frame && n * 4 >= frame)
23839 {
23840 n = frame / 4;
23841 live_regs_mask |= (1 << n) - 1;
23842 saved_regs += frame;
23843 }
23844 }
23845
23846 if (TARGET_LDRD
23847 && current_tune->prefer_ldrd_strd
23848 && !optimize_function_for_size_p (cfun))
23849 {
23850 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23851 if (TARGET_THUMB2)
23852 thumb2_emit_strd_push (live_regs_mask);
23853 else if (TARGET_ARM
23854 && !TARGET_APCS_FRAME
23855 && !IS_INTERRUPT (func_type))
23856 arm_emit_strd_push (live_regs_mask);
23857 else
23858 {
23859 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23860 RTX_FRAME_RELATED_P (insn) = 1;
23861 }
23862 }
23863 else
23864 {
23865 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23866 RTX_FRAME_RELATED_P (insn) = 1;
23867 }
23868 }
23869
23870 if (! IS_VOLATILE (func_type))
23871 saved_regs += arm_save_coproc_regs ();
23872
23873 if (frame_pointer_needed && TARGET_ARM)
23874 {
23875 /* Create the new frame pointer. */
23876 if (TARGET_APCS_FRAME)
23877 {
23878 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23879 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23880 RTX_FRAME_RELATED_P (insn) = 1;
23881 }
23882 else
23883 {
23884 insn = GEN_INT (saved_regs - (4 + fp_offset));
23885 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23886 stack_pointer_rtx, insn));
23887 RTX_FRAME_RELATED_P (insn) = 1;
23888 }
23889 }
23890
23891 size = offsets->outgoing_args - offsets->saved_args;
23892 if (flag_stack_usage_info)
23893 current_function_static_stack_size = size;
23894
23895 /* If this isn't an interrupt service routine and we have a frame, then do
23896 stack checking. We use IP as the first scratch register, except for the
23897 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23898 if (!IS_INTERRUPT (func_type)
23899 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23900 || flag_stack_clash_protection))
23901 {
23902 unsigned int regno;
23903
23904 if (!IS_NESTED (func_type) || clobber_ip)
23905 regno = IP_REGNUM;
23906 else if (df_regs_ever_live_p (LR_REGNUM))
23907 regno = LR_REGNUM;
23908 else
23909 regno = 3;
23910
23911 if (crtl->is_leaf && !cfun->calls_alloca)
23912 {
23913 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23914 arm_emit_probe_stack_range (get_stack_check_protect (),
23915 size - get_stack_check_protect (),
23916 regno, live_regs_mask);
23917 }
23918 else if (size > 0)
23919 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23920 regno, live_regs_mask);
23921 }
23922
23923 /* Recover the static chain register. */
23924 if (clobber_ip)
23925 {
23926 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23927 insn = gen_rtx_REG (SImode, 3);
23928 else
23929 {
23930 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23931 insn = gen_frame_mem (SImode, insn);
23932 }
23933 emit_set_insn (ip_rtx, insn);
23934 emit_insn (gen_force_register_use (ip_rtx));
23935 }
23936
23937 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23938 {
23939 /* This add can produce multiple insns for a large constant, so we
23940 need to get tricky. */
23941 rtx_insn *last = get_last_insn ();
23942
23943 amount = GEN_INT (offsets->saved_args + saved_regs
23944 - offsets->outgoing_args);
23945
23946 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23947 amount));
23948 do
23949 {
23950 last = last ? NEXT_INSN (last) : get_insns ();
23951 RTX_FRAME_RELATED_P (last) = 1;
23952 }
23953 while (last != insn);
23954
23955 /* If the frame pointer is needed, emit a special barrier that
23956 will prevent the scheduler from moving stores to the frame
23957 before the stack adjustment. */
23958 if (frame_pointer_needed)
23959 emit_insn (gen_stack_tie (stack_pointer_rtx,
23960 hard_frame_pointer_rtx));
23961 }
23962
23963
23964 if (frame_pointer_needed && TARGET_THUMB2)
23965 thumb_set_frame_pointer (offsets);
23966
23967 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23968 {
23969 unsigned long mask;
23970
23971 mask = live_regs_mask;
23972 mask &= THUMB2_WORK_REGS;
23973 if (!IS_NESTED (func_type))
23974 mask |= (1 << IP_REGNUM);
23975 arm_load_pic_register (mask, NULL_RTX);
23976 }
23977
23978 /* If we are profiling, make sure no instructions are scheduled before
23979 the call to mcount. Similarly if the user has requested no
23980 scheduling in the prolog. Similarly if we want non-call exceptions
23981 using the EABI unwinder, to prevent faulting instructions from being
23982 swapped with a stack adjustment. */
23983 if (crtl->profile || !TARGET_SCHED_PROLOG
23984 || (arm_except_unwind_info (&global_options) == UI_TARGET
23985 && cfun->can_throw_non_call_exceptions))
23986 emit_insn (gen_blockage ());
23987
23988 /* If the link register is being kept alive, with the return address in it,
23989 then make sure that it does not get reused by the ce2 pass. */
23990 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23991 cfun->machine->lr_save_eliminated = 1;
23992 }
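/* A hedged sketch of a typical ARM -mapcs-frame prologue produced by the steps
   above (the exact register set and offsets depend on the function):
       mov     ip, sp
       push    {fp, ip, lr, pc}
       sub     fp, ip, #4
       sub     sp, sp, #<locals + outgoing args>  */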
23993 \f
23994 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23995 static void
23996 arm_print_condition (FILE *stream)
23997 {
23998 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23999 {
24000 /* Branch conversion is not implemented for Thumb-2. */
24001 if (TARGET_THUMB)
24002 {
24003 output_operand_lossage ("predicated Thumb instruction");
24004 return;
24005 }
24006 if (current_insn_predicate != NULL)
24007 {
24008 output_operand_lossage
24009 ("predicated instruction in conditional sequence");
24010 return;
24011 }
24012
24013 fputs (arm_condition_codes[arm_current_cc], stream);
24014 }
24015 else if (current_insn_predicate)
24016 {
24017 enum arm_cond_code code;
24018
24019 if (TARGET_THUMB1)
24020 {
24021 output_operand_lossage ("predicated Thumb instruction");
24022 return;
24023 }
24024
24025 code = get_arm_condition_code (current_insn_predicate);
24026 fputs (arm_condition_codes[code], stream);
24027 }
24028 }
24029
24030
24031 /* Globally reserved letters: acln
24032 Punctuation letters currently used: @_|?().!#
24033 Lower case letters currently used: bcdefhimpqtvwxyz
24034 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24035 Letters previously used, but now deprecated/obsolete: sWXYZ.
24036
24037 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24038
24039 If CODE is 'd', then X is a condition operand and the instruction
24040 should only be executed if the condition is true.
24041 If CODE is 'D', then X is a condition operand and the instruction
24042 should only be executed if the condition is false: however, if the mode
24043 of the comparison is CCFPEmode, then always execute the instruction -- we
24044 do this because in these circumstances !GE does not necessarily imply LT;
24045 in these cases the instruction pattern will take care to make sure that
24046 an instruction containing %d will follow, thereby undoing the effects of
24047 doing this instruction unconditionally.
24048 If CODE is 'N' then X is a floating point operand that must be negated
24049 before output.
24050 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24051 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24052 If CODE is 'V', then the operand must be a CONST_INT representing
24053 the bits to preserve in the modified register (Rd) of a BFI or BFC
24054 instruction: print out both the width and lsb (shift) fields. */
24055 static void
24056 arm_print_operand (FILE *stream, rtx x, int code)
24057 {
24058 switch (code)
24059 {
24060 case '@':
24061 fputs (ASM_COMMENT_START, stream);
24062 return;
24063
24064 case '_':
24065 fputs (user_label_prefix, stream);
24066 return;
24067
24068 case '|':
24069 fputs (REGISTER_PREFIX, stream);
24070 return;
24071
24072 case '?':
24073 arm_print_condition (stream);
24074 return;
24075
24076 case '.':
24077 /* The current condition code for a condition code setting instruction.
24078 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24079 fputc('s', stream);
24080 arm_print_condition (stream);
24081 return;
24082
24083 case '!':
24084 /* If the instruction is conditionally executed then print
24085 the current condition code, otherwise print 's'. */
24086 gcc_assert (TARGET_THUMB2);
24087 if (current_insn_predicate)
24088 arm_print_condition (stream);
24089 else
24090 fputc('s', stream);
24091 break;
24092
24093 /* %# is a "break" sequence. It doesn't output anything, but is used to
24094 separate e.g. operand numbers from following text, if that text consists
24095 of further digits which we don't want to be part of the operand
24096 number. */
24097 case '#':
24098 return;
24099
24100 case 'N':
24101 {
24102 REAL_VALUE_TYPE r;
24103 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24104 fprintf (stream, "%s", fp_const_from_val (&r));
24105 }
24106 return;
24107
24108 /* An integer or symbol address without a preceding # sign. */
24109 case 'c':
24110 switch (GET_CODE (x))
24111 {
24112 case CONST_INT:
24113 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24114 break;
24115
24116 case SYMBOL_REF:
24117 output_addr_const (stream, x);
24118 break;
24119
24120 case CONST:
24121 if (GET_CODE (XEXP (x, 0)) == PLUS
24122 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24123 {
24124 output_addr_const (stream, x);
24125 break;
24126 }
24127 /* Fall through. */
24128
24129 default:
24130 output_operand_lossage ("Unsupported operand for code '%c'", code);
24131 }
24132 return;
24133
24134 /* An integer that we want to print in HEX. */
24135 case 'x':
24136 switch (GET_CODE (x))
24137 {
24138 case CONST_INT:
24139 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24140 break;
24141
24142 default:
24143 output_operand_lossage ("Unsupported operand for code '%c'", code);
24144 }
24145 return;
24146
24147 case 'B':
24148 if (CONST_INT_P (x))
24149 {
24150 HOST_WIDE_INT val;
24151 val = ARM_SIGN_EXTEND (~INTVAL (x));
24152 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24153 }
24154 else
24155 {
24156 putc ('~', stream);
24157 output_addr_const (stream, x);
24158 }
24159 return;
24160
24161 case 'b':
24162 /* Print the log2 of a CONST_INT. */
24163 {
24164 HOST_WIDE_INT val;
24165
24166 if (!CONST_INT_P (x)
24167 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24168 output_operand_lossage ("Unsupported operand for code '%c'", code);
24169 else
24170 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24171 }
24172 return;
24173
24174 case 'L':
24175 /* The low 16 bits of an immediate constant. */
24176 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24177 return;
24178
24179 case 'i':
24180 fprintf (stream, "%s", arithmetic_instr (x, 1));
24181 return;
24182
24183 case 'I':
24184 fprintf (stream, "%s", arithmetic_instr (x, 0));
24185 return;
24186
24187 case 'S':
24188 {
24189 HOST_WIDE_INT val;
24190 const char *shift;
24191
24192 shift = shift_op (x, &val);
24193
24194 if (shift)
24195 {
24196 fprintf (stream, ", %s ", shift);
24197 if (val == -1)
24198 arm_print_operand (stream, XEXP (x, 1), 0);
24199 else
24200 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24201 }
24202 }
24203 return;
24204
24205 /* An explanation of the 'Q', 'R' and 'H' register operands:
24206
24207 In a pair of registers containing a DI or DF value the 'Q'
24208 operand returns the register number of the register containing
24209 the least significant part of the value. The 'R' operand returns
24210 the register number of the register containing the most
24211 significant part of the value.
24212
24213 The 'H' operand returns the higher of the two register numbers.
24214 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24215 same as the 'Q' operand, since the most significant part of the
24216 value is held in the lower number register. The reverse is true
24217 on systems where WORDS_BIG_ENDIAN is false.
24218
24219 The purpose of these operands is to distinguish between cases
24220 where the endian-ness of the values is important (for example
24221 when they are added together), and cases where the endian-ness
24222 is irrelevant, but the order of register operations is important.
24223 For example when loading a value from memory into a register
24224 pair, the endian-ness does not matter. Provided that the value
24225 from the lower memory address is put into the lower numbered
24226 register, and the value from the higher address is put into the
24227 higher numbered register, the load will work regardless of whether
24228 the value being loaded is big-wordian or little-wordian. The
24229 order of the two register loads can matter however, if the address
24230 of the memory location is actually held in one of the registers
24231 being overwritten by the load.
24232
24233 The 'Q' and 'R' constraints are also available for 64-bit
24234 constants. */
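/* For example (little-endian, illustrative): a DImode value held in the pair
   {r4, r5} prints r4 for %Q (least significant word), r5 for %R (most
   significant word) and r5 for %H (the higher register number).  */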
24235 case 'Q':
24236 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24237 {
24238 rtx part = gen_lowpart (SImode, x);
24239 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24240 return;
24241 }
24242
24243 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24244 {
24245 output_operand_lossage ("invalid operand for code '%c'", code);
24246 return;
24247 }
24248
24249 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24250 return;
24251
24252 case 'R':
24253 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24254 {
24255 machine_mode mode = GET_MODE (x);
24256 rtx part;
24257
24258 if (mode == VOIDmode)
24259 mode = DImode;
24260 part = gen_highpart_mode (SImode, mode, x);
24261 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24262 return;
24263 }
24264
24265 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24266 {
24267 output_operand_lossage ("invalid operand for code '%c'", code);
24268 return;
24269 }
24270
24271 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24272 return;
24273
24274 case 'H':
24275 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24276 {
24277 output_operand_lossage ("invalid operand for code '%c'", code);
24278 return;
24279 }
24280
24281 asm_fprintf (stream, "%r", REGNO (x) + 1);
24282 return;
24283
24284 case 'J':
24285 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24286 {
24287 output_operand_lossage ("invalid operand for code '%c'", code);
24288 return;
24289 }
24290
24291 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24292 return;
24293
24294 case 'K':
24295 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24296 {
24297 output_operand_lossage ("invalid operand for code '%c'", code);
24298 return;
24299 }
24300
24301 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24302 return;
24303
24304 case 'm':
24305 asm_fprintf (stream, "%r",
24306 REG_P (XEXP (x, 0))
24307 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24308 return;
24309
24310 case 'M':
24311 asm_fprintf (stream, "{%r-%r}",
24312 REGNO (x),
24313 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24314 return;
24315
24316 /* Like 'M', but writing doubleword vector registers, for use by Neon
24317 insns. */
24318 case 'h':
24319 {
24320 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24321 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24322 if (numregs == 1)
24323 asm_fprintf (stream, "{d%d}", regno);
24324 else
24325 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24326 }
24327 return;
24328
24329 case 'd':
24330 /* CONST_TRUE_RTX means always -- that's the default. */
24331 if (x == const_true_rtx)
24332 return;
24333
24334 if (!COMPARISON_P (x))
24335 {
24336 output_operand_lossage ("invalid operand for code '%c'", code);
24337 return;
24338 }
24339
24340 fputs (arm_condition_codes[get_arm_condition_code (x)],
24341 stream);
24342 return;
24343
24344 case 'D':
24345 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24346 want to do that. */
24347 if (x == const_true_rtx)
24348 {
24349 output_operand_lossage ("instruction never executed");
24350 return;
24351 }
24352 if (!COMPARISON_P (x))
24353 {
24354 output_operand_lossage ("invalid operand for code '%c'", code);
24355 return;
24356 }
24357
24358 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24359 (get_arm_condition_code (x))],
24360 stream);
24361 return;
24362
24363 case 'V':
24364 {
24365 /* Output the LSB (shift) and width for a bitmask instruction
24366 based on a literal mask. The LSB is printed first,
24367 followed by the width.
24368
24369 Eg. For 0b1...1110001, the result is #1, #3. */
24370 if (!CONST_INT_P (x))
24371 {
24372 output_operand_lossage ("invalid operand for code '%c'", code);
24373 return;
24374 }
24375
24376 unsigned HOST_WIDE_INT val
24377 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24378 int lsb = exact_log2 (val & -val);
24379 asm_fprintf (stream, "#%d, #%d", lsb,
24380 (exact_log2 (val + (val & -val)) - lsb));
24381 }
24382 return;
24383
24384 case 's':
24385 case 'W':
24386 case 'X':
24387 case 'Y':
24388 case 'Z':
24389 /* Former Maverick support, removed after GCC-4.7. */
24390 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24391 return;
24392
24393 case 'U':
24394 if (!REG_P (x)
24395 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24396 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24397 /* Bad value for wCG register number. */
24398 {
24399 output_operand_lossage ("invalid operand for code '%c'", code);
24400 return;
24401 }
24402
24403 else
24404 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24405 return;
24406
24407 /* Print an iWMMXt control register name. */
24408 case 'w':
24409 if (!CONST_INT_P (x)
24410 || INTVAL (x) < 0
24411 || INTVAL (x) >= 16)
24412 /* Bad value for wC register number. */
24413 {
24414 output_operand_lossage ("invalid operand for code '%c'", code);
24415 return;
24416 }
24417
24418 else
24419 {
24420 static const char * wc_reg_names [16] =
24421 {
24422 "wCID", "wCon", "wCSSF", "wCASF",
24423 "wC4", "wC5", "wC6", "wC7",
24424 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24425 "wC12", "wC13", "wC14", "wC15"
24426 };
24427
24428 fputs (wc_reg_names [INTVAL (x)], stream);
24429 }
24430 return;
24431
24432 /* Print the high single-precision register of a VFP double-precision
24433 register. */
24434 case 'p':
24435 {
24436 machine_mode mode = GET_MODE (x);
24437 int regno;
24438
24439 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24440 {
24441 output_operand_lossage ("invalid operand for code '%c'", code);
24442 return;
24443 }
24444
24445 regno = REGNO (x);
24446 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24447 {
24448 output_operand_lossage ("invalid operand for code '%c'", code);
24449 return;
24450 }
24451
24452 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24453 }
24454 return;
24455
24456 /* Print a VFP/Neon double precision or quad precision register name. */
24457 case 'P':
24458 case 'q':
24459 {
24460 machine_mode mode = GET_MODE (x);
24461 int is_quad = (code == 'q');
24462 int regno;
24463
24464 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24465 {
24466 output_operand_lossage ("invalid operand for code '%c'", code);
24467 return;
24468 }
24469
24470 if (!REG_P (x)
24471 || !IS_VFP_REGNUM (REGNO (x)))
24472 {
24473 output_operand_lossage ("invalid operand for code '%c'", code);
24474 return;
24475 }
24476
24477 regno = REGNO (x);
24478 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24479 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24480 {
24481 output_operand_lossage ("invalid operand for code '%c'", code);
24482 return;
24483 }
24484
24485 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24486 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24487 }
24488 return;
24489
24490 /* These two codes print the low/high doubleword register of a Neon quad
24491 register, respectively. For pair-structure types, can also print
24492 low/high quadword registers. */
24493 case 'e':
24494 case 'f':
24495 {
24496 machine_mode mode = GET_MODE (x);
24497 int regno;
24498
24499 if ((GET_MODE_SIZE (mode) != 16
24500 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24501 {
24502 output_operand_lossage ("invalid operand for code '%c'", code);
24503 return;
24504 }
24505
24506 regno = REGNO (x);
24507 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24508 {
24509 output_operand_lossage ("invalid operand for code '%c'", code);
24510 return;
24511 }
24512
24513 if (GET_MODE_SIZE (mode) == 16)
24514 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24515 + (code == 'f' ? 1 : 0));
24516 else
24517 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24518 + (code == 'f' ? 1 : 0));
24519 }
24520 return;
24521
24522 /* Print a VFPv3 floating-point constant, represented as an integer
24523 index. */
24524 case 'G':
24525 {
24526 int index = vfp3_const_double_index (x);
24527 gcc_assert (index != -1);
24528 fprintf (stream, "%d", index);
24529 }
24530 return;
24531
24532 /* Print bits representing opcode features for Neon.
24533
24534 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24535 and polynomials as unsigned.
24536
24537 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24538
24539 Bit 2 is 1 for rounding functions, 0 otherwise. */
24540
24541 /* Identify the type as 's', 'u', 'p' or 'f'. */
24542 case 'T':
24543 {
24544 HOST_WIDE_INT bits = INTVAL (x);
24545 fputc ("uspf"[bits & 3], stream);
24546 }
24547 return;
24548
24549 /* Likewise, but signed and unsigned integers are both 'i'. */
24550 case 'F':
24551 {
24552 HOST_WIDE_INT bits = INTVAL (x);
24553 fputc ("iipf"[bits & 3], stream);
24554 }
24555 return;
24556
24557 /* As for 'T', but emit 'u' instead of 'p'. */
24558 case 't':
24559 {
24560 HOST_WIDE_INT bits = INTVAL (x);
24561 fputc ("usuf"[bits & 3], stream);
24562 }
24563 return;
24564
24565 /* Bit 2: rounding (vs none). */
24566 case 'O':
24567 {
24568 HOST_WIDE_INT bits = INTVAL (x);
24569 fputs ((bits & 4) != 0 ? "r" : "", stream);
24570 }
24571 return;
24572
24573 /* Memory operand for vld1/vst1 instruction. */
24574 case 'A':
24575 {
24576 rtx addr;
24577 bool postinc = FALSE;
24578 rtx postinc_reg = NULL;
24579 unsigned align, memsize, align_bits;
24580
24581 gcc_assert (MEM_P (x));
24582 addr = XEXP (x, 0);
24583 if (GET_CODE (addr) == POST_INC)
24584 {
24585 postinc = 1;
24586 addr = XEXP (addr, 0);
24587 }
24588 if (GET_CODE (addr) == POST_MODIFY)
24589 {
24590 postinc_reg = XEXP( XEXP (addr, 1), 1);
24591 addr = XEXP (addr, 0);
24592 }
24593 asm_fprintf (stream, "[%r", REGNO (addr));
24594
24595 /* We know the alignment of this access, so we can emit a hint in the
24596 instruction (for some alignments) as an aid to the memory subsystem
24597 of the target. */
24598 align = MEM_ALIGN (x) >> 3;
24599 memsize = MEM_SIZE (x);
24600
24601 /* Only certain alignment specifiers are supported by the hardware. */
24602 if (memsize == 32 && (align % 32) == 0)
24603 align_bits = 256;
24604 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24605 align_bits = 128;
24606 else if (memsize >= 8 && (align % 8) == 0)
24607 align_bits = 64;
24608 else
24609 align_bits = 0;
24610
24611 if (align_bits != 0)
24612 asm_fprintf (stream, ":%d", align_bits);
24613
24614 asm_fprintf (stream, "]");
24615
24616 if (postinc)
24617 fputs("!", stream);
24618 if (postinc_reg)
24619 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24620 }
24621 return;
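/* Illustrative examples of the hint emitted above: a 16-byte access from a
   16-byte-aligned address prints "[r0:128]", an 8-byte access aligned to 8
   bytes prints "[r0:64]", and a 32-byte access aligned to 32 bytes prints
   "[r0:256]"; otherwise no ":<bits>" qualifier is added.  */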
24622
24623 /* Print the memory operand for an "Ux" or "Uj" constraint. Depending on the
24624 rtx_code, the output takes one of the following forms:
24625 1. [Rn], #+/-<imm>
24626 2. [Rn, #+/-<imm>]!
24627 3. [Rn, #+/-<imm>]
24628 4. [Rn]. */
24629 case 'E':
24630 {
24631 rtx addr;
24632 rtx postinc_reg = NULL;
24633 unsigned inc_val = 0;
24634 enum rtx_code code;
24635
24636 gcc_assert (MEM_P (x));
24637 addr = XEXP (x, 0);
24638 code = GET_CODE (addr);
24639 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24640 || code == PRE_DEC)
24641 {
24642 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24643 inc_val = GET_MODE_SIZE (GET_MODE (x));
24644 if (code == POST_INC || code == POST_DEC)
24645 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24646 ? "": "-", inc_val);
24647 else
24648 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24649 ? "": "-", inc_val);
24650 }
24651 else if (code == POST_MODIFY || code == PRE_MODIFY)
24652 {
24653 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24654 postinc_reg = XEXP (XEXP (addr, 1), 1);
24655 if (postinc_reg && CONST_INT_P (postinc_reg))
24656 {
24657 if (code == POST_MODIFY)
24658 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24659 else
24660 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24661 }
24662 }
24663 else if (code == PLUS)
24664 {
24665 rtx base = XEXP (addr, 0);
24666 rtx index = XEXP (addr, 1);
24667
24668 gcc_assert (REG_P (base) && CONST_INT_P (index));
24669
24670 HOST_WIDE_INT offset = INTVAL (index);
24671 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24672 }
24673 else
24674 {
24675 gcc_assert (REG_P (addr));
24676 asm_fprintf (stream, "[%r]",REGNO (addr));
24677 }
24678 }
24679 return;
24680
24681 case 'C':
24682 {
24683 rtx addr;
24684
24685 gcc_assert (MEM_P (x));
24686 addr = XEXP (x, 0);
24687 gcc_assert (REG_P (addr));
24688 asm_fprintf (stream, "[%r]", REGNO (addr));
24689 }
24690 return;
24691
24692 /* Translate an S register number into a D register number and element index. */
24693 case 'y':
24694 {
24695 machine_mode mode = GET_MODE (x);
24696 int regno;
24697
24698 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24699 {
24700 output_operand_lossage ("invalid operand for code '%c'", code);
24701 return;
24702 }
24703
24704 regno = REGNO (x);
24705 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24706 {
24707 output_operand_lossage ("invalid operand for code '%c'", code);
24708 return;
24709 }
24710
24711 regno = regno - FIRST_VFP_REGNUM;
24712 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24713 }
24714 return;
24715
24716 case 'v':
24717 gcc_assert (CONST_DOUBLE_P (x));
24718 int result;
24719 result = vfp3_const_double_for_fract_bits (x);
24720 if (result == 0)
24721 result = vfp3_const_double_for_bits (x);
24722 fprintf (stream, "#%d", result);
24723 return;
24724
24725 /* Register specifier for vld1.16/vst1.16. Translate the S register
24726 number into a D register number and element index. */
24727 case 'z':
24728 {
24729 machine_mode mode = GET_MODE (x);
24730 int regno;
24731
24732 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24733 {
24734 output_operand_lossage ("invalid operand for code '%c'", code);
24735 return;
24736 }
24737
24738 regno = REGNO (x);
24739 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24740 {
24741 output_operand_lossage ("invalid operand for code '%c'", code);
24742 return;
24743 }
24744
24745 regno = regno - FIRST_VFP_REGNUM;
24746 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24747 }
24748 return;
24749
24750 default:
24751 if (x == 0)
24752 {
24753 output_operand_lossage ("missing operand");
24754 return;
24755 }
24756
24757 switch (GET_CODE (x))
24758 {
24759 case REG:
24760 asm_fprintf (stream, "%r", REGNO (x));
24761 break;
24762
24763 case MEM:
24764 output_address (GET_MODE (x), XEXP (x, 0));
24765 break;
24766
24767 case CONST_DOUBLE:
24768 {
24769 char fpstr[20];
24770 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24771 sizeof (fpstr), 0, 1);
24772 fprintf (stream, "#%s", fpstr);
24773 }
24774 break;
24775
24776 default:
24777 gcc_assert (GET_CODE (x) != NEG);
24778 fputc ('#', stream);
24779 if (GET_CODE (x) == HIGH)
24780 {
24781 fputs (":lower16:", stream);
24782 x = XEXP (x, 0);
24783 }
24784
24785 output_addr_const (stream, x);
24786 break;
24787 }
24788 }
24789 }
24790 \f
24791 /* Target hook for printing a memory address. */
24792 static void
24793 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24794 {
24795 if (TARGET_32BIT)
24796 {
24797 int is_minus = GET_CODE (x) == MINUS;
24798
24799 if (REG_P (x))
24800 asm_fprintf (stream, "[%r]", REGNO (x));
24801 else if (GET_CODE (x) == PLUS || is_minus)
24802 {
24803 rtx base = XEXP (x, 0);
24804 rtx index = XEXP (x, 1);
24805 HOST_WIDE_INT offset = 0;
24806 if (!REG_P (base)
24807 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24808 {
24809 /* Ensure that BASE is a register. */
24810 /* (one of them must be). */
24811 /* Also ensure that SP is not used as an index register. */
24812 std::swap (base, index);
24813 }
24814 switch (GET_CODE (index))
24815 {
24816 case CONST_INT:
24817 offset = INTVAL (index);
24818 if (is_minus)
24819 offset = -offset;
24820 asm_fprintf (stream, "[%r, #%wd]",
24821 REGNO (base), offset);
24822 break;
24823
24824 case REG:
24825 asm_fprintf (stream, "[%r, %s%r]",
24826 REGNO (base), is_minus ? "-" : "",
24827 REGNO (index));
24828 break;
24829
24830 case MULT:
24831 case ASHIFTRT:
24832 case LSHIFTRT:
24833 case ASHIFT:
24834 case ROTATERT:
24835 {
24836 asm_fprintf (stream, "[%r, %s%r",
24837 REGNO (base), is_minus ? "-" : "",
24838 REGNO (XEXP (index, 0)));
24839 arm_print_operand (stream, index, 'S');
24840 fputs ("]", stream);
24841 break;
24842 }
24843
24844 default:
24845 gcc_unreachable ();
24846 }
24847 }
24848 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24849 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24850 {
24851 gcc_assert (REG_P (XEXP (x, 0)));
24852
24853 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24854 asm_fprintf (stream, "[%r, #%s%d]!",
24855 REGNO (XEXP (x, 0)),
24856 GET_CODE (x) == PRE_DEC ? "-" : "",
24857 GET_MODE_SIZE (mode));
24858 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24859 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24860 else
24861 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24862 GET_CODE (x) == POST_DEC ? "-" : "",
24863 GET_MODE_SIZE (mode));
24864 }
24865 else if (GET_CODE (x) == PRE_MODIFY)
24866 {
24867 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24868 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24869 asm_fprintf (stream, "#%wd]!",
24870 INTVAL (XEXP (XEXP (x, 1), 1)));
24871 else
24872 asm_fprintf (stream, "%r]!",
24873 REGNO (XEXP (XEXP (x, 1), 1)));
24874 }
24875 else if (GET_CODE (x) == POST_MODIFY)
24876 {
24877 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24878 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24879 asm_fprintf (stream, "#%wd",
24880 INTVAL (XEXP (XEXP (x, 1), 1)));
24881 else
24882 asm_fprintf (stream, "%r",
24883 REGNO (XEXP (XEXP (x, 1), 1)));
24884 }
24885 else output_addr_const (stream, x);
24886 }
24887 else
24888 {
24889 if (REG_P (x))
24890 asm_fprintf (stream, "[%r]", REGNO (x));
24891 else if (GET_CODE (x) == POST_INC)
24892 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24893 else if (GET_CODE (x) == PLUS)
24894 {
24895 gcc_assert (REG_P (XEXP (x, 0)));
24896 if (CONST_INT_P (XEXP (x, 1)))
24897 asm_fprintf (stream, "[%r, #%wd]",
24898 REGNO (XEXP (x, 0)),
24899 INTVAL (XEXP (x, 1)));
24900 else
24901 asm_fprintf (stream, "[%r, %r]",
24902 REGNO (XEXP (x, 0)),
24903 REGNO (XEXP (x, 1)));
24904 }
24905 else
24906 output_addr_const (stream, x);
24907 }
24908 }
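/* Illustrative outputs of the routine above (made-up registers):
       [r3]               plain register
       [r0, #8]           PLUS with constant index
       [r1, -r2]          MINUS with register index
       [r0, r1, lsl #2]   shifted index (printed via the 'S' operand code)
       [r5, #4]!          PRE_INC / PRE_DEC
       [r5], #4           POST_INC / POST_DEC  */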
24909 \f
24910 /* Target hook for indicating whether a punctuation character for
24911 TARGET_PRINT_OPERAND is valid. */
24912 static bool
24913 arm_print_operand_punct_valid_p (unsigned char code)
24914 {
24915 return (code == '@' || code == '|' || code == '.'
24916 || code == '(' || code == ')' || code == '#'
24917 || (TARGET_32BIT && (code == '?'))
24918 || (TARGET_THUMB2 && (code == '!'))
24919 || (TARGET_THUMB && (code == '_')));
24920 }
24921 \f
24922 /* Target hook for assembling integer objects. The ARM version needs to
24923 handle word-sized values specially. */
24924 static bool
24925 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24926 {
24927 machine_mode mode;
24928
24929 if (size == UNITS_PER_WORD && aligned_p)
24930 {
24931 fputs ("\t.word\t", asm_out_file);
24932 output_addr_const (asm_out_file, x);
24933
24934 /* Mark symbols as position independent. We only do this in the
24935 .text segment, not in the .data segment. */
24936 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24937 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24938 {
24939 /* See legitimize_pic_address for an explanation of the
24940 TARGET_VXWORKS_RTP check. */
24941 /* References to weak symbols cannot be resolved locally:
24942 they may be overridden by a non-weak definition at link
24943 time. */
24944 if (!arm_pic_data_is_text_relative
24945 || (SYMBOL_REF_P (x)
24946 && (!SYMBOL_REF_LOCAL_P (x)
24947 || (SYMBOL_REF_DECL (x)
24948 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24949 || (SYMBOL_REF_FUNCTION_P (x)
24950 && !arm_fdpic_local_funcdesc_p (x)))))
24951 {
24952 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24953 fputs ("(GOTFUNCDESC)", asm_out_file);
24954 else
24955 fputs ("(GOT)", asm_out_file);
24956 }
24957 else
24958 {
24959 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24960 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24961 else
24962 {
24963 bool is_readonly;
24964
24965 if (!TARGET_FDPIC
24966 || arm_is_segment_info_known (x, &is_readonly))
24967 fputs ("(GOTOFF)", asm_out_file);
24968 else
24969 fputs ("(GOT)", asm_out_file);
24970 }
24971 }
24972 }
24973
24974 /* For FDPIC we also have to mark symbol for .data section. */
24975 if (TARGET_FDPIC
24976 && !making_const_table
24977 && SYMBOL_REF_P (x)
24978 && SYMBOL_REF_FUNCTION_P (x))
24979 fputs ("(FUNCDESC)", asm_out_file);
24980
24981 fputc ('\n', asm_out_file);
24982 return true;
24983 }
24984
24985 mode = GET_MODE (x);
24986
24987 if (arm_vector_mode_supported_p (mode))
24988 {
24989 int i, units;
24990
24991 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24992
24993 units = CONST_VECTOR_NUNITS (x);
24994 size = GET_MODE_UNIT_SIZE (mode);
24995
24996 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24997 for (i = 0; i < units; i++)
24998 {
24999 rtx elt = CONST_VECTOR_ELT (x, i);
25000 assemble_integer
25001 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25002 }
25003 else
25004 for (i = 0; i < units; i++)
25005 {
25006 rtx elt = CONST_VECTOR_ELT (x, i);
25007 assemble_real
25008 (*CONST_DOUBLE_REAL_VALUE (elt),
25009 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25010 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25011 }
25012
25013 return true;
25014 }
25015
25016 return default_assemble_integer (x, size, aligned_p);
25017 }
25018
25019 static void
25020 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25021 {
25022 section *s;
25023
25024 if (!TARGET_AAPCS_BASED)
25025 {
25026 (is_ctor ?
25027 default_named_section_asm_out_constructor
25028 : default_named_section_asm_out_destructor) (symbol, priority);
25029 return;
25030 }
25031
25032 /* Put these in the .init_array section, using a special relocation. */
25033 if (priority != DEFAULT_INIT_PRIORITY)
25034 {
25035 char buf[18];
25036 sprintf (buf, "%s.%.5u",
25037 is_ctor ? ".init_array" : ".fini_array",
25038 priority);
25039 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25040 }
25041 else if (is_ctor)
25042 s = ctors_section;
25043 else
25044 s = dtors_section;
25045
25046 switch_to_section (s);
25047 assemble_align (POINTER_SIZE);
25048 fputs ("\t.word\t", asm_out_file);
25049 output_addr_const (asm_out_file, symbol);
25050 fputs ("(target1)\n", asm_out_file);
25051 }
25052
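/* For example, a constructor with priority 100 is placed in section
".init_array.00100" and emitted roughly as "\t.word\t<symbol>(target1)",
where <symbol> stands for whatever SYMBOL was passed in (illustrative values).  */
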
25053 /* Add a function to the list of static constructors. */
25054
25055 static void
25056 arm_elf_asm_constructor (rtx symbol, int priority)
25057 {
25058 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25059 }
25060
25061 /* Add a function to the list of static destructors. */
25062
25063 static void
25064 arm_elf_asm_destructor (rtx symbol, int priority)
25065 {
25066 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25067 }
25068 \f
25069 /* A finite state machine takes care of noticing whether or not instructions
25070 can be conditionally executed, thus decreasing execution time and code
25071 size by deleting branch instructions. The fsm is controlled by
25072 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25073
25074 /* The states of the fsm controlling condition codes are:
25075 0: normal, do nothing special
25076 1: make ASM_OUTPUT_OPCODE not output this instruction
25077 2: make ASM_OUTPUT_OPCODE not output this instruction
25078 3: make instructions conditional
25079 4: make instructions conditional
25080
25081 State transitions (state->state by whom under condition):
25082 0 -> 1 final_prescan_insn if the `target' is a label
25083 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25084 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25085 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25086 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25087 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25088 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25089 (the target insn is arm_target_insn).
25090
25091 If the jump clobbers the conditions then we use states 2 and 4.
25092
25093 A similar thing can be done with conditional return insns.
25094
25095 XXX In case the `target' is an unconditional branch, this conditionalising
25096 of the instructions always reduces code size, but not always execution
25097 time. But then, I want to reduce the code size to somewhere near what
25098 /bin/cc produces. */
25099
25100 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25101 instructions. When a COND_EXEC instruction is seen the subsequent
25102 instructions are scanned so that multiple conditional instructions can be
25103 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25104 specify the length and true/false mask for the IT block. These will be
25105 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
25106
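/* As a rough illustration only (register names and the exact sequence are
made up for this example), ARM code of the form

cmp r0, #0
beq .L1
add r1, r1, #1
.L1:

can be rewritten by this machinery as

cmp r0, #0
addne r1, r1, #1

while on Thumb-2 the conditionalised instruction would instead be covered by
an "it ne" prefix emitted via the IT-block state handled below.  */
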
25107 /* Returns the index of the ARM condition code string in
25108 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25109 COMPARISON should be an rtx like `(eq (...) (...))'. */
25110
25111 enum arm_cond_code
25112 maybe_get_arm_condition_code (rtx comparison)
25113 {
25114 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25115 enum arm_cond_code code;
25116 enum rtx_code comp_code = GET_CODE (comparison);
25117
25118 if (GET_MODE_CLASS (mode) != MODE_CC)
25119 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25120 XEXP (comparison, 1));
25121
25122 switch (mode)
25123 {
25124 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25125 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25126 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25127 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25128 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25129 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25130 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25131 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25132 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25133 case E_CC_DLTUmode: code = ARM_CC;
25134
25135 dominance:
25136 if (comp_code == EQ)
25137 return ARM_INVERSE_CONDITION_CODE (code);
25138 if (comp_code == NE)
25139 return code;
25140 return ARM_NV;
25141
25142 case E_CC_NZmode:
25143 switch (comp_code)
25144 {
25145 case NE: return ARM_NE;
25146 case EQ: return ARM_EQ;
25147 case GE: return ARM_PL;
25148 case LT: return ARM_MI;
25149 default: return ARM_NV;
25150 }
25151
25152 case E_CC_Zmode:
25153 switch (comp_code)
25154 {
25155 case NE: return ARM_NE;
25156 case EQ: return ARM_EQ;
25157 default: return ARM_NV;
25158 }
25159
25160 case E_CC_Nmode:
25161 switch (comp_code)
25162 {
25163 case NE: return ARM_MI;
25164 case EQ: return ARM_PL;
25165 default: return ARM_NV;
25166 }
25167
25168 case E_CCFPEmode:
25169 case E_CCFPmode:
25170 /* We can handle all cases except UNEQ and LTGT. */
25171 switch (comp_code)
25172 {
25173 case GE: return ARM_GE;
25174 case GT: return ARM_GT;
25175 case LE: return ARM_LS;
25176 case LT: return ARM_MI;
25177 case NE: return ARM_NE;
25178 case EQ: return ARM_EQ;
25179 case ORDERED: return ARM_VC;
25180 case UNORDERED: return ARM_VS;
25181 case UNLT: return ARM_LT;
25182 case UNLE: return ARM_LE;
25183 case UNGT: return ARM_HI;
25184 case UNGE: return ARM_PL;
25185 /* UNEQ and LTGT do not have a representation. */
25186 case UNEQ: /* Fall through. */
25187 case LTGT: /* Fall through. */
25188 default: return ARM_NV;
25189 }
25190
25191 case E_CC_SWPmode:
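/* The comparison operands were swapped when this mode was selected, so the
returned condition must be swapped as well (GE becomes LE, GT becomes LT,
and so on).  */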
25192 switch (comp_code)
25193 {
25194 case NE: return ARM_NE;
25195 case EQ: return ARM_EQ;
25196 case GE: return ARM_LE;
25197 case GT: return ARM_LT;
25198 case LE: return ARM_GE;
25199 case LT: return ARM_GT;
25200 case GEU: return ARM_LS;
25201 case GTU: return ARM_CC;
25202 case LEU: return ARM_CS;
25203 case LTU: return ARM_HI;
25204 default: return ARM_NV;
25205 }
25206
25207 case E_CC_Cmode:
25208 switch (comp_code)
25209 {
25210 case LTU: return ARM_CS;
25211 case GEU: return ARM_CC;
25212 default: return ARM_NV;
25213 }
25214
25215 case E_CC_NVmode:
25216 switch (comp_code)
25217 {
25218 case GE: return ARM_GE;
25219 case LT: return ARM_LT;
25220 default: return ARM_NV;
25221 }
25222
25223 case E_CC_Bmode:
25224 switch (comp_code)
25225 {
25226 case GEU: return ARM_CS;
25227 case LTU: return ARM_CC;
25228 default: return ARM_NV;
25229 }
25230
25231 case E_CC_Vmode:
25232 switch (comp_code)
25233 {
25234 case NE: return ARM_VS;
25235 case EQ: return ARM_VC;
25236 default: return ARM_NV;
25237 }
25238
25239 case E_CC_ADCmode:
25240 switch (comp_code)
25241 {
25242 case GEU: return ARM_CS;
25243 case LTU: return ARM_CC;
25244 default: return ARM_NV;
25245 }
25246
25247 case E_CCmode:
25248 case E_CC_RSBmode:
25249 switch (comp_code)
25250 {
25251 case NE: return ARM_NE;
25252 case EQ: return ARM_EQ;
25253 case GE: return ARM_GE;
25254 case GT: return ARM_GT;
25255 case LE: return ARM_LE;
25256 case LT: return ARM_LT;
25257 case GEU: return ARM_CS;
25258 case GTU: return ARM_HI;
25259 case LEU: return ARM_LS;
25260 case LTU: return ARM_CC;
25261 default: return ARM_NV;
25262 }
25263
25264 default: gcc_unreachable ();
25265 }
25266 }
25267
25268 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25269 static enum arm_cond_code
25270 get_arm_condition_code (rtx comparison)
25271 {
25272 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25273 gcc_assert (code != ARM_NV);
25274 return code;
25275 }
25276
25277 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25278 code registers when not targeting Thumb1. The VFP condition register
25279 only exists when generating hard-float code. */
25280 static bool
25281 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25282 {
25283 if (!TARGET_32BIT)
25284 return false;
25285
25286 *p1 = CC_REGNUM;
25287 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25288 return true;
25289 }
25290
25291 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25292 instructions. */
25293 void
25294 thumb2_final_prescan_insn (rtx_insn *insn)
25295 {
25296 rtx_insn *first_insn = insn;
25297 rtx body = PATTERN (insn);
25298 rtx predicate;
25299 enum arm_cond_code code;
25300 int n;
25301 int mask;
25302 int max;
25303
25304 /* max_insns_skipped in the tune was already taken into account in the
25305 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25306 stage just emit the IT blocks as large as we can; it does not make sense
25307 to split them. */
25308 max = MAX_INSN_PER_IT_BLOCK;
25309
25310 /* Remove the previous insn from the count of insns to be output. */
25311 if (arm_condexec_count)
25312 arm_condexec_count--;
25313
25314 /* Nothing to do if we are already inside a conditional block. */
25315 if (arm_condexec_count)
25316 return;
25317
25318 if (GET_CODE (body) != COND_EXEC)
25319 return;
25320
25321 /* Conditional jumps are implemented directly. */
25322 if (JUMP_P (insn))
25323 return;
25324
25325 predicate = COND_EXEC_TEST (body);
25326 arm_current_cc = get_arm_condition_code (predicate);
25327
25328 n = get_attr_ce_count (insn);
25329 arm_condexec_count = 1;
25330 arm_condexec_mask = (1 << n) - 1;
25331 arm_condexec_masklen = n;
25332 /* See if subsequent instructions can be combined into the same block. */
25333 for (;;)
25334 {
25335 insn = next_nonnote_insn (insn);
25336
25337 /* Jumping into the middle of an IT block is illegal, so a label or
25338 barrier terminates the block. */
25339 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25340 break;
25341
25342 body = PATTERN (insn);
25343 /* USE and CLOBBER aren't really insns, so just skip them. */
25344 if (GET_CODE (body) == USE
25345 || GET_CODE (body) == CLOBBER)
25346 continue;
25347
25348 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25349 if (GET_CODE (body) != COND_EXEC)
25350 break;
25351 /* Maximum number of conditionally executed instructions in a block. */
25352 n = get_attr_ce_count (insn);
25353 if (arm_condexec_masklen + n > max)
25354 break;
25355
25356 predicate = COND_EXEC_TEST (body);
25357 code = get_arm_condition_code (predicate);
25358 mask = (1 << n) - 1;
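/* Insns using the block's own condition contribute set bits ('t' in the IT
mnemonic) to arm_condexec_mask; insns using the inverse condition leave
their bits clear ('e').  Any other condition cannot join this IT block.  */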
25359 if (arm_current_cc == code)
25360 arm_condexec_mask |= (mask << arm_condexec_masklen);
25361 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25362 break;
25363
25364 arm_condexec_count++;
25365 arm_condexec_masklen += n;
25366
25367 /* A jump must be the last instruction in a conditional block. */
25368 if (JUMP_P (insn))
25369 break;
25370 }
25371 /* Restore recog_data (getting the attributes of other insns can
25372 destroy this array, but final.cc assumes that it remains intact
25373 across this call). */
25374 extract_constrain_insn_cached (first_insn);
25375 }
25376
25377 void
25378 arm_final_prescan_insn (rtx_insn *insn)
25379 {
25380 /* BODY will hold the body of INSN. */
25381 rtx body = PATTERN (insn);
25382
25383 /* This will be 1 if trying to repeat the trick, and things need to be
25384 reversed if it appears to fail. */
25385 int reverse = 0;
25386
25387 /* If we start with a return insn, we only succeed if we find another one. */
25388 int seeking_return = 0;
25389 enum rtx_code return_code = UNKNOWN;
25390
25391 /* START_INSN will hold the insn from where we start looking. This is the
25392 first insn after the following code_label if REVERSE is true. */
25393 rtx_insn *start_insn = insn;
25394
25395 /* If in state 4, check if the target branch is reached, in order to
25396 change back to state 0. */
25397 if (arm_ccfsm_state == 4)
25398 {
25399 if (insn == arm_target_insn)
25400 {
25401 arm_target_insn = NULL;
25402 arm_ccfsm_state = 0;
25403 }
25404 return;
25405 }
25406
25407 /* If in state 3, it is possible to repeat the trick, if this insn is an
25408 unconditional branch to a label, and immediately following this branch
25409 is the previous target label which is only used once, and the label this
25410 branch jumps to is not too far off. */
25411 if (arm_ccfsm_state == 3)
25412 {
25413 if (simplejump_p (insn))
25414 {
25415 start_insn = next_nonnote_insn (start_insn);
25416 if (BARRIER_P (start_insn))
25417 {
25418 /* XXX Isn't this always a barrier? */
25419 start_insn = next_nonnote_insn (start_insn);
25420 }
25421 if (LABEL_P (start_insn)
25422 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25423 && LABEL_NUSES (start_insn) == 1)
25424 reverse = TRUE;
25425 else
25426 return;
25427 }
25428 else if (ANY_RETURN_P (body))
25429 {
25430 start_insn = next_nonnote_insn (start_insn);
25431 if (BARRIER_P (start_insn))
25432 start_insn = next_nonnote_insn (start_insn);
25433 if (LABEL_P (start_insn)
25434 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25435 && LABEL_NUSES (start_insn) == 1)
25436 {
25437 reverse = TRUE;
25438 seeking_return = 1;
25439 return_code = GET_CODE (body);
25440 }
25441 else
25442 return;
25443 }
25444 else
25445 return;
25446 }
25447
25448 gcc_assert (!arm_ccfsm_state || reverse);
25449 if (!JUMP_P (insn))
25450 return;
25451
25452 /* This jump might be paralleled with a clobber of the condition codes;
25453 the jump should always come first. */
25454 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25455 body = XVECEXP (body, 0, 0);
25456
25457 if (reverse
25458 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25459 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25460 {
25461 int insns_skipped;
25462 int fail = FALSE, succeed = FALSE;
25463 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25464 int then_not_else = TRUE;
25465 rtx_insn *this_insn = start_insn;
25466 rtx label = 0;
25467
25468 /* Register the insn jumped to. */
25469 if (reverse)
25470 {
25471 if (!seeking_return)
25472 label = XEXP (SET_SRC (body), 0);
25473 }
25474 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25475 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25476 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25477 {
25478 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25479 then_not_else = FALSE;
25480 }
25481 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25482 {
25483 seeking_return = 1;
25484 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25485 }
25486 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25487 {
25488 seeking_return = 1;
25489 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25490 then_not_else = FALSE;
25491 }
25492 else
25493 gcc_unreachable ();
25494
25495 /* See how many insns this branch skips, and what kind of insns. If all
25496 insns are okay, and the label or unconditional branch to the same
25497 label is not too far away, succeed. */
25498 for (insns_skipped = 0;
25499 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25500 {
25501 rtx scanbody;
25502
25503 this_insn = next_nonnote_insn (this_insn);
25504 if (!this_insn)
25505 break;
25506
25507 switch (GET_CODE (this_insn))
25508 {
25509 case CODE_LABEL:
25510 /* Succeed if it is the target label, otherwise fail since
25511 control falls in from somewhere else. */
25512 if (this_insn == label)
25513 {
25514 arm_ccfsm_state = 1;
25515 succeed = TRUE;
25516 }
25517 else
25518 fail = TRUE;
25519 break;
25520
25521 case BARRIER:
25522 /* Succeed if the following insn is the target label.
25523 Otherwise fail.
25524 If return insns are used then the last insn in a function
25525 will be a barrier. */
25526 this_insn = next_nonnote_insn (this_insn);
25527 if (this_insn && this_insn == label)
25528 {
25529 arm_ccfsm_state = 1;
25530 succeed = TRUE;
25531 }
25532 else
25533 fail = TRUE;
25534 break;
25535
25536 case CALL_INSN:
25537 /* The AAPCS says that conditional calls should not be
25538 used since they make interworking inefficient (the
25539 linker can't transform BL<cond> into BLX). That's
25540 only a problem if the machine has BLX. */
25541 if (arm_arch5t)
25542 {
25543 fail = TRUE;
25544 break;
25545 }
25546
25547 /* Succeed if the following insn is the target label, or
25548 if the following two insns are a barrier and the
25549 target label. */
25550 this_insn = next_nonnote_insn (this_insn);
25551 if (this_insn && BARRIER_P (this_insn))
25552 this_insn = next_nonnote_insn (this_insn);
25553
25554 if (this_insn && this_insn == label
25555 && insns_skipped < max_insns_skipped)
25556 {
25557 arm_ccfsm_state = 1;
25558 succeed = TRUE;
25559 }
25560 else
25561 fail = TRUE;
25562 break;
25563
25564 case JUMP_INSN:
25565 /* If this is an unconditional branch to the same label, succeed.
25566 If it is to another label, do nothing. If it is conditional,
25567 fail. */
25568 /* XXX Probably, the tests for SET and the PC are
25569 unnecessary. */
25570
25571 scanbody = PATTERN (this_insn);
25572 if (GET_CODE (scanbody) == SET
25573 && GET_CODE (SET_DEST (scanbody)) == PC)
25574 {
25575 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25576 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25577 {
25578 arm_ccfsm_state = 2;
25579 succeed = TRUE;
25580 }
25581 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25582 fail = TRUE;
25583 }
25584 /* Fail if a conditional return is undesirable (e.g. on a
25585 StrongARM), but still allow this if optimizing for size. */
25586 else if (GET_CODE (scanbody) == return_code
25587 && !use_return_insn (TRUE, NULL)
25588 && !optimize_size)
25589 fail = TRUE;
25590 else if (GET_CODE (scanbody) == return_code)
25591 {
25592 arm_ccfsm_state = 2;
25593 succeed = TRUE;
25594 }
25595 else if (GET_CODE (scanbody) == PARALLEL)
25596 {
25597 switch (get_attr_conds (this_insn))
25598 {
25599 case CONDS_NOCOND:
25600 break;
25601 default:
25602 fail = TRUE;
25603 break;
25604 }
25605 }
25606 else
25607 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25608
25609 break;
25610
25611 case INSN:
25612 /* Instructions using or affecting the condition codes make it
25613 fail. */
25614 scanbody = PATTERN (this_insn);
25615 if (!(GET_CODE (scanbody) == SET
25616 || GET_CODE (scanbody) == PARALLEL)
25617 || get_attr_conds (this_insn) != CONDS_NOCOND)
25618 fail = TRUE;
25619 break;
25620
25621 default:
25622 break;
25623 }
25624 }
25625 if (succeed)
25626 {
25627 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25628 arm_target_label = CODE_LABEL_NUMBER (label);
25629 else
25630 {
25631 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25632
25633 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25634 {
25635 this_insn = next_nonnote_insn (this_insn);
25636 gcc_assert (!this_insn
25637 || (!BARRIER_P (this_insn)
25638 && !LABEL_P (this_insn)));
25639 }
25640 if (!this_insn)
25641 {
25642 /* Oh, dear!  We ran off the end... give up. */
25643 extract_constrain_insn_cached (insn);
25644 arm_ccfsm_state = 0;
25645 arm_target_insn = NULL;
25646 return;
25647 }
25648 arm_target_insn = this_insn;
25649 }
25650
25651 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25652 what it was. */
25653 if (!reverse)
25654 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25655
25656 if (reverse || then_not_else)
25657 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25658 }
25659
25660 /* Restore recog_data (getting the attributes of other insns can
25661 destroy this array, but final.cc assumes that it remains intact
25662 across this call). */
25663 extract_constrain_insn_cached (insn);
25664 }
25665 }
25666
25667 /* Output IT instructions. */
25668 void
25669 thumb2_asm_output_opcode (FILE * stream)
25670 {
25671 char buff[5];
25672 int n;
25673
25674 if (arm_condexec_mask)
25675 {
25676 for (n = 0; n < arm_condexec_masklen; n++)
25677 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25678 buff[n] = 0;
25679 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25680 arm_condition_codes[arm_current_cc]);
25681 arm_condexec_mask = 0;
25682 }
25683 }
25684
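/* For instance (example values only), a two-insn block whose first insn is
predicated on EQ and whose second is predicated on NE has
arm_condexec_mask == 0b01 and arm_current_cc == ARM_EQ, so the code above
prints "ite eq" in front of the first opcode.  */
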
25685 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25686 UNITS_PER_WORD bytes wide. */
25687 static unsigned int
25688 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25689 {
25690 if (IS_VPR_REGNUM (regno))
25691 return CEIL (GET_MODE_SIZE (mode), 2);
25692
25693 if (TARGET_32BIT
25694 && regno > PC_REGNUM
25695 && regno != FRAME_POINTER_REGNUM
25696 && regno != ARG_POINTER_REGNUM
25697 && !IS_VFP_REGNUM (regno))
25698 return 1;
25699
25700 return ARM_NUM_REGS (mode);
25701 }
25702
25703 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25704 static bool
25705 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25706 {
25707 if (GET_MODE_CLASS (mode) == MODE_CC)
25708 return (regno == CC_REGNUM
25709 || (TARGET_VFP_BASE
25710 && regno == VFPCC_REGNUM));
25711
25712 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25713 return false;
25714
25715 if (IS_VPR_REGNUM (regno))
25716 return VALID_MVE_PRED_MODE (mode);
25717
25718 if (TARGET_THUMB1)
25719 /* For the Thumb we only allow values bigger than SImode in
25720 registers 0 - 6, so that there is always a second low
25721 register available to hold the upper part of the value.
25722 We probably ought to ensure that the register is the
25723 start of an even numbered register pair. */
25724 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25725
25726 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25727 {
25728 if (mode == DFmode || mode == DImode)
25729 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25730
25731 if (mode == HFmode || mode == BFmode || mode == HImode
25732 || mode == SFmode || mode == SImode)
25733 return VFP_REGNO_OK_FOR_SINGLE (regno);
25734
25735 if (TARGET_NEON)
25736 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25737 || (VALID_NEON_QREG_MODE (mode)
25738 && NEON_REGNO_OK_FOR_QUAD (regno))
25739 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25740 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25741 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25742 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25743 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25744 if (TARGET_HAVE_MVE)
25745 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25746 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25747 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25748
25749 return false;
25750 }
25751
25752 if (TARGET_REALLY_IWMMXT)
25753 {
25754 if (IS_IWMMXT_GR_REGNUM (regno))
25755 return mode == SImode;
25756
25757 if (IS_IWMMXT_REGNUM (regno))
25758 return VALID_IWMMXT_REG_MODE (mode);
25759 }
25760
25761 /* We allow almost any value to be stored in the general registers.
25762 Restrict doubleword quantities to even register pairs in ARM state
25763 so that we can use ldrd. The same restriction applies for MVE
25764 in order to support Armv8.1-M Mainline instructions.
25765 Do not allow very large Neon structure opaque modes in general
25766 registers; they would use too many. */
25767 if (regno <= LAST_ARM_REGNUM)
25768 {
25769 if (ARM_NUM_REGS (mode) > 4)
25770 return false;
25771
25772 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25773 return true;
25774
25775 return !((TARGET_LDRD || TARGET_CDE)
25776 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25777 }
25778
25779 if (regno == FRAME_POINTER_REGNUM
25780 || regno == ARG_POINTER_REGNUM)
25781 /* We only allow integers in the fake hard registers. */
25782 return GET_MODE_CLASS (mode) == MODE_INT;
25783
25784 return false;
25785 }
25786
25787 /* Implement TARGET_MODES_TIEABLE_P. */
25788
25789 static bool
25790 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25791 {
25792 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25793 return true;
25794
25795 if (TARGET_HAVE_MVE
25796 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25797 return true;
25798
25799 /* We specifically want to allow elements of "structure" modes to
25800 be tieable to the structure. This more general condition allows
25801 other rarer situations too. */
25802 if ((TARGET_NEON
25803 && (VALID_NEON_DREG_MODE (mode1)
25804 || VALID_NEON_QREG_MODE (mode1)
25805 || VALID_NEON_STRUCT_MODE (mode1))
25806 && (VALID_NEON_DREG_MODE (mode2)
25807 || VALID_NEON_QREG_MODE (mode2)
25808 || VALID_NEON_STRUCT_MODE (mode2)))
25809 || (TARGET_HAVE_MVE
25810 && (VALID_MVE_MODE (mode1)
25811 || VALID_MVE_STRUCT_MODE (mode1))
25812 && (VALID_MVE_MODE (mode2)
25813 || VALID_MVE_STRUCT_MODE (mode2))))
25814 return true;
25815
25816 return false;
25817 }
25818
25819 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25820 not used in arm mode. */
25821
25822 enum reg_class
25823 arm_regno_class (int regno)
25824 {
25825 if (regno == PC_REGNUM)
25826 return NO_REGS;
25827
25828 if (IS_VPR_REGNUM (regno))
25829 return VPR_REG;
25830
25831 if (IS_PAC_REGNUM (regno))
25832 return PAC_REG;
25833
25834 if (TARGET_THUMB1)
25835 {
25836 if (regno == STACK_POINTER_REGNUM)
25837 return STACK_REG;
25838 if (regno == CC_REGNUM)
25839 return CC_REG;
25840 if (regno < 8)
25841 return LO_REGS;
25842 return HI_REGS;
25843 }
25844
25845 if (TARGET_THUMB2 && regno < 8)
25846 return LO_REGS;
25847
25848 if ( regno <= LAST_ARM_REGNUM
25849 || regno == FRAME_POINTER_REGNUM
25850 || regno == ARG_POINTER_REGNUM)
25851 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25852
25853 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25854 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25855
25856 if (IS_VFP_REGNUM (regno))
25857 {
25858 if (regno <= D7_VFP_REGNUM)
25859 return VFP_D0_D7_REGS;
25860 else if (regno <= LAST_LO_VFP_REGNUM)
25861 return VFP_LO_REGS;
25862 else
25863 return VFP_HI_REGS;
25864 }
25865
25866 if (IS_IWMMXT_REGNUM (regno))
25867 return IWMMXT_REGS;
25868
25869 if (IS_IWMMXT_GR_REGNUM (regno))
25870 return IWMMXT_GR_REGS;
25871
25872 return NO_REGS;
25873 }
25874
25875 /* Handle a special case when computing the offset
25876 of an argument from the frame pointer. */
25877 int
25878 arm_debugger_arg_offset (int value, rtx addr)
25879 {
25880 rtx_insn *insn;
25881
25882 /* We are only interested if dbxout_parms() failed to compute the offset. */
25883 if (value != 0)
25884 return 0;
25885
25886 /* We can only cope with the case where the address is held in a register. */
25887 if (!REG_P (addr))
25888 return 0;
25889
25890 /* If we are using the frame pointer to point at the argument, then
25891 an offset of 0 is correct. */
25892 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25893 return 0;
25894
25895 /* If we are using the stack pointer to point at the
25896 argument, then an offset of 0 is correct. */
25897 /* ??? Check this is consistent with thumb2 frame layout. */
25898 if ((TARGET_THUMB || !frame_pointer_needed)
25899 && REGNO (addr) == SP_REGNUM)
25900 return 0;
25901
25902 /* Oh dear. The argument is pointed to by a register rather
25903 than being held in a register, or being stored at a known
25904 offset from the frame pointer. Since GDB only understands
25905 those two kinds of argument we must translate the address
25906 held in the register into an offset from the frame pointer.
25907 We do this by searching through the insns for the function
25908 looking to see where this register gets its value. If the
25909 register is initialized from the frame pointer plus an offset
25910 then we are in luck and we can continue, otherwise we give up.
25911
25912 This code is exercised by producing debugging information
25913 for a function with arguments like this:
25914
25915 double func (double a, double b, int c, double d) {return d;}
25916
25917 Without this code the stab for parameter 'd' will be set to
25918 an offset of 0 from the frame pointer, rather than 8. */
25919
25920 /* The if() statement says:
25921
25922 If the insn is a normal instruction
25923 and if the insn is setting the value in a register
25924 and if the register being set is the register holding the address of the argument
25925 and if the address is computed by an addition
25926 that involves adding to a register
25927 which is the frame pointer
25928 a constant integer
25929
25930 then... */
25931
25932 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25933 {
25934 if ( NONJUMP_INSN_P (insn)
25935 && GET_CODE (PATTERN (insn)) == SET
25936 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25937 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25938 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25939 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25940 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25941 )
25942 {
25943 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25944
25945 break;
25946 }
25947 }
25948
25949 if (value == 0)
25950 {
25951 debug_rtx (addr);
25952 warning (0, "unable to compute real location of stacked parameter");
25953 value = 8; /* XXX magic hack */
25954 }
25955
25956 return value;
25957 }
25958 \f
25959 /* Implement TARGET_PROMOTED_TYPE. */
25960
25961 static tree
25962 arm_promoted_type (const_tree t)
25963 {
25964 if (SCALAR_FLOAT_TYPE_P (t)
25965 && TYPE_PRECISION (t) == 16
25966 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25967 return float_type_node;
25968 return NULL_TREE;
25969 }
25970
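/* In practice this means that, for the ARM __fp16 type, an expression such as
a + b (with __fp16 a, b) is evaluated as (float) a + (float) b; this is only
an informal illustration of the hook above.  */
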
25971 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25972 This simply adds HFmode as a supported mode; even though we don't
25973 implement arithmetic on this type directly, it's supported by
25974 optabs conversions, much the way the double-word arithmetic is
25975 special-cased in the default hook. */
25976
25977 static bool
25978 arm_scalar_mode_supported_p (scalar_mode mode)
25979 {
25980 if (mode == HFmode)
25981 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25982 else if (ALL_FIXED_POINT_MODE_P (mode))
25983 return true;
25984 else
25985 return default_scalar_mode_supported_p (mode);
25986 }
25987
25988 /* Set the value of FLT_EVAL_METHOD.
25989 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25990
25991 0: evaluate all operations and constants, whose semantic type has at
25992 most the range and precision of type float, to the range and
25993 precision of float; evaluate all other operations and constants to
25994 the range and precision of the semantic type;
25995
25996 N, where _FloatN is a supported interchange floating type
25997 evaluate all operations and constants, whose semantic type has at
25998 most the range and precision of _FloatN type, to the range and
25999 precision of the _FloatN type; evaluate all other operations and
26000 constants to the range and precision of the semantic type;
26001
26002 If we have the ARMv8.2-A extensions then we support _Float16 in native
26003 precision, so we should set this to 16. Otherwise, we support the type,
26004 but want to evaluate expressions in float precision, so set this to
26005 0. */
26006
26007 static enum flt_eval_method
26008 arm_excess_precision (enum excess_precision_type type)
26009 {
26010 switch (type)
26011 {
26012 case EXCESS_PRECISION_TYPE_FAST:
26013 case EXCESS_PRECISION_TYPE_STANDARD:
26014 /* We can calculate either in 16-bit range and precision or
26015 32-bit range and precision. Make that decision based on whether
26016 we have native support for the ARMv8.2-A 16-bit floating-point
26017 instructions or not. */
26018 return (TARGET_VFP_FP16INST
26019 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26020 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26021 case EXCESS_PRECISION_TYPE_IMPLICIT:
26022 case EXCESS_PRECISION_TYPE_FLOAT16:
26023 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26024 default:
26025 gcc_unreachable ();
26026 }
26027 return FLT_EVAL_METHOD_UNPREDICTABLE;
26028 }
26029
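/* Informally: on a target with the ARMv8.2-A half-precision instructions
(TARGET_VFP_FP16INST), a _Float16 expression such as x * y can be evaluated
directly in half precision (FLT_EVAL_METHOD == 16); otherwise the operands are
promoted and the operation is carried out in float (FLT_EVAL_METHOD == 0).
The expression is an example only.  */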
26030
26031 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26032 _Float16 if we are using anything other than ieee format for 16-bit
26033 floating point. Otherwise, punt to the default implementation. */
26034 static opt_scalar_float_mode
26035 arm_floatn_mode (int n, bool extended)
26036 {
26037 if (!extended && n == 16)
26038 {
26039 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26040 return HFmode;
26041 return opt_scalar_float_mode ();
26042 }
26043
26044 return default_floatn_mode (n, extended);
26045 }
26046
26047
26048 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26049 not to early-clobber SRC registers in the process.
26050
26051 We assume that the operands described by SRC and DEST represent a
26052 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26053 number of components into which the copy has been decomposed. */
26054 void
26055 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26056 {
26057 unsigned int i;
26058
26059 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26060 || REGNO (operands[0]) < REGNO (operands[1]))
26061 {
26062 for (i = 0; i < count; i++)
26063 {
26064 operands[2 * i] = dest[i];
26065 operands[2 * i + 1] = src[i];
26066 }
26067 }
26068 else
26069 {
26070 for (i = 0; i < count; i++)
26071 {
26072 operands[2 * i] = dest[count - i - 1];
26073 operands[2 * i + 1] = src[count - i - 1];
26074 }
26075 }
26076 }
26077
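/* For example (illustrative register numbers): copying {d0,d1} into {d1,d2}
overlaps and the destination starts above the source, so the loop above emits
the component moves in reverse order (d2 <- d1, then d1 <- d0) so that d1 is
read before it is overwritten.  */
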
26078 /* Split operands into moves from op[1] + op[2] into op[0]. */
26079
26080 void
26081 neon_split_vcombine (rtx operands[3])
26082 {
26083 unsigned int dest = REGNO (operands[0]);
26084 unsigned int src1 = REGNO (operands[1]);
26085 unsigned int src2 = REGNO (operands[2]);
26086 machine_mode halfmode = GET_MODE (operands[1]);
26087 unsigned int halfregs = REG_NREGS (operands[1]);
26088 rtx destlo, desthi;
26089
26090 if (src1 == dest && src2 == dest + halfregs)
26091 {
26092 /* No-op move. Can't split to nothing; emit something. */
26093 emit_note (NOTE_INSN_DELETED);
26094 return;
26095 }
26096
26097 /* Preserve register attributes for variable tracking. */
26098 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26099 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26100 GET_MODE_SIZE (halfmode));
26101
26102 /* Special case of reversed high/low parts. Use VSWP. */
26103 if (src2 == dest && src1 == dest + halfregs)
26104 {
26105 rtx x = gen_rtx_SET (destlo, operands[1]);
26106 rtx y = gen_rtx_SET (desthi, operands[2]);
26107 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26108 return;
26109 }
26110
26111 if (!reg_overlap_mentioned_p (operands[2], destlo))
26112 {
26113 /* Try to avoid unnecessary moves if part of the result
26114 is in the right place already. */
26115 if (src1 != dest)
26116 emit_move_insn (destlo, operands[1]);
26117 if (src2 != dest + halfregs)
26118 emit_move_insn (desthi, operands[2]);
26119 }
26120 else
26121 {
26122 if (src2 != dest + halfregs)
26123 emit_move_insn (desthi, operands[2]);
26124 if (src1 != dest)
26125 emit_move_insn (destlo, operands[1]);
26126 }
26127 }
26128 \f
26129 /* Return the number (counting from 0) of
26130 the least significant set bit in MASK. */
26131
26132 inline static int
26133 number_of_first_bit_set (unsigned mask)
26134 {
26135 return ctz_hwi (mask);
26136 }
26137
26138 /* Like emit_multi_reg_push, but allowing for a different set of
26139 registers to be described as saved. MASK is the set of registers
26140 to be saved; REAL_REGS is the set of registers to be described as
26141 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26142
26143 static rtx_insn *
26144 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26145 {
26146 unsigned long regno;
26147 rtx par[10], tmp, reg;
26148 rtx_insn *insn;
26149 int i, j;
26150
26151 /* Build the parallel of the registers actually being stored. */
26152 for (i = 0; mask; ++i, mask &= mask - 1)
26153 {
26154 regno = ctz_hwi (mask);
26155 reg = gen_rtx_REG (SImode, regno);
26156
26157 if (i == 0)
26158 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26159 else
26160 tmp = gen_rtx_USE (VOIDmode, reg);
26161
26162 par[i] = tmp;
26163 }
26164
26165 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26166 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26167 tmp = gen_frame_mem (BLKmode, tmp);
26168 tmp = gen_rtx_SET (tmp, par[0]);
26169 par[0] = tmp;
26170
26171 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26172 insn = emit_insn (tmp);
26173
26174 /* Always build the stack adjustment note for unwind info. */
26175 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26176 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26177 par[0] = tmp;
26178
26179 /* Build the parallel of the registers recorded as saved for unwind. */
26180 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26181 {
26182 regno = ctz_hwi (real_regs);
26183 reg = gen_rtx_REG (SImode, regno);
26184
26185 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26186 tmp = gen_frame_mem (SImode, tmp);
26187 tmp = gen_rtx_SET (tmp, reg);
26188 RTX_FRAME_RELATED_P (tmp) = 1;
26189 par[j + 1] = tmp;
26190 }
26191
26192 if (j == 0)
26193 tmp = par[0];
26194 else
26195 {
26196 RTX_FRAME_RELATED_P (par[0]) = 1;
26197 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26198 }
26199
26200 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26201
26202 return insn;
26203 }
26204
26205 /* Emit code to push or pop registers to or from the stack. F is the
26206 assembly file. MASK is the registers to pop. */
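/* For example, a MASK of 0x8010 (r4 and pc) normally produces "pop {r4, pc}";
with interworking, backtracing, __builtin_eh_return or CMSE entry code the PC
is not popped directly and thumb_exit takes over instead.  The mask value is
only an illustration.  */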
26207 static void
26208 thumb_pop (FILE *f, unsigned long mask)
26209 {
26210 int regno;
26211 int lo_mask = mask & 0xFF;
26212
26213 gcc_assert (mask);
26214
26215 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26216 {
26217 /* Special case. Do not generate a POP PC statement here, do it in
26218 thumb_exit (). */
26219 thumb_exit (f, -1);
26220 return;
26221 }
26222
26223 fprintf (f, "\tpop\t{");
26224
26225 /* Look at the low registers first. */
26226 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26227 {
26228 if (lo_mask & 1)
26229 {
26230 asm_fprintf (f, "%r", regno);
26231
26232 if ((lo_mask & ~1) != 0)
26233 fprintf (f, ", ");
26234 }
26235 }
26236
26237 if (mask & (1 << PC_REGNUM))
26238 {
26239 /* Catch popping the PC. */
26240 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26241 || IS_CMSE_ENTRY (arm_current_func_type ()))
26242 {
26243 /* The PC is never popped directly; instead
26244 it is popped into r3 and then BX is used. */
26245 fprintf (f, "}\n");
26246
26247 thumb_exit (f, -1);
26248
26249 return;
26250 }
26251 else
26252 {
26253 if (mask & 0xFF)
26254 fprintf (f, ", ");
26255
26256 asm_fprintf (f, "%r", PC_REGNUM);
26257 }
26258 }
26259
26260 fprintf (f, "}\n");
26261 }
26262
26263 /* Generate code to return from a thumb function.
26264 If 'reg_containing_return_addr' is -1, then the return address is
26265 actually on the stack, at the stack pointer.
26266
26267 Note: do not forget to update the length attribute of the corresponding insn pattern
26268 when changing assembly output (e.g. the length attribute of epilogue_insns when
26269 updating Armv8-M Baseline Security Extensions register clearing
26270 sequences). */
26271 static void
26272 thumb_exit (FILE *f, int reg_containing_return_addr)
26273 {
26274 unsigned regs_available_for_popping;
26275 unsigned regs_to_pop;
26276 int pops_needed;
26277 unsigned available;
26278 unsigned required;
26279 machine_mode mode;
26280 int size;
26281 int restore_a4 = FALSE;
26282
26283 /* Compute the registers we need to pop. */
26284 regs_to_pop = 0;
26285 pops_needed = 0;
26286
26287 if (reg_containing_return_addr == -1)
26288 {
26289 regs_to_pop |= 1 << LR_REGNUM;
26290 ++pops_needed;
26291 }
26292
26293 if (TARGET_BACKTRACE)
26294 {
26295 /* Restore the (ARM) frame pointer and stack pointer. */
26296 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26297 pops_needed += 2;
26298 }
26299
26300 /* If there is nothing to pop then just emit the BX instruction and
26301 return. */
26302 if (pops_needed == 0)
26303 {
26304 if (crtl->calls_eh_return)
26305 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26306
26307 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26308 {
26309 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26310 emitted by cmse_nonsecure_entry_clear_before_return (). */
26311 if (!TARGET_HAVE_FPCXT_CMSE)
26312 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26313 reg_containing_return_addr);
26314 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26315 }
26316 else
26317 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26318 return;
26319 }
26320 /* Otherwise if we are not supporting interworking and we have not created
26321 a backtrace structure and the function was not entered in ARM mode then
26322 just pop the return address straight into the PC. */
26323 else if (!TARGET_INTERWORK
26324 && !TARGET_BACKTRACE
26325 && !is_called_in_ARM_mode (current_function_decl)
26326 && !crtl->calls_eh_return
26327 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26328 {
26329 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26330 return;
26331 }
26332
26333 /* Find out how many of the (return) argument registers we can corrupt. */
26334 regs_available_for_popping = 0;
26335
26336 /* If returning via __builtin_eh_return, the bottom three registers
26337 all contain information needed for the return. */
26338 if (crtl->calls_eh_return)
26339 size = 12;
26340 else
26341 {
26342 /* See if we can deduce the registers used from the function's
26343 return value. This is more reliable than examining
26344 df_regs_ever_live_p () because that will be set if the register is
26345 ever used in the function, not just if the register is used
26346 to hold a return value. */
26347
26348 if (crtl->return_rtx != 0)
26349 mode = GET_MODE (crtl->return_rtx);
26350 else
26351 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26352
26353 size = GET_MODE_SIZE (mode);
26354
26355 if (size == 0)
26356 {
26357 /* In a void function we can use any argument register.
26358 In a function that returns a structure on the stack
26359 we can use the second and third argument registers. */
26360 if (mode == VOIDmode)
26361 regs_available_for_popping =
26362 (1 << ARG_REGISTER (1))
26363 | (1 << ARG_REGISTER (2))
26364 | (1 << ARG_REGISTER (3));
26365 else
26366 regs_available_for_popping =
26367 (1 << ARG_REGISTER (2))
26368 | (1 << ARG_REGISTER (3));
26369 }
26370 else if (size <= 4)
26371 regs_available_for_popping =
26372 (1 << ARG_REGISTER (2))
26373 | (1 << ARG_REGISTER (3));
26374 else if (size <= 8)
26375 regs_available_for_popping =
26376 (1 << ARG_REGISTER (3));
26377 }
26378
26379 /* Match registers to be popped with registers into which we pop them. */
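/* Each iteration below clears the lowest set bit of both masks (x & -x
isolates that bit), pairing one register that must be popped with one
register we are allowed to pop into.  */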
26380 for (available = regs_available_for_popping,
26381 required = regs_to_pop;
26382 required != 0 && available != 0;
26383 available &= ~(available & - available),
26384 required &= ~(required & - required))
26385 -- pops_needed;
26386
26387 /* If we have any popping registers left over, remove them. */
26388 if (available > 0)
26389 regs_available_for_popping &= ~available;
26390
26391 /* Otherwise if we need another popping register we can use
26392 the fourth argument register. */
26393 else if (pops_needed)
26394 {
26395 /* If we have not found any free argument registers and
26396 reg a4 contains the return address, we must move it. */
26397 if (regs_available_for_popping == 0
26398 && reg_containing_return_addr == LAST_ARG_REGNUM)
26399 {
26400 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26401 reg_containing_return_addr = LR_REGNUM;
26402 }
26403 else if (size > 12)
26404 {
26405 /* Register a4 is being used to hold part of the return value,
26406 but we have dire need of a free, low register. */
26407 restore_a4 = TRUE;
26408
26409 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26410 }
26411
26412 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26413 {
26414 /* The fourth argument register is available. */
26415 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26416
26417 --pops_needed;
26418 }
26419 }
26420
26421 /* Pop as many registers as we can. */
26422 thumb_pop (f, regs_available_for_popping);
26423
26424 /* Process the registers we popped. */
26425 if (reg_containing_return_addr == -1)
26426 {
26427 /* The return address was popped into the lowest numbered register. */
26428 regs_to_pop &= ~(1 << LR_REGNUM);
26429
26430 reg_containing_return_addr =
26431 number_of_first_bit_set (regs_available_for_popping);
26432
26433 /* Remove this register from the mask of available registers, so that
26434 the return address will not be corrupted by further pops. */
26435 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26436 }
26437
26438 /* If we popped other registers then handle them here. */
26439 if (regs_available_for_popping)
26440 {
26441 int frame_pointer;
26442
26443 /* Work out which register currently contains the frame pointer. */
26444 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26445
26446 /* Move it into the correct place. */
26447 asm_fprintf (f, "\tmov\t%r, %r\n",
26448 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26449
26450 /* (Temporarily) remove it from the mask of popped registers. */
26451 regs_available_for_popping &= ~(1 << frame_pointer);
26452 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26453
26454 if (regs_available_for_popping)
26455 {
26456 int stack_pointer;
26457
26458 /* We popped the stack pointer as well,
26459 find the register that contains it. */
26460 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26461
26462 /* Move it into the stack register. */
26463 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26464
26465 /* At this point we have popped all necessary registers, so
26466 do not worry about restoring regs_available_for_popping
26467 to its correct value:
26468
26469 assert (pops_needed == 0)
26470 assert (regs_available_for_popping == (1 << frame_pointer))
26471 assert (regs_to_pop == (1 << STACK_POINTER)) */
26472 }
26473 else
26474 {
26475 /* Since we have just moved the popped value into the frame
26476 pointer, the popping register is available for reuse, and
26477 we know that we still have the stack pointer left to pop. */
26478 regs_available_for_popping |= (1 << frame_pointer);
26479 }
26480 }
26481
26482 /* If we still have registers left on the stack, but we no longer have
26483 any registers into which we can pop them, then we must move the return
26484 address into the link register and make available the register that
26485 contained it. */
26486 if (regs_available_for_popping == 0 && pops_needed > 0)
26487 {
26488 regs_available_for_popping |= 1 << reg_containing_return_addr;
26489
26490 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26491 reg_containing_return_addr);
26492
26493 reg_containing_return_addr = LR_REGNUM;
26494 }
26495
26496 /* If we have registers left on the stack then pop some more.
26497 We know that at most we will want to pop FP and SP. */
26498 if (pops_needed > 0)
26499 {
26500 int popped_into;
26501 int move_to;
26502
26503 thumb_pop (f, regs_available_for_popping);
26504
26505 /* We have popped either FP or SP.
26506 Move whichever one it is into the correct register. */
26507 popped_into = number_of_first_bit_set (regs_available_for_popping);
26508 move_to = number_of_first_bit_set (regs_to_pop);
26509
26510 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26511 --pops_needed;
26512 }
26513
26514 /* If we still have not popped everything then we must have only
26515 had one register available to us and we are now popping the SP. */
26516 if (pops_needed > 0)
26517 {
26518 int popped_into;
26519
26520 thumb_pop (f, regs_available_for_popping);
26521
26522 popped_into = number_of_first_bit_set (regs_available_for_popping);
26523
26524 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26525 /*
26526 assert (regs_to_pop == (1 << STACK_POINTER))
26527 assert (pops_needed == 1)
26528 */
26529 }
26530
26531 /* If necessary restore the a4 register. */
26532 if (restore_a4)
26533 {
26534 if (reg_containing_return_addr != LR_REGNUM)
26535 {
26536 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26537 reg_containing_return_addr = LR_REGNUM;
26538 }
26539
26540 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26541 }
26542
26543 if (crtl->calls_eh_return)
26544 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26545
26546 /* Return to caller. */
26547 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26548 {
26549 /* This is for the cases where LR is not being used to contain the return
26550 address. It may therefore contain information that we might not want
26551 to leak, hence it must be cleared. The value in R0 will never be a
26552 secret at this point, so it is safe to use it, see the clearing code
26553 in cmse_nonsecure_entry_clear_before_return (). */
26554 if (reg_containing_return_addr != LR_REGNUM)
26555 asm_fprintf (f, "\tmov\tlr, r0\n");
26556
26557 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26558 by cmse_nonsecure_entry_clear_before_return (). */
26559 if (!TARGET_HAVE_FPCXT_CMSE)
26560 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26561 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26562 }
26563 else
26564 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26565 }
26566 \f
26567 /* Scan INSN just before assembler is output for it.
26568 For Thumb-1, we track the status of the condition codes; this
26569 information is used in the cbranchsi4_insn pattern. */
26570 void
26571 thumb1_final_prescan_insn (rtx_insn *insn)
26572 {
26573 if (flag_print_asm_name)
26574 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26575 INSN_ADDRESSES (INSN_UID (insn)));
26576 /* Don't overwrite the previous setter when we get to a cbranch. */
26577 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26578 {
26579 enum attr_conds conds;
26580
26581 if (cfun->machine->thumb1_cc_insn)
26582 {
26583 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26584 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26585 CC_STATUS_INIT;
26586 }
26587 conds = get_attr_conds (insn);
26588 if (conds == CONDS_SET)
26589 {
26590 rtx set = single_set (insn);
26591 cfun->machine->thumb1_cc_insn = insn;
26592 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26593 cfun->machine->thumb1_cc_op1 = const0_rtx;
26594 cfun->machine->thumb1_cc_mode = CC_NZmode;
26595 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26596 {
26597 rtx src1 = XEXP (SET_SRC (set), 1);
26598 if (src1 == const0_rtx)
26599 cfun->machine->thumb1_cc_mode = CCmode;
26600 }
26601 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26602 {
26603 /* Record the src register operand instead of dest because
26604 cprop_hardreg pass propagates src. */
26605 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26606 }
26607 }
26608 else if (conds != CONDS_NOCOND)
26609 cfun->machine->thumb1_cc_insn = NULL_RTX;
26610 }
26611
26612 /* Check if an unexpected far jump is used. */
26613 if (cfun->machine->lr_save_eliminated
26614 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26615 internal_error("Unexpected thumb1 far jump");
26616 }
26617
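/* Return nonzero if VAL, viewed as a 32-bit value, is an 8-bit constant
shifted left by some amount, e.g. 0x000000ff, 0x0001fe00 or 0xff000000;
a value such as 0x101 does not qualify.  */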
26618 int
26619 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26620 {
26621 unsigned HOST_WIDE_INT mask = 0xff;
26622 int i;
26623
26624 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26625 if (val == 0) /* XXX */
26626 return 0;
26627
26628 for (i = 0; i < 25; i++)
26629 if ((val & (mask << i)) == val)
26630 return 1;
26631
26632 return 0;
26633 }
26634
26635 /* Returns nonzero if the current function contains,
26636 or might contain, a far jump. */
26637 static int
26638 thumb_far_jump_used_p (void)
26639 {
26640 rtx_insn *insn;
26641 bool far_jump = false;
26642 unsigned int func_size = 0;
26643
26644 /* If we have already decided that far jumps may be used,
26645 do not bother checking again, and always return true even if
26646 it turns out that they are not being used. Once we have made
26647 the decision that far jumps are present (and that hence the link
26648 register will be pushed onto the stack) we cannot go back on it. */
26649 if (cfun->machine->far_jump_used)
26650 return 1;
26651
26652 /* If this function is not being called from the prologue/epilogue
26653 generation code then it must be being called from the
26654 INITIAL_ELIMINATION_OFFSET macro. */
26655 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26656 {
26657 /* In this case we know that we are being asked about the elimination
26658 of the arg pointer register. If that register is not being used,
26659 then there are no arguments on the stack, and we do not have to
26660 worry that a far jump might force the prologue to push the link
26661 register, changing the stack offsets. In this case we can just
26662 return false, since the presence of far jumps in the function will
26663 not affect stack offsets.
26664
26665 If the arg pointer is live (or if it was live, but has now been
26666 eliminated and so set to dead) then we do have to test to see if
26667 the function might contain a far jump. This test can lead to some
26668 false negatives, since before reload is completed the length of
26669 branch instructions is not known, so gcc defaults to returning their
26670 longest length, which in turn sets the far jump attribute to true.
26671
26672 A false negative will not result in bad code being generated, but it
26673 will result in a needless push and pop of the link register. We
26674 hope that this does not occur too often.
26675
26676 If we need doubleword stack alignment this could affect the other
26677 elimination offsets so we can't risk getting it wrong. */
26678 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26679 cfun->machine->arg_pointer_live = 1;
26680 else if (!cfun->machine->arg_pointer_live)
26681 return 0;
26682 }
26683
26684 /* We should not change far_jump_used during or after reload, as there is
26685 no chance to change stack frame layout. */
26686 if (reload_in_progress || reload_completed)
26687 return 0;
26688
26689 /* Check to see if the function contains a branch
26690 insn with the far jump attribute set. */
26691 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26692 {
26693 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26694 {
26695 far_jump = true;
26696 }
26697 func_size += get_attr_length (insn);
26698 }
26699
26700 /* The far_jump attribute is always true for Thumb-1 before the
26701 shorten_branch pass, so checking it before that pass is not very
26702 useful.
26703
26704 The following heuristic tries to estimate more accurately whether a
26705 far jump will actually be needed. It is very conservative, as there
26706 is no way to roll back a decision not to use far jumps.
26707
26708 Thumb-1 long branch offsets range from -2048 to 2046. In the worst
26709 case each 2-byte insn is associated with a 4-byte constant pool
26710 entry, so using function size 2048/3 as the threshold is conservative enough. */
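  /* Worked example of that bound: a function whose insns total 700 bytes
     could, in the worst case, drag along 2 * 700 bytes of literal pool,
     giving a span of roughly 3 * 700 = 2100 bytes, which exceeds the
     2048-byte branch range; hence the func_size * 3 >= 2048 test below.  */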
26711 if (far_jump)
26712 {
26713 if ((func_size * 3) >= 2048)
26714 {
26715 /* Record the fact that we have decided that
26716 the function does use far jumps. */
26717 cfun->machine->far_jump_used = 1;
26718 return 1;
26719 }
26720 }
26721
26722 return 0;
26723 }
26724
26725 /* Return nonzero if FUNC must be entered in ARM mode. */
26726 static bool
26727 is_called_in_ARM_mode (tree func)
26728 {
26729 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26730
26731 /* Ignore the problem about functions whose address is taken. */
26732 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26733 return true;
26734
26735 #ifdef ARM_PE
26736 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26737 #else
26738 return false;
26739 #endif
26740 }
26741
26742 /* Given the stack offsets and register mask in OFFSETS, decide how
26743 many additional registers to push instead of subtracting a constant
26744 from SP. For epilogues the principle is the same except we use pop.
26745 FOR_PROLOGUE indicates which we're generating. */
26746 static int
26747 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26748 {
26749 HOST_WIDE_INT amount;
26750 unsigned long live_regs_mask = offsets->saved_regs_mask;
26751 /* Extract a mask of the ones we can give to the Thumb's push/pop
26752 instruction. */
26753 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26754 /* Then count how many other high registers will need to be pushed. */
26755 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26756 int n_free, reg_base, size;
26757
26758 if (!for_prologue && frame_pointer_needed)
26759 amount = offsets->locals_base - offsets->saved_regs;
26760 else
26761 amount = offsets->outgoing_args - offsets->saved_regs;
26762
26763 /* If the stack frame size is 512 exactly, we can save one load
26764 instruction, which should make this a win even when optimizing
26765 for speed. */
26766 if (!optimize_size && amount != 512)
26767 return 0;
26768
26769 /* Can't do this if there are high registers to push. */
26770 if (high_regs_pushed != 0)
26771 return 0;
26772
26773 /* Shouldn't do it in the prologue if no registers would normally
26774 be pushed at all. In the epilogue, also allow it if we'll have
26775 a pop insn for the PC. */
26776 if (l_mask == 0
26777 && (for_prologue
26778 || TARGET_BACKTRACE
26779 || (live_regs_mask & 1 << LR_REGNUM) == 0
26780 || TARGET_INTERWORK
26781 || crtl->args.pretend_args_size != 0))
26782 return 0;
26783
26784 /* Don't do this if thumb_expand_prologue wants to emit instructions
26785 between the push and the stack frame allocation. */
26786 if (for_prologue
26787 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26788 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26789 return 0;
26790
26791 reg_base = 0;
26792 n_free = 0;
26793 if (!for_prologue)
26794 {
26795 size = arm_size_return_regs ();
26796 reg_base = ARM_NUM_INTS (size);
26797 live_regs_mask >>= reg_base;
26798 }
26799
26800 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26801 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26802 {
26803 live_regs_mask >>= 1;
26804 n_free++;
26805 }
26806
26807 if (n_free == 0)
26808 return 0;
26809 gcc_assert (amount / 4 * 4 == amount);
26810
26811 if (amount >= 512 && (amount - n_free * 4) < 512)
26812 return (amount - 508) / 4;
26813 if (amount <= n_free * 4)
26814 return amount / 4;
26815 return 0;
26816 }
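/* Illustrative example (register choice is hypothetical): when optimizing
   for size, a prologue that would emit
	push	{r4, lr}
	sub	sp, sp, #8
   can instead emit
	push	{r2, r3, r4, lr}
   if two low registers are free, folding the 8-byte frame allocation into
   the push; the epilogue can similarly fold the deallocation into its pop
   instead of adjusting SP.  */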
26817
26818 /* The parts of the epilogue which aren't usefully expanded as rtl. */
26819 const char *
26820 thumb1_unexpanded_epilogue (void)
26821 {
26822 arm_stack_offsets *offsets;
26823 int regno;
26824 unsigned long live_regs_mask = 0;
26825 int high_regs_pushed = 0;
26826 int extra_pop;
26827 int had_to_push_lr;
26828 int size;
26829
26830 if (cfun->machine->return_used_this_function != 0)
26831 return "";
26832
26833 if (IS_NAKED (arm_current_func_type ()))
26834 return "";
26835
26836 offsets = arm_get_frame_offsets ();
26837 live_regs_mask = offsets->saved_regs_mask;
26838 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26839
26840 /* Deduce the registers used from the function's return value. This is
26841 more reliable than examining df_regs_ever_live_p () because that
26842 will be set if the register is ever used in the function, not just if
26843 the register is used to hold a return value. */
26844 size = arm_size_return_regs ();
26845
26846 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26847 if (extra_pop > 0)
26848 {
26849 unsigned long extra_mask = (1 << extra_pop) - 1;
26850 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26851 }
26852
26853 /* The prologue may have pushed some high registers to use as
26854 work registers. For example, the testsuite file:
26855 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26856 compiles to produce:
26857 push {r4, r5, r6, r7, lr}
26858 mov r7, r9
26859 mov r6, r8
26860 push {r6, r7}
26861 as part of the prologue. We have to undo that pushing here. */
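  /* For that example, the undo emitted below is roughly:
	pop	{r6, r7}
	mov	r9, r7
	mov	r8, r6
     i.e. the saved values are popped into free low registers and then
     moved back into the high registers (the exact registers depend on
     which low registers are available).  */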
26862
26863 if (high_regs_pushed)
26864 {
26865 unsigned long mask = live_regs_mask & 0xff;
26866 int next_hi_reg;
26867
26868 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26869
26870 if (mask == 0)
26871 /* Oh dear! We have no low registers into which we can pop
26872 high registers! */
26873 internal_error
26874 ("no low registers available for popping high registers");
26875
26876 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26877 if (live_regs_mask & (1 << next_hi_reg))
26878 break;
26879
26880 while (high_regs_pushed)
26881 {
26882 /* Find lo register(s) into which the high register(s) can
26883 be popped. */
26884 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26885 {
26886 if (mask & (1 << regno))
26887 high_regs_pushed--;
26888 if (high_regs_pushed == 0)
26889 break;
26890 }
26891
26892 if (high_regs_pushed == 0 && regno >= 0)
26893 mask &= ~((1 << regno) - 1);
26894
26895 /* Pop the values into the low register(s). */
26896 thumb_pop (asm_out_file, mask);
26897
26898 /* Move the value(s) into the high registers. */
26899 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26900 {
26901 if (mask & (1 << regno))
26902 {
26903 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26904 regno);
26905
26906 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26907 next_hi_reg--)
26908 if (live_regs_mask & (1 << next_hi_reg))
26909 break;
26910 }
26911 }
26912 }
26913 live_regs_mask &= ~0x0f00;
26914 }
26915
26916 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26917 live_regs_mask &= 0xff;
26918
26919 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26920 {
26921 /* Pop the return address into the PC. */
26922 if (had_to_push_lr)
26923 live_regs_mask |= 1 << PC_REGNUM;
26924
26925 /* Either no argument registers were pushed or a backtrace
26926 structure was created which includes an adjusted stack
26927 pointer, so just pop everything. */
26928 if (live_regs_mask)
26929 thumb_pop (asm_out_file, live_regs_mask);
26930
26931 /* We have either just popped the return address into the
26932 PC, or it was kept in LR for the entire function.
26933 Note that thumb_pop has already called thumb_exit if the
26934 PC was in the list. */
26935 if (!had_to_push_lr)
26936 thumb_exit (asm_out_file, LR_REGNUM);
26937 }
26938 else
26939 {
26940 /* Pop everything but the return address. */
26941 if (live_regs_mask)
26942 thumb_pop (asm_out_file, live_regs_mask);
26943
26944 if (had_to_push_lr)
26945 {
26946 if (size > 12)
26947 {
26948 /* We have no free low regs, so save one. */
26949 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26950 LAST_ARG_REGNUM);
26951 }
26952
26953 /* Get the return address into a temporary register. */
26954 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26955
26956 if (size > 12)
26957 {
26958 /* Move the return address to lr. */
26959 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26960 LAST_ARG_REGNUM);
26961 /* Restore the low register. */
26962 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26963 IP_REGNUM);
26964 regno = LR_REGNUM;
26965 }
26966 else
26967 regno = LAST_ARG_REGNUM;
26968 }
26969 else
26970 regno = LR_REGNUM;
26971
26972 /* Remove the argument registers that were pushed onto the stack. */
26973 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26974 SP_REGNUM, SP_REGNUM,
26975 crtl->args.pretend_args_size);
26976
26977 thumb_exit (asm_out_file, regno);
26978 }
26979
26980 return "";
26981 }
26982
26983 /* Functions to save and restore machine-specific function data. */
26984 static struct machine_function *
26985 arm_init_machine_status (void)
26986 {
26987 struct machine_function *machine;
26988 machine = ggc_cleared_alloc<machine_function> ();
26989
26990 #if ARM_FT_UNKNOWN != 0
26991 machine->func_type = ARM_FT_UNKNOWN;
26992 #endif
26993 machine->static_chain_stack_bytes = -1;
26994 machine->pacspval_needed = 0;
26995 return machine;
26996 }
26997
26998 /* Return an RTX indicating where the return address to the
26999 calling function can be found. */
27000 rtx
27001 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27002 {
27003 if (count != 0)
27004 return NULL_RTX;
27005
27006 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27007 }
27008
27009 /* Do anything needed before RTL is emitted for each function. */
27010 void
27011 arm_init_expanders (void)
27012 {
27013 /* Arrange to initialize and mark the machine per-function status. */
27014 init_machine_status = arm_init_machine_status;
27015
27016 /* This is to stop the combine pass optimizing away the alignment
27017 adjustment of va_arg. */
27018 /* ??? It is claimed that this should not be necessary. */
27019 if (cfun)
27020 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27021 }
27022
27023 /* Check whether FUNC is compiled in a different mode (ARM vs Thumb) from the current one. */
27024
27025 bool
27026 arm_change_mode_p (tree func)
27027 {
27028 if (TREE_CODE (func) != FUNCTION_DECL)
27029 return false;
27030
27031 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27032
27033 if (!callee_tree)
27034 callee_tree = target_option_default_node;
27035
27036 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27037 int flags = callee_opts->x_target_flags;
27038
27039 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27040 }
27041
27042 /* Like arm_compute_initial_elimination_offset. Simpler because there
27043 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27044 to point at the base of the local variables after static stack
27045 space for a function has been allocated. */
27046
27047 HOST_WIDE_INT
27048 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27049 {
27050 arm_stack_offsets *offsets;
27051
27052 offsets = arm_get_frame_offsets ();
27053
27054 switch (from)
27055 {
27056 case ARG_POINTER_REGNUM:
27057 switch (to)
27058 {
27059 case STACK_POINTER_REGNUM:
27060 return offsets->outgoing_args - offsets->saved_args;
27061
27062 case FRAME_POINTER_REGNUM:
27063 return offsets->soft_frame - offsets->saved_args;
27064
27065 case ARM_HARD_FRAME_POINTER_REGNUM:
27066 return offsets->saved_regs - offsets->saved_args;
27067
27068 case THUMB_HARD_FRAME_POINTER_REGNUM:
27069 return offsets->locals_base - offsets->saved_args;
27070
27071 default:
27072 gcc_unreachable ();
27073 }
27074 break;
27075
27076 case FRAME_POINTER_REGNUM:
27077 switch (to)
27078 {
27079 case STACK_POINTER_REGNUM:
27080 return offsets->outgoing_args - offsets->soft_frame;
27081
27082 case ARM_HARD_FRAME_POINTER_REGNUM:
27083 return offsets->saved_regs - offsets->soft_frame;
27084
27085 case THUMB_HARD_FRAME_POINTER_REGNUM:
27086 return offsets->locals_base - offsets->soft_frame;
27087
27088 default:
27089 gcc_unreachable ();
27090 }
27091 break;
27092
27093 default:
27094 gcc_unreachable ();
27095 }
27096 }
27097
27098 /* Generate the function's prologue. */
27099
27100 void
27101 thumb1_expand_prologue (void)
27102 {
27103 rtx_insn *insn;
27104
27105 HOST_WIDE_INT amount;
27106 HOST_WIDE_INT size;
27107 arm_stack_offsets *offsets;
27108 unsigned long func_type;
27109 int regno;
27110 unsigned long live_regs_mask;
27111 unsigned long l_mask;
27112 unsigned high_regs_pushed = 0;
27113 bool lr_needs_saving;
27114
27115 func_type = arm_current_func_type ();
27116
27117 /* Naked functions don't have prologues. */
27118 if (IS_NAKED (func_type))
27119 {
27120 if (flag_stack_usage_info)
27121 current_function_static_stack_size = 0;
27122 return;
27123 }
27124
27125 if (IS_INTERRUPT (func_type))
27126 {
27127 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27128 return;
27129 }
27130
27131 if (is_called_in_ARM_mode (current_function_decl))
27132 emit_insn (gen_prologue_thumb1_interwork ());
27133
27134 offsets = arm_get_frame_offsets ();
27135 live_regs_mask = offsets->saved_regs_mask;
27136 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27137
27138 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27139 l_mask = live_regs_mask & 0x40ff;
27140 /* Then count how many other high registers will need to be pushed. */
27141 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27142
27143 if (crtl->args.pretend_args_size)
27144 {
27145 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27146
27147 if (cfun->machine->uses_anonymous_args)
27148 {
27149 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27150 unsigned long mask;
27151
27152 mask = 1ul << (LAST_ARG_REGNUM + 1);
27153 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27154
27155 insn = thumb1_emit_multi_reg_push (mask, 0);
27156 }
27157 else
27158 {
27159 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27160 stack_pointer_rtx, x));
27161 }
27162 RTX_FRAME_RELATED_P (insn) = 1;
27163 }
27164
27165 if (TARGET_BACKTRACE)
27166 {
27167 HOST_WIDE_INT offset = 0;
27168 unsigned work_register;
27169 rtx work_reg, x, arm_hfp_rtx;
27170
27171 /* We have been asked to create a stack backtrace structure.
27172 The code looks like this:
27173
27174 0 .align 2
27175 0 func:
27176 0 sub SP, #16 Reserve space for 4 registers.
27177 2 push {R7} Push low registers.
27178 4 add R7, SP, #20 Get the stack pointer before the push.
27179 6 str R7, [SP, #8] Store the stack pointer
27180 (before reserving the space).
27181 8 mov R7, PC Get hold of the start of this code + 12.
27182 10 str R7, [SP, #16] Store it.
27183 12 mov R7, FP Get hold of the current frame pointer.
27184 14 str R7, [SP, #4] Store it.
27185 16 mov R7, LR Get hold of the current return address.
27186 18 str R7, [SP, #12] Store it.
27187 20 add R7, SP, #16 Point at the start of the
27188 backtrace structure.
27189 22 mov FP, R7 Put this value into the frame pointer. */
27190
27191 work_register = thumb_find_work_register (live_regs_mask);
27192 work_reg = gen_rtx_REG (SImode, work_register);
27193 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27194
27195 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27196 stack_pointer_rtx, GEN_INT (-16)));
27197 RTX_FRAME_RELATED_P (insn) = 1;
27198
27199 if (l_mask)
27200 {
27201 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27202 RTX_FRAME_RELATED_P (insn) = 1;
27203 lr_needs_saving = false;
27204
27205 offset = bit_count (l_mask) * UNITS_PER_WORD;
27206 }
27207
27208 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27209 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27210
27211 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27212 x = gen_frame_mem (SImode, x);
27213 emit_move_insn (x, work_reg);
27214
27215 /* Make sure that the instruction fetching the PC is in the right place
27216 to calculate "start of backtrace creation code + 12". */
27217 /* ??? The stores using the common WORK_REG ought to be enough to
27218 prevent the scheduler from doing anything weird. Failing that
27219 we could always move all of the following into an UNSPEC_VOLATILE. */
27220 if (l_mask)
27221 {
27222 x = gen_rtx_REG (SImode, PC_REGNUM);
27223 emit_move_insn (work_reg, x);
27224
27225 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27226 x = gen_frame_mem (SImode, x);
27227 emit_move_insn (x, work_reg);
27228
27229 emit_move_insn (work_reg, arm_hfp_rtx);
27230
27231 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27232 x = gen_frame_mem (SImode, x);
27233 emit_move_insn (x, work_reg);
27234 }
27235 else
27236 {
27237 emit_move_insn (work_reg, arm_hfp_rtx);
27238
27239 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27240 x = gen_frame_mem (SImode, x);
27241 emit_move_insn (x, work_reg);
27242
27243 x = gen_rtx_REG (SImode, PC_REGNUM);
27244 emit_move_insn (work_reg, x);
27245
27246 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27247 x = gen_frame_mem (SImode, x);
27248 emit_move_insn (x, work_reg);
27249 }
27250
27251 x = gen_rtx_REG (SImode, LR_REGNUM);
27252 emit_move_insn (work_reg, x);
27253
27254 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27255 x = gen_frame_mem (SImode, x);
27256 emit_move_insn (x, work_reg);
27257
27258 x = GEN_INT (offset + 12);
27259 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27260
27261 emit_move_insn (arm_hfp_rtx, work_reg);
27262 }
27263 /* Optimization: If we are not pushing any low registers but we are going
27264 to push some high registers then delay our first push. This will just
27265 be a push of LR and we can combine it with the push of the first high
27266 register. */
27267 else if ((l_mask & 0xff) != 0
27268 || (high_regs_pushed == 0 && lr_needs_saving))
27269 {
27270 unsigned long mask = l_mask;
27271 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27272 insn = thumb1_emit_multi_reg_push (mask, mask);
27273 RTX_FRAME_RELATED_P (insn) = 1;
27274 lr_needs_saving = false;
27275 }
27276
27277 if (high_regs_pushed)
27278 {
27279 unsigned pushable_regs;
27280 unsigned next_hi_reg;
27281 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27282 : crtl->args.info.nregs;
27283 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27284
27285 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27286 if (live_regs_mask & (1 << next_hi_reg))
27287 break;
27288
27289 /* Here we need to mask out registers used for passing arguments,
27290 even if they could be pushed. This is to avoid using them to
27291 stash the high registers, since such a stash could clobber
27292 argument values that are still in use. */
27293 pushable_regs = l_mask & (~arg_regs_mask);
27294 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27295
27296 /* Normally, LR can be used as a scratch register once it has been
27297 saved; but if the function examines its own return address then
27298 the value is still live and we need to avoid using it. */
27299 bool return_addr_live
27300 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27301 LR_REGNUM);
27302
27303 if (lr_needs_saving || return_addr_live)
27304 pushable_regs &= ~(1 << LR_REGNUM);
27305
27306 if (pushable_regs == 0)
27307 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27308
27309 while (high_regs_pushed > 0)
27310 {
27311 unsigned long real_regs_mask = 0;
27312 unsigned long push_mask = 0;
27313
27314 for (regno = LR_REGNUM; regno >= 0; regno --)
27315 {
27316 if (pushable_regs & (1 << regno))
27317 {
27318 emit_move_insn (gen_rtx_REG (SImode, regno),
27319 gen_rtx_REG (SImode, next_hi_reg));
27320
27321 high_regs_pushed --;
27322 real_regs_mask |= (1 << next_hi_reg);
27323 push_mask |= (1 << regno);
27324
27325 if (high_regs_pushed)
27326 {
27327 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27328 next_hi_reg --)
27329 if (live_regs_mask & (1 << next_hi_reg))
27330 break;
27331 }
27332 else
27333 break;
27334 }
27335 }
27336
27337 /* If we had to find a work register and we have not yet
27338 saved the LR then add it to the list of regs to push. */
27339 if (lr_needs_saving)
27340 {
27341 push_mask |= 1 << LR_REGNUM;
27342 real_regs_mask |= 1 << LR_REGNUM;
27343 lr_needs_saving = false;
27344 /* If the return address is not live at this point, we
27345 can add LR to the list of registers that we can use
27346 for pushes. */
27347 if (!return_addr_live)
27348 pushable_regs |= 1 << LR_REGNUM;
27349 }
27350
27351 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27352 RTX_FRAME_RELATED_P (insn) = 1;
27353 }
27354 }
27355
27356 /* Load the pic register before setting the frame pointer,
27357 so we can use r7 as a temporary work register. */
27358 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27359 arm_load_pic_register (live_regs_mask, NULL_RTX);
27360
27361 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27362 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27363 stack_pointer_rtx);
27364
27365 size = offsets->outgoing_args - offsets->saved_args;
27366 if (flag_stack_usage_info)
27367 current_function_static_stack_size = size;
27368
27369 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27370 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27371 || flag_stack_clash_protection)
27372 && size)
27373 sorry ("%<-fstack-check=specific%> for Thumb-1");
27374
27375 amount = offsets->outgoing_args - offsets->saved_regs;
27376 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27377 if (amount)
27378 {
27379 if (amount < 512)
27380 {
27381 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27382 GEN_INT (- amount)));
27383 RTX_FRAME_RELATED_P (insn) = 1;
27384 }
27385 else
27386 {
27387 rtx reg, dwarf;
27388
27389 /* The stack decrement is too big for an immediate value in a single
27390 insn. In theory we could issue multiple subtracts, but after
27391 three of them it becomes more space efficient to place the full
27392 value in the constant pool and load into a register. (Also the
27393 ARM debugger really likes to see only one stack decrement per
27394 function). So instead we look for a scratch register into which
27395 we can load the decrement, and then we subtract this from the
27396 stack pointer. Unfortunately on the thumb the only available
27397 scratch registers are the argument registers, and we cannot use
27398 these as they may hold arguments to the function. Instead we
27399 attempt to locate a call preserved register which is used by this
27400 function. If we can find one, then we know that it will have
27401 been pushed at the start of the prologue and so we can corrupt
27402 it now. */
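	  /* For instance (purely illustrative; the label name is made up), a
	     1024-byte frame in a function that also saves r4 might become:
		ldr	r4, .Lframe_size	@ .Lframe_size holds -1024
		add	sp, sp, r4
	     r4's real value is restored later by the epilogue pop.  */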
27403 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27404 if (live_regs_mask & (1 << regno))
27405 break;
27406
27407 gcc_assert(regno <= LAST_LO_REGNUM);
27408
27409 reg = gen_rtx_REG (SImode, regno);
27410
27411 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27412
27413 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27414 stack_pointer_rtx, reg));
27415
27416 dwarf = gen_rtx_SET (stack_pointer_rtx,
27417 plus_constant (Pmode, stack_pointer_rtx,
27418 -amount));
27419 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27420 RTX_FRAME_RELATED_P (insn) = 1;
27421 }
27422 }
27423
27424 if (frame_pointer_needed)
27425 thumb_set_frame_pointer (offsets);
27426
27427 /* If we are profiling, make sure no instructions are scheduled before
27428 the call to mcount. Similarly if the user has requested no
27429 scheduling in the prologue. Likewise if we want non-call exceptions
27430 using the EABI unwinder, to prevent faulting instructions from being
27431 swapped with a stack adjustment. */
27432 if (crtl->profile || !TARGET_SCHED_PROLOG
27433 || (arm_except_unwind_info (&global_options) == UI_TARGET
27434 && cfun->can_throw_non_call_exceptions))
27435 emit_insn (gen_blockage ());
27436
27437 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27438 if (live_regs_mask & 0xff)
27439 cfun->machine->lr_save_eliminated = 0;
27440 }
27441
27442 /* Clear caller-saved registers that are not used to pass return values,
27443 as well as leaked condition flags, before exiting a cmse_nonsecure_entry function. */
27444
27445 void
27446 cmse_nonsecure_entry_clear_before_return (void)
27447 {
27448 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27449 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27450 uint32_t padding_bits_to_clear = 0;
27451 auto_sbitmap to_clear_bitmap (maxregno + 1);
27452 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27453 tree result_type;
27454
27455 bitmap_clear (to_clear_bitmap);
27456 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27457 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27458
27459 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27460 registers. */
27461 if (clear_vfpregs)
27462 {
27463 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27464
27465 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27466
27467 if (!TARGET_HAVE_FPCXT_CMSE)
27468 {
27469 /* Make sure we don't clear the two scratch registers used to clear
27470 the relevant FPSCR bits in output_return_instruction. */
27471 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27472 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27473 emit_use (gen_rtx_REG (SImode, 4));
27474 bitmap_clear_bit (to_clear_bitmap, 4);
27475 }
27476 }
27477
27478 /* If the user has defined registers to be caller saved, these are no longer
27479 restored by the function before returning and must thus be cleared for
27480 security purposes. */
27481 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27482 {
27483 /* We do not touch registers that can be used to pass arguments as per
27484 the AAPCS, since these should never be made callee-saved by user
27485 options. */
27486 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27487 continue;
27488 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27489 continue;
27490 if (!callee_saved_reg_p (regno)
27491 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27492 || TARGET_HARD_FLOAT))
27493 bitmap_set_bit (to_clear_bitmap, regno);
27494 }
27495
27496 /* Make sure we do not clear the registers used to return the result in. */
27497 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27498 if (!VOID_TYPE_P (result_type))
27499 {
27500 uint64_t to_clear_return_mask;
27501 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27502
27503 /* No need to check that we return in registers, because we don't
27504 support returning on stack yet. */
27505 gcc_assert (REG_P (result_rtl));
27506 to_clear_return_mask
27507 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27508 &padding_bits_to_clear);
27509 if (to_clear_return_mask)
27510 {
27511 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27512 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27513 {
27514 if (to_clear_return_mask & (1ULL << regno))
27515 bitmap_clear_bit (to_clear_bitmap, regno);
27516 }
27517 }
27518 }
27519
27520 if (padding_bits_to_clear != 0)
27521 {
27522 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27523 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27524
27525 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27526 returning a composite type, which only uses r0. Let's make sure that
27527 r1-r3 are cleared too. */
27528 bitmap_clear (to_clear_arg_regs_bitmap);
27529 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27530 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27531 }
27532
27533 /* Clear full registers that leak before returning. */
27534 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27535 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27536 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27537 clearing_reg);
27538 }
27539
27540 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27541 single POP instruction can be generated. LR should be replaced by PC.
27542 All the checks required are already done by USE_RETURN_INSN (). Hence,
27543 all we really need to check here is whether a single register or
27544 multiple registers need to be popped. */
27545 void
27546 thumb2_expand_return (bool simple_return)
27547 {
27548 int i, num_regs;
27549 unsigned long saved_regs_mask;
27550 arm_stack_offsets *offsets;
27551
27552 offsets = arm_get_frame_offsets ();
27553 saved_regs_mask = offsets->saved_regs_mask;
27554
27555 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27556 if (saved_regs_mask & (1 << i))
27557 num_regs++;
27558
27559 if (!simple_return && saved_regs_mask)
27560 {
27561 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27562 functions or adapt code to handle according to ACLE. This path should
27563 not be reachable for cmse_nonsecure_entry functions though we prefer
27564 to assert it for now to ensure that future code changes do not silently
27565 change this behavior. */
27566 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27567 if (arm_current_function_pac_enabled_p ())
27568 {
27569 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27570 arm_emit_multi_reg_pop (saved_regs_mask);
27571 emit_insn (gen_aut_nop ());
27572 emit_jump_insn (simple_return_rtx);
27573 }
27574 else if (num_regs == 1)
27575 {
27576 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27577 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27578 rtx addr = gen_rtx_MEM (SImode,
27579 gen_rtx_POST_INC (SImode,
27580 stack_pointer_rtx));
27581 set_mem_alias_set (addr, get_frame_alias_set ());
27582 XVECEXP (par, 0, 0) = ret_rtx;
27583 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27584 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27585 emit_jump_insn (par);
27586 }
27587 else
27588 {
27589 saved_regs_mask &= ~ (1 << LR_REGNUM);
27590 saved_regs_mask |= (1 << PC_REGNUM);
27591 arm_emit_multi_reg_pop (saved_regs_mask);
27592 }
27593 }
27594 else
27595 {
27596 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27597 cmse_nonsecure_entry_clear_before_return ();
27598 emit_jump_insn (simple_return_rtx);
27599 }
27600 }
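/* For the single-register case above, the PARALLEL of a return and a
   post-incremented SP load of the PC corresponds to a single
   "ldr pc, [sp], #4" (i.e. "pop {pc}"), so the return address is reloaded
   and the stack popped in one instruction.  */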
27601
27602 void
27603 thumb1_expand_epilogue (void)
27604 {
27605 HOST_WIDE_INT amount;
27606 arm_stack_offsets *offsets;
27607 int regno;
27608
27609 /* Naked functions don't have epilogues. */
27610 if (IS_NAKED (arm_current_func_type ()))
27611 return;
27612
27613 offsets = arm_get_frame_offsets ();
27614 amount = offsets->outgoing_args - offsets->saved_regs;
27615
27616 if (frame_pointer_needed)
27617 {
27618 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27619 amount = offsets->locals_base - offsets->saved_regs;
27620 }
27621 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27622
27623 gcc_assert (amount >= 0);
27624 if (amount)
27625 {
27626 emit_insn (gen_blockage ());
27627
27628 if (amount < 512)
27629 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27630 GEN_INT (amount)));
27631 else
27632 {
27633 /* r3 is always free in the epilogue. */
27634 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27635
27636 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27637 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27638 }
27639 }
27640
27641 /* Emit a USE (stack_pointer_rtx), so that
27642 the stack adjustment will not be deleted. */
27643 emit_insn (gen_force_register_use (stack_pointer_rtx));
27644
27645 if (crtl->profile || !TARGET_SCHED_PROLOG)
27646 emit_insn (gen_blockage ());
27647
27648 /* Emit a clobber for each insn that will be restored in the epilogue,
27649 so that flow2 will get register lifetimes correct. */
27650 for (regno = 0; regno < 13; regno++)
27651 if (reg_needs_saving_p (regno))
27652 emit_clobber (gen_rtx_REG (SImode, regno));
27653
27654 if (! df_regs_ever_live_p (LR_REGNUM))
27655 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27656
27657 /* Clear all caller-saved regs that are not used to return. */
27658 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27659 cmse_nonsecure_entry_clear_before_return ();
27660 }
27661
27662 /* Epilogue code for APCS frame. */
27663 static void
27664 arm_expand_epilogue_apcs_frame (bool really_return)
27665 {
27666 unsigned long func_type;
27667 unsigned long saved_regs_mask;
27668 int num_regs = 0;
27669 int i;
27670 int floats_from_frame = 0;
27671 arm_stack_offsets *offsets;
27672
27673 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27674 func_type = arm_current_func_type ();
27675
27676 /* Get frame offsets for ARM. */
27677 offsets = arm_get_frame_offsets ();
27678 saved_regs_mask = offsets->saved_regs_mask;
27679
27680 /* Find the offset of the floating-point save area in the frame. */
27681 floats_from_frame
27682 = (offsets->saved_args
27683 + arm_compute_static_chain_stack_bytes ()
27684 - offsets->frame);
27685
27686 /* Compute how many core registers saved and how far away the floats are. */
27687 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27688 if (saved_regs_mask & (1 << i))
27689 {
27690 num_regs++;
27691 floats_from_frame += 4;
27692 }
27693
27694 if (TARGET_VFP_BASE)
27695 {
27696 int start_reg;
27697 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27698
27699 /* The offset is from IP_REGNUM. */
27700 int saved_size = arm_get_vfp_saved_size ();
27701 if (saved_size > 0)
27702 {
27703 rtx_insn *insn;
27704 floats_from_frame += saved_size;
27705 insn = emit_insn (gen_addsi3 (ip_rtx,
27706 hard_frame_pointer_rtx,
27707 GEN_INT (-floats_from_frame)));
27708 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27709 ip_rtx, hard_frame_pointer_rtx);
27710 }
27711
27712 /* Generate VFP register multi-pop. */
27713 start_reg = FIRST_VFP_REGNUM;
27714
27715 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27716 /* Look for a case where a reg does not need restoring. */
27717 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27718 {
27719 if (start_reg != i)
27720 arm_emit_vfp_multi_reg_pop (start_reg,
27721 (i - start_reg) / 2,
27722 gen_rtx_REG (SImode,
27723 IP_REGNUM));
27724 start_reg = i + 2;
27725 }
27726
27727 /* Restore the remaining regs that we have discovered (or possibly
27728 even all of them, if the conditional in the for loop never
27729 fired). */
27730 if (start_reg != i)
27731 arm_emit_vfp_multi_reg_pop (start_reg,
27732 (i - start_reg) / 2,
27733 gen_rtx_REG (SImode, IP_REGNUM));
27734 }
27735
27736 if (TARGET_IWMMXT)
27737 {
27738 /* The frame pointer is guaranteed to be non-double-word aligned, as
27739 it is set to double-word-aligned old_stack_pointer - 4. */
27740 rtx_insn *insn;
27741 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27742
27743 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27744 if (reg_needs_saving_p (i))
27745 {
27746 rtx addr = gen_frame_mem (V2SImode,
27747 plus_constant (Pmode, hard_frame_pointer_rtx,
27748 - lrm_count * 4));
27749 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27750 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27751 gen_rtx_REG (V2SImode, i),
27752 NULL_RTX);
27753 lrm_count += 2;
27754 }
27755 }
27756
27757 /* saved_regs_mask should contain IP, which holds the old stack pointer
27758 from the time the activation record was created. Since SP and IP are
27759 adjacent registers, we can restore the value directly into SP. */
27760 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27761 saved_regs_mask &= ~(1 << IP_REGNUM);
27762 saved_regs_mask |= (1 << SP_REGNUM);
27763
27764 /* There are two registers left in saved_regs_mask - LR and PC. We
27765 only need to restore LR (the return address), but to
27766 save time we can load it directly into PC, unless we need a
27767 special function exit sequence, or we are not really returning. */
27768 if (really_return
27769 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27770 && !crtl->calls_eh_return)
27771 /* Delete LR from the register mask, so that LR on
27772 the stack is loaded into the PC in the register mask. */
27773 saved_regs_mask &= ~(1 << LR_REGNUM);
27774 else
27775 saved_regs_mask &= ~(1 << PC_REGNUM);
27776
27777 num_regs = bit_count (saved_regs_mask);
27778 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27779 {
27780 rtx_insn *insn;
27781 emit_insn (gen_blockage ());
27782 /* Unwind the stack to just below the saved registers. */
27783 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27784 hard_frame_pointer_rtx,
27785 GEN_INT (- 4 * num_regs)));
27786
27787 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27788 stack_pointer_rtx, hard_frame_pointer_rtx);
27789 }
27790
27791 arm_emit_multi_reg_pop (saved_regs_mask);
27792
27793 if (IS_INTERRUPT (func_type))
27794 {
27795 /* Interrupt handlers will have pushed the
27796 IP onto the stack, so restore it now. */
27797 rtx_insn *insn;
27798 rtx addr = gen_rtx_MEM (SImode,
27799 gen_rtx_POST_INC (SImode,
27800 stack_pointer_rtx));
27801 set_mem_alias_set (addr, get_frame_alias_set ());
27802 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27803 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27804 gen_rtx_REG (SImode, IP_REGNUM),
27805 NULL_RTX);
27806 }
27807
27808 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27809 return;
27810
27811 if (crtl->calls_eh_return)
27812 emit_insn (gen_addsi3 (stack_pointer_rtx,
27813 stack_pointer_rtx,
27814 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27815
27816 if (IS_STACKALIGN (func_type))
27817 /* Restore the original stack pointer. Before prologue, the stack was
27818 realigned and the original stack pointer saved in r0. For details,
27819 see comment in arm_expand_prologue. */
27820 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27821
27822 emit_jump_insn (simple_return_rtx);
27823 }
27824
27825 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
27826 function is not a sibcall. */
27827 void
27828 arm_expand_epilogue (bool really_return)
27829 {
27830 unsigned long func_type;
27831 unsigned long saved_regs_mask;
27832 int num_regs = 0;
27833 int i;
27834 int amount;
27835 arm_stack_offsets *offsets;
27836
27837 func_type = arm_current_func_type ();
27838
27839 /* Naked functions don't have an epilogue. Hence, generate a return pattern
27840 and let output_return_instruction take care of any instruction emission. */
27841 if (IS_NAKED (func_type)
27842 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27843 {
27844 if (really_return)
27845 emit_jump_insn (simple_return_rtx);
27846 return;
27847 }
27848
27849 /* If we are throwing an exception, then we really must be doing a
27850 return, so we can't tail-call. */
27851 gcc_assert (!crtl->calls_eh_return || really_return);
27852
27853 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27854 {
27855 arm_expand_epilogue_apcs_frame (really_return);
27856 return;
27857 }
27858
27859 /* Get frame offsets for ARM. */
27860 offsets = arm_get_frame_offsets ();
27861 saved_regs_mask = offsets->saved_regs_mask;
27862 num_regs = bit_count (saved_regs_mask);
27863
27864 if (frame_pointer_needed)
27865 {
27866 rtx_insn *insn;
27867 /* Restore stack pointer if necessary. */
27868 if (TARGET_ARM)
27869 {
27870 /* In ARM mode, the frame pointer points to the first saved register.
27871 Restore the stack pointer to the last saved register. */
27872 amount = offsets->frame - offsets->saved_regs;
27873
27874 /* Force out any pending memory operations that reference stacked data
27875 before stack de-allocation occurs. */
27876 emit_insn (gen_blockage ());
27877 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27878 hard_frame_pointer_rtx,
27879 GEN_INT (amount)));
27880 arm_add_cfa_adjust_cfa_note (insn, amount,
27881 stack_pointer_rtx,
27882 hard_frame_pointer_rtx);
27883
27884 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27885 deleted. */
27886 emit_insn (gen_force_register_use (stack_pointer_rtx));
27887 }
27888 else
27889 {
27890 /* In Thumb-2 mode, the frame pointer points to the last saved
27891 register. */
27892 amount = offsets->locals_base - offsets->saved_regs;
27893 if (amount)
27894 {
27895 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27896 hard_frame_pointer_rtx,
27897 GEN_INT (amount)));
27898 arm_add_cfa_adjust_cfa_note (insn, amount,
27899 hard_frame_pointer_rtx,
27900 hard_frame_pointer_rtx);
27901 }
27902
27903 /* Force out any pending memory operations that reference stacked data
27904 before stack de-allocation occurs. */
27905 emit_insn (gen_blockage ());
27906 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27907 hard_frame_pointer_rtx));
27908 arm_add_cfa_adjust_cfa_note (insn, 0,
27909 stack_pointer_rtx,
27910 hard_frame_pointer_rtx);
27911 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27912 deleted. */
27913 emit_insn (gen_force_register_use (stack_pointer_rtx));
27914 }
27915 }
27916 else
27917 {
27918 /* Pop off outgoing args and local frame to adjust stack pointer to
27919 last saved register. */
27920 amount = offsets->outgoing_args - offsets->saved_regs;
27921 if (amount)
27922 {
27923 rtx_insn *tmp;
27924 /* Force out any pending memory operations that reference stacked data
27925 before stack de-allocation occurs. */
27926 emit_insn (gen_blockage ());
27927 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27928 stack_pointer_rtx,
27929 GEN_INT (amount)));
27930 arm_add_cfa_adjust_cfa_note (tmp, amount,
27931 stack_pointer_rtx, stack_pointer_rtx);
27932 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27933 not deleted. */
27934 emit_insn (gen_force_register_use (stack_pointer_rtx));
27935 }
27936 }
27937
27938 if (TARGET_VFP_BASE)
27939 {
27940 /* Generate VFP register multi-pop. */
27941 int end_reg = LAST_VFP_REGNUM + 1;
27942
27943 /* Scan the registers in reverse order. We need to match
27944 any groupings made in the prologue and generate matching
27945 vldm operations. The need to match groups is because,
27946 unlike pop, vldm can only do consecutive regs. */
27947 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27948 /* Look for a case where a reg does not need restoring. */
27949 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27950 {
27951 /* Restore the regs discovered so far (from reg+2 to
27952 end_reg). */
27953 if (end_reg > i + 2)
27954 arm_emit_vfp_multi_reg_pop (i + 2,
27955 (end_reg - (i + 2)) / 2,
27956 stack_pointer_rtx);
27957 end_reg = i;
27958 }
27959
27960 /* Restore the remaining regs that we have discovered (or possibly
27961 even all of them, if the conditional in the for loop never
27962 fired). */
27963 if (end_reg > i + 2)
27964 arm_emit_vfp_multi_reg_pop (i + 2,
27965 (end_reg - (i + 2)) / 2,
27966 stack_pointer_rtx);
27967 }
27968
27969 if (TARGET_IWMMXT)
27970 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27971 if (reg_needs_saving_p (i))
27972 {
27973 rtx_insn *insn;
27974 rtx addr = gen_rtx_MEM (V2SImode,
27975 gen_rtx_POST_INC (SImode,
27976 stack_pointer_rtx));
27977 set_mem_alias_set (addr, get_frame_alias_set ());
27978 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27979 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27980 gen_rtx_REG (V2SImode, i),
27981 NULL_RTX);
27982 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27983 stack_pointer_rtx, stack_pointer_rtx);
27984 }
27985
27986 if (saved_regs_mask)
27987 {
27988 rtx insn;
27989 bool return_in_pc = false;
27990
27991 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27992 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27993 && !IS_CMSE_ENTRY (func_type)
27994 && !IS_STACKALIGN (func_type)
27995 && really_return
27996 && crtl->args.pretend_args_size == 0
27997 && saved_regs_mask & (1 << LR_REGNUM)
27998 && !crtl->calls_eh_return
27999 && !arm_current_function_pac_enabled_p ())
28000 {
28001 saved_regs_mask &= ~(1 << LR_REGNUM);
28002 saved_regs_mask |= (1 << PC_REGNUM);
28003 return_in_pc = true;
28004 }
28005
28006 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28007 {
28008 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28009 if (saved_regs_mask & (1 << i))
28010 {
28011 rtx addr = gen_rtx_MEM (SImode,
28012 gen_rtx_POST_INC (SImode,
28013 stack_pointer_rtx));
28014 set_mem_alias_set (addr, get_frame_alias_set ());
28015
28016 if (i == PC_REGNUM)
28017 {
28018 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28019 XVECEXP (insn, 0, 0) = ret_rtx;
28020 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28021 addr);
28022 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28023 insn = emit_jump_insn (insn);
28024 }
28025 else
28026 {
28027 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28028 addr));
28029 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28030 gen_rtx_REG (SImode, i),
28031 NULL_RTX);
28032 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28033 stack_pointer_rtx,
28034 stack_pointer_rtx);
28035 }
28036 }
28037 }
28038 else
28039 {
28040 if (TARGET_LDRD
28041 && current_tune->prefer_ldrd_strd
28042 && !optimize_function_for_size_p (cfun))
28043 {
28044 if (TARGET_THUMB2)
28045 thumb2_emit_ldrd_pop (saved_regs_mask);
28046 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28047 arm_emit_ldrd_pop (saved_regs_mask);
28048 else
28049 arm_emit_multi_reg_pop (saved_regs_mask);
28050 }
28051 else
28052 arm_emit_multi_reg_pop (saved_regs_mask);
28053 }
28054
28055 if (return_in_pc)
28056 return;
28057 }
28058
28059 amount
28060 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28061 if (amount)
28062 {
28063 int i, j;
28064 rtx dwarf = NULL_RTX;
28065 rtx_insn *tmp =
28066 emit_insn (gen_addsi3 (stack_pointer_rtx,
28067 stack_pointer_rtx,
28068 GEN_INT (amount)));
28069
28070 RTX_FRAME_RELATED_P (tmp) = 1;
28071
28072 if (cfun->machine->uses_anonymous_args)
28073 {
28074 /* Restore pretend args. Refer to arm_expand_prologue for how pretend
28075 args are saved on the stack. */
28076 int num_regs = crtl->args.pretend_args_size / 4;
28077 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28078 for (j = 0, i = 0; j < num_regs; i++)
28079 if (saved_regs_mask & (1 << i))
28080 {
28081 rtx reg = gen_rtx_REG (SImode, i);
28082 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28083 j++;
28084 }
28085 REG_NOTES (tmp) = dwarf;
28086 }
28087 arm_add_cfa_adjust_cfa_note (tmp, amount,
28088 stack_pointer_rtx, stack_pointer_rtx);
28089 }
28090
28091 if (IS_CMSE_ENTRY (func_type))
28092 {
28093 /* CMSE_ENTRY always returns. */
28094 gcc_assert (really_return);
28095 /* Clear all caller-saved regs that are not used to return. */
28096 cmse_nonsecure_entry_clear_before_return ();
28097
28098 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28099 VLDR. */
28100 if (TARGET_HAVE_FPCXT_CMSE)
28101 {
28102 rtx_insn *insn;
28103
28104 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28105 GEN_INT (FPCXTNS_ENUM)));
28106 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28107 plus_constant (Pmode, stack_pointer_rtx, 4));
28108 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28109 RTX_FRAME_RELATED_P (insn) = 1;
28110 }
28111 }
28112
28113 if (arm_current_function_pac_enabled_p ())
28114 emit_insn (gen_aut_nop ());
28115
28116 if (!really_return)
28117 return;
28118
28119 if (crtl->calls_eh_return)
28120 emit_insn (gen_addsi3 (stack_pointer_rtx,
28121 stack_pointer_rtx,
28122 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28123
28124 if (IS_STACKALIGN (func_type))
28125 /* Restore the original stack pointer. Before prologue, the stack was
28126 realigned and the original stack pointer saved in r0. For details,
28127 see comment in arm_expand_prologue. */
28128 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28129
28130 emit_jump_insn (simple_return_rtx);
28131 }
28132
28133 /* Implementation of insn prologue_thumb1_interwork. This is the first
28134 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28135
28136 const char *
28137 thumb1_output_interwork (void)
28138 {
28139 const char * name;
28140 FILE *f = asm_out_file;
28141
28142 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28143 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28144 == SYMBOL_REF);
28145 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28146
28147 /* Generate code sequence to switch us into Thumb mode. */
28148 /* The .code 32 directive has already been emitted by
28149 ASM_DECLARE_FUNCTION_NAME. */
28150 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28151 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28152
28153 /* Generate a label, so that the debugger will notice the
28154 change in instruction sets. This label is also used by
28155 the assembler to bypass the ARM code when this function
28156 is called from a Thumb encoded function elsewhere in the
28157 same file. Hence the definition of STUB_NAME here must
28158 agree with the definition in gas/config/tc-arm.c. */
28159
28160 #define STUB_NAME ".real_start_of"
28161
28162 fprintf (f, "\t.code\t16\n");
28163 #ifdef ARM_PE
28164 if (arm_dllexport_name_p (name))
28165 name = arm_strip_name_encoding (name);
28166 #endif
28167 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28168 fprintf (f, "\t.thumb_func\n");
28169 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28170
28171 return "";
28172 }
28173
28174 /* Handle the case of a double word load into a low register from
28175 a computed memory address. The computed address may involve a
28176 register which is overwritten by the load. */
28177 const char *
28178 thumb_load_double_from_address (rtx *operands)
28179 {
28180 rtx addr;
28181 rtx base;
28182 rtx offset;
28183 rtx arg1;
28184 rtx arg2;
28185
28186 gcc_assert (REG_P (operands[0]));
28187 gcc_assert (MEM_P (operands[1]));
28188
28189 /* Get the memory address. */
28190 addr = XEXP (operands[1], 0);
28191
28192 /* Work out how the memory address is computed. */
28193 switch (GET_CODE (addr))
28194 {
28195 case REG:
28196 operands[2] = adjust_address (operands[1], SImode, 4);
28197
28198 if (REGNO (operands[0]) == REGNO (addr))
28199 {
28200 output_asm_insn ("ldr\t%H0, %2", operands);
28201 output_asm_insn ("ldr\t%0, %1", operands);
28202 }
28203 else
28204 {
28205 output_asm_insn ("ldr\t%0, %1", operands);
28206 output_asm_insn ("ldr\t%H0, %2", operands);
28207 }
28208 break;
28209
28210 case CONST:
28211 /* Compute <address> + 4 for the high order load. */
28212 operands[2] = adjust_address (operands[1], SImode, 4);
28213
28214 output_asm_insn ("ldr\t%0, %1", operands);
28215 output_asm_insn ("ldr\t%H0, %2", operands);
28216 break;
28217
28218 case PLUS:
28219 arg1 = XEXP (addr, 0);
28220 arg2 = XEXP (addr, 1);
28221
28222 if (CONSTANT_P (arg1))
28223 base = arg2, offset = arg1;
28224 else
28225 base = arg1, offset = arg2;
28226
28227 gcc_assert (REG_P (base));
28228
28229 /* Catch the case of <address> = <reg> + <reg> */
28230 if (REG_P (offset))
28231 {
28232 int reg_offset = REGNO (offset);
28233 int reg_base = REGNO (base);
28234 int reg_dest = REGNO (operands[0]);
28235
28236 /* Add the base and offset registers together into the
28237 higher destination register. */
28238 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28239 reg_dest + 1, reg_base, reg_offset);
28240
28241 /* Load the lower destination register from the address in
28242 the higher destination register. */
28243 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28244 reg_dest, reg_dest + 1);
28245
28246 /* Load the higher destination register from its own address
28247 plus 4. */
28248 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28249 reg_dest + 1, reg_dest + 1);
28250 }
28251 else
28252 {
28253 /* Compute <address> + 4 for the high order load. */
28254 operands[2] = adjust_address (operands[1], SImode, 4);
28255
28256 /* If the computed address is held in the low order register
28257 then load the high order register first, otherwise always
28258 load the low order register first. */
28259 if (REGNO (operands[0]) == REGNO (base))
28260 {
28261 output_asm_insn ("ldr\t%H0, %2", operands);
28262 output_asm_insn ("ldr\t%0, %1", operands);
28263 }
28264 else
28265 {
28266 output_asm_insn ("ldr\t%0, %1", operands);
28267 output_asm_insn ("ldr\t%H0, %2", operands);
28268 }
28269 }
28270 break;
28271
28272 case LABEL_REF:
28273 /* With no registers to worry about we can just load the value
28274 directly. */
28275 operands[2] = adjust_address (operands[1], SImode, 4);
28276
28277 output_asm_insn ("ldr\t%H0, %2", operands);
28278 output_asm_insn ("ldr\t%0, %1", operands);
28279 break;
28280
28281 default:
28282 gcc_unreachable ();
28283 }
28284
28285 return "";
28286 }
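/* Example of the overlap handling above (hypothetical registers): for a
   double-word load into r2/r3 from the address held in r2, the high word
   is loaded first,
	ldr	r3, [r2, #4]
	ldr	r2, [r2]
   so that the base register is not clobbered before its last use.  */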
28287
28288 const char *
28289 thumb_output_move_mem_multiple (int n, rtx *operands)
28290 {
28291 switch (n)
28292 {
28293 case 2:
28294 if (REGNO (operands[4]) > REGNO (operands[5]))
28295 std::swap (operands[4], operands[5]);
28296
28297 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28298 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28299 break;
28300
28301 case 3:
28302 if (REGNO (operands[4]) > REGNO (operands[5]))
28303 std::swap (operands[4], operands[5]);
28304 if (REGNO (operands[5]) > REGNO (operands[6]))
28305 std::swap (operands[5], operands[6]);
28306 if (REGNO (operands[4]) > REGNO (operands[5]))
28307 std::swap (operands[4], operands[5]);
28308
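	  /* The three conditional swaps above sort operands[4..6] into
	     ascending order, so the ldmia/stmia register lists below are
	     printed lowest-first, as the assembler expects.  */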
28309 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28310 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28311 break;
28312
28313 default:
28314 gcc_unreachable ();
28315 }
28316
28317 return "";
28318 }
28319
28320 /* Output a call-via instruction for thumb state. */
28321 const char *
28322 thumb_call_via_reg (rtx reg)
28323 {
28324 int regno = REGNO (reg);
28325 rtx *labelp;
28326
28327 gcc_assert (regno < LR_REGNUM);
28328
28329 /* If we are in the normal text section we can use a single instance
28330 per compilation unit. If we are doing function sections, then we need
28331 an entry per section, since we can't rely on reachability. */
28332 if (in_section == text_section)
28333 {
28334 thumb_call_reg_needed = 1;
28335
28336 if (thumb_call_via_label[regno] == NULL)
28337 thumb_call_via_label[regno] = gen_label_rtx ();
28338 labelp = thumb_call_via_label + regno;
28339 }
28340 else
28341 {
28342 if (cfun->machine->call_via[regno] == NULL)
28343 cfun->machine->call_via[regno] = gen_label_rtx ();
28344 labelp = cfun->machine->call_via + regno;
28345 }
28346
28347 output_asm_insn ("bl\t%a0", labelp);
28348 return "";
28349 }
28350
28351 /* Routines for generating rtl. */
28352 void
28353 thumb_expand_cpymemqi (rtx *operands)
28354 {
28355 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28356 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28357 HOST_WIDE_INT len = INTVAL (operands[2]);
28358 HOST_WIDE_INT offset = 0;
28359
28360 while (len >= 12)
28361 {
28362 emit_insn (gen_cpymem12b (out, in, out, in));
28363 len -= 12;
28364 }
28365
28366 if (len >= 8)
28367 {
28368 emit_insn (gen_cpymem8b (out, in, out, in));
28369 len -= 8;
28370 }
28371
28372 if (len >= 4)
28373 {
28374 rtx reg = gen_reg_rtx (SImode);
28375 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28376 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28377 len -= 4;
28378 offset += 4;
28379 }
28380
28381 if (len >= 2)
28382 {
28383 rtx reg = gen_reg_rtx (HImode);
28384 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28385 plus_constant (Pmode, in,
28386 offset))));
28387 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28388 offset)),
28389 reg));
28390 len -= 2;
28391 offset += 2;
28392 }
28393
28394 if (len)
28395 {
28396 rtx reg = gen_reg_rtx (QImode);
28397 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28398 plus_constant (Pmode, in,
28399 offset))));
28400 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28401 offset)),
28402 reg));
28403 }
28404 }
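/* Worked example: a 27-byte copy emits two cpymem12b blocks (24 bytes,
   with the pointer registers advanced by the pattern itself), and the
   remaining 3 bytes are then handled as one halfword move at offset 0
   followed by one byte move at offset 2.  */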
28405
28406 void
28407 thumb_reload_out_hi (rtx *operands)
28408 {
28409 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28410 }
28411
28412 /* Return the length of a function name prefix
28413 that starts with the character 'c'. */
28414 static int
28415 arm_get_strip_length (int c)
28416 {
28417 switch (c)
28418 {
28419 ARM_NAME_ENCODING_LENGTHS
28420 default: return 0;
28421 }
28422 }
28423
28424 /* Return a pointer to a function's name with any
28425 and all prefix encodings stripped from it. */
28426 const char *
28427 arm_strip_name_encoding (const char *name)
28428 {
28429 int skip;
28430
28431 while ((skip = arm_get_strip_length (* name)))
28432 name += skip;
28433
28434 return name;
28435 }
28436
28437 /* If there is a '*' anywhere in the name's prefix, then
28438 emit the stripped name verbatim, otherwise prepend an
28439 underscore if leading underscores are being used. */
28440 void
28441 arm_asm_output_labelref (FILE *stream, const char *name)
28442 {
28443 int skip;
28444 int verbatim = 0;
28445
28446 while ((skip = arm_get_strip_length (* name)))
28447 {
28448 verbatim |= (*name == '*');
28449 name += skip;
28450 }
28451
28452 if (verbatim)
28453 fputs (name, stream);
28454 else
28455 asm_fprintf (stream, "%U%s", name);
28456 }
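
/* For example (illustrative only): a name encoded as "*foo" is emitted
   verbatim as "foo", whereas a plain "foo" goes through %U and so picks up
   the user label prefix, e.g. "_foo" on targets that use leading
   underscores.  */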
28457
28458 /* This function is used to emit an EABI tag and its associated value.
28459 We emit the numerical value of the tag in case the assembler does not
28460 support textual tags (e.g. gas prior to 2.20). If requested we include
28461 the tag name in a comment so that anyone reading the assembler output
28462 will know which tag is being set.
28463
28464 This function is not static because arm-c.cc needs it too. */
28465
28466 void
28467 arm_emit_eabi_attribute (const char *name, int num, int val)
28468 {
28469 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28470 if (flag_verbose_asm || flag_debug_asm)
28471 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28472 asm_fprintf (asm_out_file, "\n");
28473 }
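
/* As an illustration, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   prints

	.eabi_attribute 26, 2	@ Tag_ABI_enum_size

   where the trailing comment appears only when flag_verbose_asm or
   flag_debug_asm is set (-fverbose-asm or -dA).  */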
28474
28475 /* This function is used to print CPU tuning information as a comment
28476 in the assembler file. Pointers are not printed for now. */
28477
28478 void
28479 arm_print_tune_info (void)
28480 {
28481 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28482 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28483 current_tune->constant_limit);
28484 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28485 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28486 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28487 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28488 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28489 "prefetch.l1_cache_size:\t%d\n",
28490 current_tune->prefetch.l1_cache_size);
28491 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28492 "prefetch.l1_cache_line_size:\t%d\n",
28493 current_tune->prefetch.l1_cache_line_size);
28494 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28495 "prefer_constant_pool:\t%d\n",
28496 (int) current_tune->prefer_constant_pool);
28497 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28498 "branch_cost:\t(s:speed, p:predictable)\n");
28499 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28500 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28501 current_tune->branch_cost (false, false));
28502 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28503 current_tune->branch_cost (false, true));
28504 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28505 current_tune->branch_cost (true, false));
28506 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28507 current_tune->branch_cost (true, true));
28508 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28509 "prefer_ldrd_strd:\t%d\n",
28510 (int) current_tune->prefer_ldrd_strd);
28511 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28512 "logical_op_non_short_circuit:\t[%d,%d]\n",
28513 (int) current_tune->logical_op_non_short_circuit_thumb,
28514 (int) current_tune->logical_op_non_short_circuit_arm);
28515 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28516 "disparage_flag_setting_t16_encodings:\t%d\n",
28517 (int) current_tune->disparage_flag_setting_t16_encodings);
28518 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28519 "string_ops_prefer_neon:\t%d\n",
28520 (int) current_tune->string_ops_prefer_neon);
28521 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28522 "max_insns_inline_memset:\t%d\n",
28523 current_tune->max_insns_inline_memset);
28524 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28525 current_tune->fusible_ops);
28526 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28527 (int) current_tune->sched_autopref);
28528 }
28529
28530 /* The last set of target options used to emit .arch directives, etc. This
28531 could be a function-local static if it were not required to expose it as a
28532 root to the garbage collector. */
28533 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28534
28535 /* Print .arch and .arch_extension directives corresponding to the
28536 current architecture configuration. */
28537 static void
28538 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28539 {
28540 arm_build_target build_target;
28541 /* If the target options haven't changed since the last time we were called
28542 there is nothing to do. This should be sufficient to suppress the
28543 majority of redundant work. */
28544 if (last_asm_targ_options == targ_options)
28545 return;
28546
28547 last_asm_targ_options = targ_options;
28548
28549 build_target.isa = sbitmap_alloc (isa_num_bits);
28550 arm_configure_build_target (&build_target, targ_options, false);
28551
28552 if (build_target.core_name
28553 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28554 {
28555 const char* truncated_name
28556 = arm_rewrite_selected_cpu (build_target.core_name);
28557 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28558 }
28559
28560 const arch_option *arch
28561 = arm_parse_arch_option_name (all_architectures, "-march",
28562 build_target.arch_name);
28563 auto_sbitmap opt_bits (isa_num_bits);
28564
28565 gcc_assert (arch);
28566
28567 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28568 {
28569 /* Keep backward compatibility for assemblers which don't support
28570 armv7ve. Fortunately, none of the following extensions are reset
28571 by a .fpu directive. */
28572 asm_fprintf (stream, "\t.arch armv7-a\n");
28573 asm_fprintf (stream, "\t.arch_extension virt\n");
28574 asm_fprintf (stream, "\t.arch_extension idiv\n");
28575 asm_fprintf (stream, "\t.arch_extension sec\n");
28576 asm_fprintf (stream, "\t.arch_extension mp\n");
28577 }
28578 else
28579 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28580
28581 /* The .fpu directive will reset any architecture extensions from the
28582 assembler that relate to the fp/vector extensions. So put this out before
28583 any .arch_extension directives. */
28584 const char *fpu_name = (TARGET_SOFT_FLOAT
28585 ? "softvfp"
28586 : arm_identify_fpu_from_isa (build_target.isa));
28587 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28588
28589 if (!arch->common.extensions)
28590 return;
28591
28592 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28593 opt->name != NULL;
28594 opt++)
28595 {
28596 if (!opt->remove)
28597 {
28598 arm_initialize_isa (opt_bits, opt->isa_bits);
28599
28600 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28601 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28602 floating point instructions are disabled. So the following check
28603 restricts the printing of ".arch_extension mve" and
28604 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28605 this special behaviour because the feature bits "mve" and
28606 "mve_float" are not part of "fpu bits", so they are not cleared
28607 when -mfloat-abi=soft (i.e. nofp) but the macros TARGET_HAVE_MVE and
28608 TARGET_HAVE_MVE_FLOAT are disabled. */
28609 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28610 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28611 && !TARGET_HAVE_MVE_FLOAT))
28612 continue;
28613
28614 /* If every feature bit of this option is set in the target ISA
28615 specification, print out the option name. However, don't print
28616 anything if all the bits are part of the FPU specification. */
28617 if (bitmap_subset_p (opt_bits, build_target.isa)
28618 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28619 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28620 }
28621 }
28622 }
28623
28624 static void
28625 arm_file_start (void)
28626 {
28627 int val;
28628 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28629 bool bti = (aarch_enable_bti == 1);
28630
28631 arm_print_asm_arch_directives
28632 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28633
28634 if (TARGET_BPABI)
28635 {
28636 /* If we have a named cpu, but the assembler does not support that
28637 name via .cpu, put out a cpu name attribute; but don't do this if the
28638 name starts with the fictitious prefix, 'generic'. */
28639 if (arm_active_target.core_name
28640 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28641 && !startswith (arm_active_target.core_name, "generic"))
28642 {
28643 const char* truncated_name
28644 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28645 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28646 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28647 truncated_name);
28648 }
28649
28650 if (print_tune_info)
28651 arm_print_tune_info ();
28652
28653 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28654 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28655
28656 if (TARGET_HARD_FLOAT_ABI)
28657 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28658
28659 /* Some of these attributes only apply when the corresponding features
28660 are used. However we don't have any easy way of figuring this out.
28661 Conservatively record the setting that would have been used. */
28662
28663 if (flag_rounding_math)
28664 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28665
28666 if (!flag_unsafe_math_optimizations)
28667 {
28668 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28669 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28670 }
28671 if (flag_signaling_nans)
28672 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28673
28674 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28675 flag_finite_math_only ? 1 : 3);
28676
28677 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28678 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28679 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28680 flag_short_enums ? 1 : 2);
28681
28682 /* Tag_ABI_optimization_goals. */
28683 if (optimize_size)
28684 val = 4;
28685 else if (optimize >= 2)
28686 val = 2;
28687 else if (optimize)
28688 val = 1;
28689 else
28690 val = 6;
28691 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28692
28693 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28694 unaligned_access);
28695
28696 if (arm_fp16_format)
28697 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28698 (int) arm_fp16_format);
28699
28700 if (TARGET_HAVE_PACBTI)
28701 {
28702 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28703 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28704 }
28705 else if (pac || bti)
28706 {
28707 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28708 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28709 }
28710
28711 if (bti)
28712 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28713 if (pac)
28714 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28715
28716 if (arm_lang_output_object_attributes_hook)
28717 arm_lang_output_object_attributes_hook();
28718 }
28719
28720 default_file_start ();
28721 }
28722
28723 static void
28724 arm_file_end (void)
28725 {
28726 int regno;
28727
28728 /* Just in case the last function output in the assembler had non-default
28729 architecture directives, we force the assembler state back to the default
28730 set, so that any 'calculated' build attributes are based on the default
28731 options rather than the special options for that function. */
28732 arm_print_asm_arch_directives
28733 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28734
28735 if (NEED_INDICATE_EXEC_STACK)
28736 /* Add .note.GNU-stack. */
28737 file_end_indicate_exec_stack ();
28738
28739 if (! thumb_call_reg_needed)
28740 return;
28741
28742 switch_to_section (text_section);
28743 asm_fprintf (asm_out_file, "\t.code 16\n");
28744 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28745
28746 for (regno = 0; regno < LR_REGNUM; regno++)
28747 {
28748 rtx label = thumb_call_via_label[regno];
28749
28750 if (label != 0)
28751 {
28752 targetm.asm_out.internal_label (asm_out_file, "L",
28753 CODE_LABEL_NUMBER (label));
28754 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28755 }
28756 }
28757 }
28758
28759 #ifndef ARM_PE
28760 /* Symbols in the text segment can be accessed without indirecting via the
28761 constant pool; it may take an extra binary operation, but this is still
28762 faster than indirecting via memory. Don't do this when not optimizing,
28763 since we won't be calculating all of the offsets necessary to do this
28764 simplification. */
28765
28766 static void
28767 arm_encode_section_info (tree decl, rtx rtl, int first)
28768 {
28769 if (optimize > 0 && TREE_CONSTANT (decl))
28770 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28771
28772 default_encode_section_info (decl, rtl, first);
28773 }
28774 #endif /* !ARM_PE */
28775
28776 static void
28777 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28778 {
28779 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28780 && !strcmp (prefix, "L"))
28781 {
28782 arm_ccfsm_state = 0;
28783 arm_target_insn = NULL;
28784 }
28785 default_internal_label (stream, prefix, labelno);
28786 }
28787
28788 /* Define classes to generate code as RTL or output asm to a file.
28789 Using templates then allows the same code to be used to output code
28790 sequences in the two formats. */
28791 class thumb1_const_rtl
28792 {
28793 public:
28794 thumb1_const_rtl (rtx dst) : dst (dst) {}
28795
28796 void mov (HOST_WIDE_INT val)
28797 {
28798 emit_set_insn (dst, GEN_INT (val));
28799 }
28800
28801 void add (HOST_WIDE_INT val)
28802 {
28803 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28804 }
28805
28806 void ashift (HOST_WIDE_INT shift)
28807 {
28808 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28809 }
28810
28811 void neg ()
28812 {
28813 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28814 }
28815
28816 private:
28817 rtx dst;
28818 };
28819
28820 class thumb1_const_print
28821 {
28822 public:
28823 thumb1_const_print (FILE *f, int regno)
28824 {
28825 t_file = f;
28826 dst_regname = reg_names[regno];
28827 }
28828
28829 void mov (HOST_WIDE_INT val)
28830 {
28831 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28832 dst_regname, val);
28833 }
28834
28835 void add (HOST_WIDE_INT val)
28836 {
28837 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28838 dst_regname, val);
28839 }
28840
28841 void ashift (HOST_WIDE_INT shift)
28842 {
28843 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28844 dst_regname, shift);
28845 }
28846
28847 void neg ()
28848 {
28849 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28850 }
28851
28852 private:
28853 FILE *t_file;
28854 const char *dst_regname;
28855 };
28856
28857 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28858 Avoid generating useless code when one of the bytes is zero. */
28859 template <class T>
28860 void
28861 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28862 {
28863 bool mov_done_p = false;
28864 unsigned HOST_WIDE_INT val = op1;
28865 int shift = 0;
28866 int i;
28867
28868 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28869
28870 if (val <= 255)
28871 {
28872 dst.mov (val);
28873 return;
28874 }
28875
28876 /* For negative numbers with the top nine bits set, build the
28877 opposite of OP1, then negate it; this is generally shorter and never
28878 longer. */
28879 if ((val & 0xFF800000) == 0xFF800000)
28880 {
28881 thumb1_gen_const_int_1 (dst, -op1);
28882 dst.neg ();
28883 return;
28884 }
28885
28886 /* In the general case, we need 7 instructions to build
28887 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28888 do better if VAL is small enough, or
28889 right-shiftable by a suitable amount. If the
28890 right-shift lets us encode at least one byte fewer,
28891 it's worth it: we save an adds and an lsls at the
28892 expense of a final lsls. */
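/* For instance (illustrative value): OP1 == 0x44000 has its lowest set
   bit at position 14, so it is right-shifted down to 17 and emitted as
   "movs rN, #17" followed by "lsls rN, #14" instead of being built
   byte by byte.  */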
28893 int final_shift = number_of_first_bit_set (val);
28894
28895 int leading_zeroes = clz_hwi (val);
28896 int number_of_bytes_needed
28897 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28898 / BITS_PER_UNIT) + 1;
28899 int number_of_bytes_needed2
28900 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28901 / BITS_PER_UNIT) + 1;
28902
28903 if (number_of_bytes_needed2 < number_of_bytes_needed)
28904 val >>= final_shift;
28905 else
28906 final_shift = 0;
28907
28908 /* If we are in a very small range, we can use either a single movs
28909 or movs+adds. */
28910 if (val <= 510)
28911 {
28912 if (val > 255)
28913 {
28914 unsigned HOST_WIDE_INT high = val - 255;
28915
28916 dst.mov (high);
28917 dst.add (255);
28918 }
28919 else
28920 dst.mov (val);
28921
28922 if (final_shift > 0)
28923 dst.ashift (final_shift);
28924 }
28925 else
28926 {
28927 /* General case, emit upper 3 bytes as needed. */
28928 for (i = 0; i < 3; i++)
28929 {
28930 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28931
28932 if (byte)
28933 {
28934 /* We are about to emit new bits, stop accumulating a
28935 shift amount, and left-shift only if we have already
28936 emitted some upper bits. */
28937 if (mov_done_p)
28938 {
28939 dst.ashift (shift);
28940 dst.add (byte);
28941 }
28942 else
28943 dst.mov (byte);
28944
28945 /* Stop accumulating shift amount since we've just
28946 emitted some bits. */
28947 shift = 0;
28948
28949 mov_done_p = true;
28950 }
28951
28952 if (mov_done_p)
28953 shift += 8;
28954 }
28955
28956 /* Emit lower byte. */
28957 if (!mov_done_p)
28958 dst.mov (val & 0xff);
28959 else
28960 {
28961 dst.ashift (shift);
28962 if (val & 0xff)
28963 dst.add (val & 0xff);
28964 }
28965
28966 if (final_shift > 0)
28967 dst.ashift (final_shift);
28968 }
28969 }
28970
28971 /* Proxies for thumb1.md, since the thumb1_const_print and
28972 thumb1_const_rtl classes are not exported. */
28973 void
28974 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28975 {
28976 thumb1_const_rtl t (dst);
28977 thumb1_gen_const_int_1 (t, op1);
28978 }
28979
28980 void
28981 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28982 {
28983 thumb1_const_print t (asm_out_file, REGNO (dst));
28984 thumb1_gen_const_int_1 (t, op1);
28985 }
28986
28987 /* Output code to add DELTA to the first argument, and then jump
28988 to FUNCTION. Used for C++ multiple inheritance. */
28989
28990 static void
28991 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28992 HOST_WIDE_INT, tree function)
28993 {
28994 static int thunk_label = 0;
28995 char label[256];
28996 char labelpc[256];
28997 int mi_delta = delta;
28998 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28999 int shift = 0;
29000 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29001 ? 1 : 0);
29002 if (mi_delta < 0)
29003 mi_delta = - mi_delta;
29004
29005 final_start_function (emit_barrier (), file, 1);
29006
29007 if (TARGET_THUMB1)
29008 {
29009 int labelno = thunk_label++;
29010 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29011 /* Thunks are entered in arm mode when available. */
29012 if (TARGET_THUMB1_ONLY)
29013 {
29014 /* push r3 so we can use it as a temporary. */
29015 /* TODO: Omit this save if r3 is not used. */
29016 fputs ("\tpush {r3}\n", file);
29017
29018 /* With -mpure-code, we cannot load the address from the
29019 constant pool: we build it explicitly. */
29020 if (target_pure_code)
29021 {
29022 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29023 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29024 fputc ('\n', file);
29025 fputs ("\tlsls r3, #8\n", file);
29026 fputs ("\tadds\tr3, #:upper0_7:#", file);
29027 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29028 fputc ('\n', file);
29029 fputs ("\tlsls r3, #8\n", file);
29030 fputs ("\tadds\tr3, #:lower8_15:#", file);
29031 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29032 fputc ('\n', file);
29033 fputs ("\tlsls r3, #8\n", file);
29034 fputs ("\tadds\tr3, #:lower0_7:#", file);
29035 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29036 fputc ('\n', file);
29037 }
29038 else
29039 fputs ("\tldr\tr3, ", file);
29040 }
29041 else
29042 {
29043 fputs ("\tldr\tr12, ", file);
29044 }
29045
29046 if (!target_pure_code)
29047 {
29048 assemble_name (file, label);
29049 fputc ('\n', file);
29050 }
29051
29052 if (flag_pic)
29053 {
29054 /* If we are generating PIC, the ldr instruction below loads
29055 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29056 the address of the add + 8, so we have:
29057
29058 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29059 = target + 1.
29060
29061 Note that we have "+ 1" because some versions of GNU ld
29062 don't set the low bit of the result for R_ARM_REL32
29063 relocations against thumb function symbols.
29064 On ARMv6M this is +4, not +8. */
29065 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29066 assemble_name (file, labelpc);
29067 fputs (":\n", file);
29068 if (TARGET_THUMB1_ONLY)
29069 {
29070 /* This is 2 insns after the start of the thunk, so we know it
29071 is 4-byte aligned. */
29072 fputs ("\tadd\tr3, pc, r3\n", file);
29073 fputs ("\tmov r12, r3\n", file);
29074 }
29075 else
29076 fputs ("\tadd\tr12, pc, r12\n", file);
29077 }
29078 else if (TARGET_THUMB1_ONLY)
29079 fputs ("\tmov r12, r3\n", file);
29080 }
29081 if (TARGET_THUMB1_ONLY)
29082 {
29083 if (mi_delta > 255)
29084 {
29085 /* With -mpure-code, we cannot load MI_DELTA from the
29086 constant pool: we build it explicitly. */
29087 if (target_pure_code)
29088 {
29089 thumb1_const_print r3 (file, 3);
29090 thumb1_gen_const_int_1 (r3, mi_delta);
29091 }
29092 else
29093 {
29094 fputs ("\tldr\tr3, ", file);
29095 assemble_name (file, label);
29096 fputs ("+4\n", file);
29097 }
29098 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29099 mi_op, this_regno, this_regno);
29100 }
29101 else if (mi_delta != 0)
29102 {
29103 /* Thumb1 unified syntax requires an s suffix in the instruction name when
29104 one of the operands is an immediate. */
29105 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29106 mi_op, this_regno, this_regno,
29107 mi_delta);
29108 }
29109 }
29110 else
29111 {
29112 /* TODO: Use movw/movt for large constants when available. */
29113 while (mi_delta != 0)
29114 {
29115 if ((mi_delta & (3 << shift)) == 0)
29116 shift += 2;
29117 else
29118 {
29119 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29120 mi_op, this_regno, this_regno,
29121 mi_delta & (0xff << shift));
29122 mi_delta &= ~(0xff << shift);
29123 shift += 8;
29124 }
29125 }
29126 }
29127 if (TARGET_THUMB1)
29128 {
29129 if (TARGET_THUMB1_ONLY)
29130 fputs ("\tpop\t{r3}\n", file);
29131
29132 fprintf (file, "\tbx\tr12\n");
29133
29134 /* With -mpure-code, we don't need to emit literals for the
29135 function address and delta since we emitted code to build
29136 them. */
29137 if (!target_pure_code)
29138 {
29139 ASM_OUTPUT_ALIGN (file, 2);
29140 assemble_name (file, label);
29141 fputs (":\n", file);
29142 if (flag_pic)
29143 {
29144 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29145 rtx tem = XEXP (DECL_RTL (function), 0);
29146 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29147 pipeline offset is four rather than eight. Adjust the offset
29148 accordingly. */
29149 tem = plus_constant (GET_MODE (tem), tem,
29150 TARGET_THUMB1_ONLY ? -3 : -7);
29151 tem = gen_rtx_MINUS (GET_MODE (tem),
29152 tem,
29153 gen_rtx_SYMBOL_REF (Pmode,
29154 ggc_strdup (labelpc)));
29155 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29156 }
29157 else
29158 /* Output ".word .LTHUNKn". */
29159 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29160
29161 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29162 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29163 }
29164 }
29165 else
29166 {
29167 fputs ("\tb\t", file);
29168 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29169 if (NEED_PLT_RELOC)
29170 fputs ("(PLT)", file);
29171 fputc ('\n', file);
29172 }
29173
29174 final_end_function ();
29175 }
29176
29177 /* MI thunk handling for TARGET_32BIT. */
29178
29179 static void
29180 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29181 HOST_WIDE_INT vcall_offset, tree function)
29182 {
29183 const bool long_call_p = arm_is_long_call_p (function);
29184
29185 /* On ARM, this_regno is R0 or R1 depending on
29186 whether the function returns an aggregate or not.
29187 */
29188 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29189 function)
29190 ? R1_REGNUM : R0_REGNUM);
29191
29192 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29193 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29194 reload_completed = 1;
29195 emit_note (NOTE_INSN_PROLOGUE_END);
29196
29197 /* Add DELTA to THIS_RTX. */
29198 if (delta != 0)
29199 arm_split_constant (PLUS, Pmode, NULL_RTX,
29200 delta, this_rtx, this_rtx, false);
29201
29202 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29203 if (vcall_offset != 0)
29204 {
29205 /* Load *THIS_RTX. */
29206 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29207 /* Compute *THIS_RTX + VCALL_OFFSET. */
29208 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29209 false);
29210 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29211 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29212 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29213 }
29214
29215 /* Generate a tail call to the target function. */
29216 if (!TREE_USED (function))
29217 {
29218 assemble_external (function);
29219 TREE_USED (function) = 1;
29220 }
29221 rtx funexp = XEXP (DECL_RTL (function), 0);
29222 if (long_call_p)
29223 {
29224 emit_move_insn (temp, funexp);
29225 funexp = temp;
29226 }
29227 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29228 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29229 SIBLING_CALL_P (insn) = 1;
29230 emit_barrier ();
29231
29232 /* Indirect calls require a bit of fixup in PIC mode. */
29233 if (long_call_p)
29234 {
29235 split_all_insns_noflow ();
29236 arm_reorg ();
29237 }
29238
29239 insn = get_insns ();
29240 shorten_branches (insn);
29241 final_start_function (insn, file, 1);
29242 final (insn, file, 1);
29243 final_end_function ();
29244
29245 /* Stop pretending this is a post-reload pass. */
29246 reload_completed = 0;
29247 }
29248
29249 /* Output code to add DELTA to the first argument, and then jump
29250 to FUNCTION. Used for C++ multiple inheritance. */
29251
29252 static void
29253 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29254 HOST_WIDE_INT vcall_offset, tree function)
29255 {
29256 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29257
29258 assemble_start_function (thunk, fnname);
29259 if (TARGET_32BIT)
29260 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29261 else
29262 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29263 assemble_end_function (thunk, fnname);
29264 }
29265
29266 int
29267 arm_emit_vector_const (FILE *file, rtx x)
29268 {
29269 int i;
29270 const char * pattern;
29271
29272 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29273
29274 switch (GET_MODE (x))
29275 {
29276 case E_V2SImode: pattern = "%08x"; break;
29277 case E_V4HImode: pattern = "%04x"; break;
29278 case E_V8QImode: pattern = "%02x"; break;
29279 default: gcc_unreachable ();
29280 }
29281
29282 fprintf (file, "0x");
29283 for (i = CONST_VECTOR_NUNITS (x); i--;)
29284 {
29285 rtx element;
29286
29287 element = CONST_VECTOR_ELT (x, i);
29288 fprintf (file, pattern, INTVAL (element));
29289 }
29290
29291 return 1;
29292 }
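
/* For example (illustrative constant): a V4HImode vector {1, 2, 3, 4} is
   printed as 0x0004000300020001, i.e. the elements are emitted from the
   highest-numbered lane downwards, each zero-padded to its element
   width.  */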
29293
29294 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29295 HFmode constant pool entries are actually loaded with ldr. */
29296 void
29297 arm_emit_fp16_const (rtx c)
29298 {
29299 long bits;
29300
29301 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29302 if (WORDS_BIG_ENDIAN)
29303 assemble_zeros (2);
29304 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29305 if (!WORDS_BIG_ENDIAN)
29306 assemble_zeros (2);
29307 }
29308
29309 const char *
29310 arm_output_load_gr (rtx *operands)
29311 {
29312 rtx reg;
29313 rtx offset;
29314 rtx wcgr;
29315 rtx sum;
29316
29317 if (!MEM_P (operands [1])
29318 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29319 || !REG_P (reg = XEXP (sum, 0))
29320 || !CONST_INT_P (offset = XEXP (sum, 1))
29321 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29322 return "wldrw%?\t%0, %1";
29323
29324 /* Fix up an out-of-range load of a GR register. */
29325 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29326 wcgr = operands[0];
29327 operands[0] = reg;
29328 output_asm_insn ("ldr%?\t%0, %1", operands);
29329
29330 operands[0] = wcgr;
29331 operands[1] = reg;
29332 output_asm_insn ("tmcr%?\t%0, %1", operands);
29333 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29334
29335 return "";
29336 }
29337
29338 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29339
29340 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29341 named arg and all anonymous args onto the stack.
29342 XXX I know the prologue shouldn't be pushing registers, but it is faster
29343 that way. */
29344
29345 static void
29346 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29347 const function_arg_info &arg,
29348 int *pretend_size,
29349 int second_time ATTRIBUTE_UNUSED)
29350 {
29351 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29352 int nregs;
29353
29354 cfun->machine->uses_anonymous_args = 1;
29355 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29356 {
29357 nregs = pcum->aapcs_ncrn;
29358 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29359 && (nregs & 1))
29360 {
29361 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29362 if (res < 0 && warn_psabi)
29363 inform (input_location, "parameter passing for argument of "
29364 "type %qT changed in GCC 7.1", arg.type);
29365 else if (res > 0)
29366 {
29367 nregs++;
29368 if (res > 1 && warn_psabi)
29369 inform (input_location,
29370 "parameter passing for argument of type "
29371 "%qT changed in GCC 9.1", arg.type);
29372 }
29373 }
29374 }
29375 else
29376 nregs = pcum->nregs;
29377
29378 if (nregs < NUM_ARG_REGS)
29379 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29380 }
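
/* Worked example (AAPCS, illustrative): with a single named argument
   passed in r0 and no doubleword-alignment adjustment, nregs is 1, so
   *pretend_size becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12 bytes
   and the prologue pushes r1-r3 for the anonymous arguments.  */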
29381
29382 /* We can't rely on the caller doing the proper promotion when
29383 using APCS or ATPCS. */
29384
29385 static bool
29386 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29387 {
29388 return !TARGET_AAPCS_BASED;
29389 }
29390
29391 static machine_mode
29392 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29393 machine_mode mode,
29394 int *punsignedp ATTRIBUTE_UNUSED,
29395 const_tree fntype ATTRIBUTE_UNUSED,
29396 int for_return ATTRIBUTE_UNUSED)
29397 {
29398 if (GET_MODE_CLASS (mode) == MODE_INT
29399 && GET_MODE_SIZE (mode) < 4)
29400 return SImode;
29401
29402 return mode;
29403 }
29404
29405
29406 static bool
29407 arm_default_short_enums (void)
29408 {
29409 return ARM_DEFAULT_SHORT_ENUMS;
29410 }
29411
29412
29413 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29414
29415 static bool
29416 arm_align_anon_bitfield (void)
29417 {
29418 return TARGET_AAPCS_BASED;
29419 }
29420
29421
29422 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29423
29424 static tree
29425 arm_cxx_guard_type (void)
29426 {
29427 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29428 }
29429
29430
29431 /* The EABI says test the least significant bit of a guard variable. */
29432
29433 static bool
29434 arm_cxx_guard_mask_bit (void)
29435 {
29436 return TARGET_AAPCS_BASED;
29437 }
29438
29439
29440 /* The EABI specifies that all array cookies are 8 bytes long. */
29441
29442 static tree
29443 arm_get_cookie_size (tree type)
29444 {
29445 tree size;
29446
29447 if (!TARGET_AAPCS_BASED)
29448 return default_cxx_get_cookie_size (type);
29449
29450 size = build_int_cst (sizetype, 8);
29451 return size;
29452 }
29453
29454
29455 /* The EABI says that array cookies should also contain the element size. */
29456
29457 static bool
29458 arm_cookie_has_size (void)
29459 {
29460 return TARGET_AAPCS_BASED;
29461 }
29462
29463
29464 /* The EABI says constructors and destructors should return a pointer to
29465 the object constructed/destroyed. */
29466
29467 static bool
29468 arm_cxx_cdtor_returns_this (void)
29469 {
29470 return TARGET_AAPCS_BASED;
29471 }
29472
29473 /* The EABI says that an inline function may never be the key
29474 method. */
29475
29476 static bool
29477 arm_cxx_key_method_may_be_inline (void)
29478 {
29479 return !TARGET_AAPCS_BASED;
29480 }
29481
29482 static void
29483 arm_cxx_determine_class_data_visibility (tree decl)
29484 {
29485 if (!TARGET_AAPCS_BASED
29486 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29487 return;
29488
29489 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29490 is exported. However, on systems without dynamic vague linkage,
29491 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29492 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29493 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29494 else
29495 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29496 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29497 }
29498
29499 static bool
29500 arm_cxx_class_data_always_comdat (void)
29501 {
29502 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29503 vague linkage if the class has no key function. */
29504 return !TARGET_AAPCS_BASED;
29505 }
29506
29507
29508 /* The EABI says __aeabi_atexit should be used to register static
29509 destructors. */
29510
29511 static bool
29512 arm_cxx_use_aeabi_atexit (void)
29513 {
29514 return TARGET_AAPCS_BASED;
29515 }
29516
29517
29518 void
29519 arm_set_return_address (rtx source, rtx scratch)
29520 {
29521 arm_stack_offsets *offsets;
29522 HOST_WIDE_INT delta;
29523 rtx addr, mem;
29524 unsigned long saved_regs;
29525
29526 offsets = arm_get_frame_offsets ();
29527 saved_regs = offsets->saved_regs_mask;
29528
29529 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29530 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29531 else
29532 {
29533 if (frame_pointer_needed)
29534 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29535 else
29536 {
29537 /* LR will be the first saved register. */
29538 delta = offsets->outgoing_args - (offsets->frame + 4);
29539
29540
29541 if (delta >= 4096)
29542 {
29543 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29544 GEN_INT (delta & ~4095)));
29545 addr = scratch;
29546 delta &= 4095;
29547 }
29548 else
29549 addr = stack_pointer_rtx;
29550
29551 addr = plus_constant (Pmode, addr, delta);
29552 }
29553
29554 /* The store needs to be marked to prevent DSE from deleting
29555 it as dead if it is based on fp. */
29556 mem = gen_frame_mem (Pmode, addr);
29557 MEM_VOLATILE_P (mem) = true;
29558 emit_move_insn (mem, source);
29559 }
29560 }
29561
29562
29563 void
29564 thumb_set_return_address (rtx source, rtx scratch)
29565 {
29566 arm_stack_offsets *offsets;
29567 HOST_WIDE_INT delta;
29568 HOST_WIDE_INT limit;
29569 int reg;
29570 rtx addr, mem;
29571 unsigned long mask;
29572
29573 emit_use (source);
29574
29575 offsets = arm_get_frame_offsets ();
29576 mask = offsets->saved_regs_mask;
29577 if (mask & (1 << LR_REGNUM))
29578 {
29579 limit = 1024;
29580 /* Find the saved regs. */
29581 if (frame_pointer_needed)
29582 {
29583 delta = offsets->soft_frame - offsets->saved_args;
29584 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29585 if (TARGET_THUMB1)
29586 limit = 128;
29587 }
29588 else
29589 {
29590 delta = offsets->outgoing_args - offsets->saved_args;
29591 reg = SP_REGNUM;
29592 }
29593 /* Allow for the stack frame. */
29594 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29595 delta -= 16;
29596 /* The link register is always the first saved register. */
29597 delta -= 4;
29598
29599 /* Construct the address. */
29600 addr = gen_rtx_REG (SImode, reg);
29601 if (delta > limit)
29602 {
29603 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29604 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29605 addr = scratch;
29606 }
29607 else
29608 addr = plus_constant (Pmode, addr, delta);
29609
29610 /* The store needs to be marked to prevent DSE from deleting
29611 it as dead if it is based on fp. */
29612 mem = gen_frame_mem (Pmode, addr);
29613 MEM_VOLATILE_P (mem) = true;
29614 emit_move_insn (mem, source);
29615 }
29616 else
29617 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29618 }
29619
29620 /* Implements target hook vector_mode_supported_p. */
29621 bool
29622 arm_vector_mode_supported_p (machine_mode mode)
29623 {
29624 /* Neon also supports V2SImode, etc. listed in the clause below. */
29625 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29626 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29627 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29628 || mode == V8BFmode))
29629 return true;
29630
29631 if ((TARGET_NEON || TARGET_IWMMXT)
29632 && ((mode == V2SImode)
29633 || (mode == V4HImode)
29634 || (mode == V8QImode)))
29635 return true;
29636
29637 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29638 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29639 || mode == V2HAmode))
29640 return true;
29641
29642 if (TARGET_HAVE_MVE
29643 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29644 return true;
29645
29646 if (TARGET_HAVE_MVE_FLOAT
29647 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29648 return true;
29649
29650 return false;
29651 }
29652
29653 /* Implements target hook array_mode_supported_p. */
29654
29655 static bool
29656 arm_array_mode_supported_p (machine_mode mode,
29657 unsigned HOST_WIDE_INT nelems)
29658 {
29659 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29660 for now, as the lane-swapping logic needs to be extended in the expanders.
29661 See PR target/82518. */
29662 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29663 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29664 && (nelems >= 2 && nelems <= 4))
29665 return true;
29666
29667 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29668 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29669 return true;
29670
29671 return false;
29672 }
29673
29674 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29675 registers when autovectorizing for Neon, at least until multiple vector
29676 widths are supported properly by the middle-end. */
29677
29678 static machine_mode
29679 arm_preferred_simd_mode (scalar_mode mode)
29680 {
29681 if (TARGET_NEON)
29682 switch (mode)
29683 {
29684 case E_HFmode:
29685 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29686 case E_SFmode:
29687 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29688 case E_SImode:
29689 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29690 case E_HImode:
29691 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29692 case E_QImode:
29693 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29694 case E_DImode:
29695 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29696 return V2DImode;
29697 break;
29698
29699 default:;
29700 }
29701
29702 if (TARGET_REALLY_IWMMXT)
29703 switch (mode)
29704 {
29705 case E_SImode:
29706 return V2SImode;
29707 case E_HImode:
29708 return V4HImode;
29709 case E_QImode:
29710 return V8QImode;
29711
29712 default:;
29713 }
29714
29715 if (TARGET_HAVE_MVE)
29716 switch (mode)
29717 {
29718 case E_QImode:
29719 return V16QImode;
29720 case E_HImode:
29721 return V8HImode;
29722 case E_SImode:
29723 return V4SImode;
29724
29725 default:;
29726 }
29727
29728 if (TARGET_HAVE_MVE_FLOAT)
29729 switch (mode)
29730 {
29731 case E_HFmode:
29732 return V8HFmode;
29733 case E_SFmode:
29734 return V4SFmode;
29735
29736 default:;
29737 }
29738
29739 return word_mode;
29740 }
29741
29742 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29743
29744 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29745 using r0-r4 for function arguments and r7 for the stack frame, and not have
29746 enough left over to do doubleword arithmetic. For Thumb-2 all the
29747 potentially problematic instructions accept high registers so this is not
29748 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29749 that require many low registers. */
29750 static bool
29751 arm_class_likely_spilled_p (reg_class_t rclass)
29752 {
29753 if ((TARGET_THUMB1 && rclass == LO_REGS)
29754 || rclass == CC_REG)
29755 return true;
29756
29757 return default_class_likely_spilled_p (rclass);
29758 }
29759
29760 /* Implements target hook small_register_classes_for_mode_p. */
29761 bool
29762 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29763 {
29764 return TARGET_THUMB1;
29765 }
29766
29767 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29768 ARM insns and therefore guarantee that the shift count is modulo 256.
29769 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29770 guarantee no particular behavior for out-of-range counts. */
29771
29772 static unsigned HOST_WIDE_INT
29773 arm_shift_truncation_mask (machine_mode mode)
29774 {
29775 return mode == SImode ? 255 : 0;
29776 }
29777
29778
29779 /* Map internal gcc register numbers to DWARF2 register numbers. */
29780
29781 unsigned int
29782 arm_debugger_regno (unsigned int regno)
29783 {
29784 if (regno < 16)
29785 return regno;
29786
29787 if (IS_VFP_REGNUM (regno))
29788 {
29789 /* See comment in arm_dwarf_register_span. */
29790 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29791 return 64 + regno - FIRST_VFP_REGNUM;
29792 else
29793 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29794 }
29795
29796 if (IS_IWMMXT_GR_REGNUM (regno))
29797 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29798
29799 if (IS_IWMMXT_REGNUM (regno))
29800 return 112 + regno - FIRST_IWMMXT_REGNUM;
29801
29802 if (IS_PAC_REGNUM (regno))
29803 return DWARF_PAC_REGNUM;
29804
29805 return DWARF_FRAME_REGISTERS;
29806 }
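
/* Concrete illustration of the mapping above: core registers keep their
   own numbers (sp stays 13), the first single-precision VFP register maps
   to DWARF register 64, and D registers with no single-precision view
   fall into the 256-based range.  */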
29807
29808 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29809 GCC models them as 64 32-bit registers, so we need to describe this to
29810 the DWARF generation code. Other registers can use the default. */
29811 static rtx
29812 arm_dwarf_register_span (rtx rtl)
29813 {
29814 machine_mode mode;
29815 unsigned regno;
29816 rtx parts[16];
29817 int nregs;
29818 int i;
29819
29820 regno = REGNO (rtl);
29821 if (!IS_VFP_REGNUM (regno))
29822 return NULL_RTX;
29823
29824 /* XXX FIXME: The EABI defines two VFP register ranges:
29825 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29826 256-287: D0-D31
29827 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29828 corresponding D register. Until GDB supports this, we shall use the
29829 legacy encodings. We also use these encodings for D0-D15 for
29830 compatibility with older debuggers. */
29831 mode = GET_MODE (rtl);
29832 if (GET_MODE_SIZE (mode) < 8)
29833 return NULL_RTX;
29834
29835 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29836 {
29837 nregs = GET_MODE_SIZE (mode) / 4;
29838 for (i = 0; i < nregs; i += 2)
29839 if (TARGET_BIG_END)
29840 {
29841 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29842 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29843 }
29844 else
29845 {
29846 parts[i] = gen_rtx_REG (SImode, regno + i);
29847 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29848 }
29849 }
29850 else
29851 {
29852 nregs = GET_MODE_SIZE (mode) / 8;
29853 for (i = 0; i < nregs; i++)
29854 parts[i] = gen_rtx_REG (DImode, regno + i);
29855 }
29856
29857 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29858 }
29859
29860 #if ARM_UNWIND_INFO
29861 /* Emit unwind directives for a store-multiple instruction or stack pointer
29862 push during alignment.
29863 These should only ever be generated by the function prologue code, so
29864 expect them to have a particular form.
29865 The store-multiple instruction sometimes pushes pc as the last register,
29866 although it should not be tracked in the unwind information; for -Os it
29867 sometimes pushes some dummy registers before the first register that needs
29868 to be tracked in the unwind information; such dummy registers are there just
29869 to avoid separate stack adjustment, and will not be restored in the
29870 epilogue. */
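
/* For instance (hypothetical prologue): a "push {r4, r7, lr}" is annotated
   as ".save {r4, r7, lr}", a vpush of d8-d9 as ".vsave {d8, d9}", and any
   untracked pc or dummy slots show up only as ".pad" adjustments.  */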
29871
29872 static void
29873 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29874 {
29875 int i;
29876 HOST_WIDE_INT offset;
29877 HOST_WIDE_INT nregs;
29878 int reg_size;
29879 unsigned reg;
29880 unsigned lastreg;
29881 unsigned padfirst = 0, padlast = 0;
29882 rtx e;
29883
29884 e = XVECEXP (p, 0, 0);
29885 gcc_assert (GET_CODE (e) == SET);
29886
29887 /* First insn will adjust the stack pointer. */
29888 gcc_assert (GET_CODE (e) == SET
29889 && REG_P (SET_DEST (e))
29890 && REGNO (SET_DEST (e)) == SP_REGNUM
29891 && GET_CODE (SET_SRC (e)) == PLUS);
29892
29893 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29894 nregs = XVECLEN (p, 0) - 1;
29895 gcc_assert (nregs);
29896
29897 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29898 if (reg < 16 || IS_PAC_REGNUM (reg))
29899 {
29900 /* For -Os dummy registers can be pushed at the beginning to
29901 avoid separate stack pointer adjustment. */
29902 e = XVECEXP (p, 0, 1);
29903 e = XEXP (SET_DEST (e), 0);
29904 if (GET_CODE (e) == PLUS)
29905 padfirst = INTVAL (XEXP (e, 1));
29906 gcc_assert (padfirst == 0 || optimize_size);
29907 /* The function prologue may also push pc, but not annotate it as it is
29908 never restored. We turn this into a stack pointer adjustment. */
29909 e = XVECEXP (p, 0, nregs);
29910 e = XEXP (SET_DEST (e), 0);
29911 if (GET_CODE (e) == PLUS)
29912 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29913 else
29914 padlast = offset - 4;
29915 gcc_assert (padlast == 0 || padlast == 4);
29916 if (padlast == 4)
29917 fprintf (out_file, "\t.pad #4\n");
29918 reg_size = 4;
29919 fprintf (out_file, "\t.save {");
29920 }
29921 else if (IS_VFP_REGNUM (reg))
29922 {
29923 reg_size = 8;
29924 fprintf (out_file, "\t.vsave {");
29925 }
29926 else
29927 /* Unknown register type. */
29928 gcc_unreachable ();
29929
29930 /* If the stack increment doesn't match the size of the saved registers,
29931 something has gone horribly wrong. */
29932 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29933
29934 offset = padfirst;
29935 lastreg = 0;
29936 /* The remaining insns will describe the stores. */
29937 for (i = 1; i <= nregs; i++)
29938 {
29939 /* Expect (set (mem <addr>) (reg)).
29940 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29941 e = XVECEXP (p, 0, i);
29942 gcc_assert (GET_CODE (e) == SET
29943 && MEM_P (SET_DEST (e))
29944 && REG_P (SET_SRC (e)));
29945
29946 reg = REGNO (SET_SRC (e));
29947 gcc_assert (reg >= lastreg);
29948
29949 if (i != 1)
29950 fprintf (out_file, ", ");
29951 /* We can't use %r for vfp because we need to use the
29952 double precision register names. */
29953 if (IS_VFP_REGNUM (reg))
29954 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29955 else if (IS_PAC_REGNUM (reg))
29956 asm_fprintf (asm_out_file, "ra_auth_code");
29957 else
29958 asm_fprintf (out_file, "%r", reg);
29959
29960 if (flag_checking)
29961 {
29962 /* Check that the addresses are consecutive. */
29963 e = XEXP (SET_DEST (e), 0);
29964 if (GET_CODE (e) == PLUS)
29965 gcc_assert (REG_P (XEXP (e, 0))
29966 && REGNO (XEXP (e, 0)) == SP_REGNUM
29967 && CONST_INT_P (XEXP (e, 1))
29968 && offset == INTVAL (XEXP (e, 1)));
29969 else
29970 gcc_assert (i == 1
29971 && REG_P (e)
29972 && REGNO (e) == SP_REGNUM);
29973 offset += reg_size;
29974 }
29975 }
29976 fprintf (out_file, "}\n");
29977 if (padfirst)
29978 fprintf (out_file, "\t.pad #%d\n", padfirst);
29979 }
29980
29981 /* Emit unwind directives for a SET. */
29982
29983 static void
29984 arm_unwind_emit_set (FILE * out_file, rtx p)
29985 {
29986 rtx e0;
29987 rtx e1;
29988 unsigned reg;
29989
29990 e0 = XEXP (p, 0);
29991 e1 = XEXP (p, 1);
29992 switch (GET_CODE (e0))
29993 {
29994 case MEM:
29995 /* Pushing a single register. */
29996 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29997 || !REG_P (XEXP (XEXP (e0, 0), 0))
29998 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29999 abort ();
30000
30001 asm_fprintf (out_file, "\t.save ");
30002 if (IS_VFP_REGNUM (REGNO (e1)))
30003 asm_fprintf(out_file, "{d%d}\n",
30004 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30005 else
30006 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30007 break;
30008
30009 case REG:
30010 if (REGNO (e0) == SP_REGNUM)
30011 {
30012 /* A stack increment. */
30013 if (GET_CODE (e1) != PLUS
30014 || !REG_P (XEXP (e1, 0))
30015 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30016 || !CONST_INT_P (XEXP (e1, 1)))
30017 abort ();
30018
30019 asm_fprintf (out_file, "\t.pad #%wd\n",
30020 -INTVAL (XEXP (e1, 1)));
30021 }
30022 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30023 {
30024 HOST_WIDE_INT offset;
30025
30026 if (GET_CODE (e1) == PLUS)
30027 {
30028 if (!REG_P (XEXP (e1, 0))
30029 || !CONST_INT_P (XEXP (e1, 1)))
30030 abort ();
30031 reg = REGNO (XEXP (e1, 0));
30032 offset = INTVAL (XEXP (e1, 1));
30033 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30034 HARD_FRAME_POINTER_REGNUM, reg,
30035 offset);
30036 }
30037 else if (REG_P (e1))
30038 {
30039 reg = REGNO (e1);
30040 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30041 HARD_FRAME_POINTER_REGNUM, reg);
30042 }
30043 else
30044 abort ();
30045 }
30046 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30047 {
30048 /* Move from sp to reg. */
30049 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30050 }
30051 else if (GET_CODE (e1) == PLUS
30052 && REG_P (XEXP (e1, 0))
30053 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30054 && CONST_INT_P (XEXP (e1, 1)))
30055 {
30056 /* Set reg to offset from sp. */
30057 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30058 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30059 }
30060 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30061 {
30062 if (cfun->machine->pacspval_needed)
30063 asm_fprintf (out_file, "\t.pacspval\n");
30064 }
30065 else
30066 abort ();
30067 break;
30068
30069 default:
30070 abort ();
30071 }
30072 }
30073
30074
30075 /* Emit unwind directives for the given insn. */
30076
30077 static void
30078 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30079 {
30080 rtx note, pat;
30081 bool handled_one = false;
30082
30083 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30084 return;
30085
30086 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30087 && (TREE_NOTHROW (current_function_decl)
30088 || crtl->all_throwers_are_sibcalls))
30089 return;
30090
30091 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30092 return;
30093
30094 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30095 {
30096 switch (REG_NOTE_KIND (note))
30097 {
30098 case REG_FRAME_RELATED_EXPR:
30099 pat = XEXP (note, 0);
30100 goto found;
30101
30102 case REG_CFA_REGISTER:
30103 pat = XEXP (note, 0);
30104 if (pat == NULL)
30105 {
30106 pat = PATTERN (insn);
30107 if (GET_CODE (pat) == PARALLEL)
30108 pat = XVECEXP (pat, 0, 0);
30109 }
30110
30111 /* Only emitted for IS_STACKALIGN re-alignment. */
30112 {
30113 rtx dest, src;
30114 unsigned reg;
30115
30116 src = SET_SRC (pat);
30117 dest = SET_DEST (pat);
30118
30119 gcc_assert (src == stack_pointer_rtx
30120 || IS_PAC_REGNUM (REGNO (src)));
30121 reg = REGNO (dest);
30122
30123 if (IS_PAC_REGNUM (REGNO (src)))
30124 arm_unwind_emit_set (out_file, PATTERN (insn));
30125 else
30126 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30127 reg + 0x90, reg);
30128 }
30129 handled_one = true;
30130 break;
30131
30132 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30133 to get correct dwarf information for shrink-wrap. We should not
30134 emit unwind information for it because these are used either for
30135 pretend arguments or notes to adjust sp and restore registers from
30136 stack. */
30137 case REG_CFA_DEF_CFA:
30138 case REG_CFA_ADJUST_CFA:
30139 case REG_CFA_RESTORE:
30140 return;
30141
30142 case REG_CFA_EXPRESSION:
30143 case REG_CFA_OFFSET:
30144 /* ??? Only handling here what we actually emit. */
30145 gcc_unreachable ();
30146
30147 default:
30148 break;
30149 }
30150 }
30151 if (handled_one)
30152 return;
30153 pat = PATTERN (insn);
30154 found:
30155
30156 switch (GET_CODE (pat))
30157 {
30158 case SET:
30159 arm_unwind_emit_set (out_file, pat);
30160 break;
30161
30162 case SEQUENCE:
30163 /* Store multiple. */
30164 arm_unwind_emit_sequence (out_file, pat);
30165 break;
30166
30167 default:
30168 abort();
30169 }
30170 }
30171
30172
30173 /* Output a reference from a function exception table to the type_info
30174 object X. The EABI specifies that the symbol should be relocated by
30175 an R_ARM_TARGET2 relocation. */
30176
30177 static bool
30178 arm_output_ttype (rtx x)
30179 {
30180 fputs ("\t.word\t", asm_out_file);
30181 output_addr_const (asm_out_file, x);
30182 /* Use special relocations for symbol references. */
30183 if (!CONST_INT_P (x))
30184 fputs ("(TARGET2)", asm_out_file);
30185 fputc ('\n', asm_out_file);
30186
30187 return TRUE;
30188 }
30189
30190 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30191
30192 static void
30193 arm_asm_emit_except_personality (rtx personality)
30194 {
30195 fputs ("\t.personality\t", asm_out_file);
30196 output_addr_const (asm_out_file, personality);
30197 fputc ('\n', asm_out_file);
30198 }
30199 #endif /* ARM_UNWIND_INFO */
30200
30201 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30202
30203 static void
30204 arm_asm_init_sections (void)
30205 {
30206 #if ARM_UNWIND_INFO
30207 exception_section = get_unnamed_section (0, output_section_asm_op,
30208 "\t.handlerdata");
30209 #endif /* ARM_UNWIND_INFO */
30210
30211 #ifdef OBJECT_FORMAT_ELF
30212 if (target_pure_code)
30213 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30214 #endif
30215 }
30216
30217 /* Output unwind directives for the start/end of a function. */
30218
30219 void
30220 arm_output_fn_unwind (FILE * f, bool prologue)
30221 {
30222 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30223 return;
30224
30225 if (prologue)
30226 fputs ("\t.fnstart\n", f);
30227 else
30228 {
30229 /* If this function will never be unwound, then mark it as such.
30230 The same condition is used in arm_unwind_emit to suppress
30231 the frame annotations. */
30232 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30233 && (TREE_NOTHROW (current_function_decl)
30234 || crtl->all_throwers_are_sibcalls))
30235 fputs("\t.cantunwind\n", f);
30236
30237 fputs ("\t.fnend\n", f);
30238 }
30239 }
30240
30241 static bool
30242 arm_emit_tls_decoration (FILE *fp, rtx x)
30243 {
30244 enum tls_reloc reloc;
30245 rtx val;
30246
30247 val = XVECEXP (x, 0, 0);
30248 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30249
30250 output_addr_const (fp, val);
30251
30252 switch (reloc)
30253 {
30254 case TLS_GD32:
30255 fputs ("(tlsgd)", fp);
30256 break;
30257 case TLS_GD32_FDPIC:
30258 fputs ("(tlsgd_fdpic)", fp);
30259 break;
30260 case TLS_LDM32:
30261 fputs ("(tlsldm)", fp);
30262 break;
30263 case TLS_LDM32_FDPIC:
30264 fputs ("(tlsldm_fdpic)", fp);
30265 break;
30266 case TLS_LDO32:
30267 fputs ("(tlsldo)", fp);
30268 break;
30269 case TLS_IE32:
30270 fputs ("(gottpoff)", fp);
30271 break;
30272 case TLS_IE32_FDPIC:
30273 fputs ("(gottpoff_fdpic)", fp);
30274 break;
30275 case TLS_LE32:
30276 fputs ("(tpoff)", fp);
30277 break;
30278 case TLS_DESCSEQ:
30279 fputs ("(tlsdesc)", fp);
30280 break;
30281 default:
30282 gcc_unreachable ();
30283 }
30284
30285 switch (reloc)
30286 {
30287 case TLS_GD32:
30288 case TLS_LDM32:
30289 case TLS_IE32:
30290 case TLS_DESCSEQ:
30291 fputs (" + (. - ", fp);
30292 output_addr_const (fp, XVECEXP (x, 0, 2));
30293 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
30294 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30295 output_addr_const (fp, XVECEXP (x, 0, 3));
30296 fputc (')', fp);
30297 break;
30298 default:
30299 break;
30300 }
30301
30302 return TRUE;
30303 }
30304
30305 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30306
30307 static void
30308 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30309 {
30310 gcc_assert (size == 4);
30311 fputs ("\t.word\t", file);
30312 output_addr_const (file, x);
30313 fputs ("(tlsldo)", file);
30314 }
30315
30316 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30317
30318 static bool
30319 arm_output_addr_const_extra (FILE *fp, rtx x)
30320 {
30321 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30322 return arm_emit_tls_decoration (fp, x);
30323 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30324 {
30325 char label[256];
30326 int labelno = INTVAL (XVECEXP (x, 0, 0));
30327
30328 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30329 assemble_name_raw (fp, label);
30330
30331 return TRUE;
30332 }
30333 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30334 {
30335 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30336 if (GOT_PCREL)
30337 fputs ("+.", fp);
30338 fputs ("-(", fp);
30339 output_addr_const (fp, XVECEXP (x, 0, 0));
30340 fputc (')', fp);
30341 return TRUE;
30342 }
30343 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30344 {
30345 output_addr_const (fp, XVECEXP (x, 0, 0));
30346 if (GOT_PCREL)
30347 fputs ("+.", fp);
30348 fputs ("-(", fp);
30349 output_addr_const (fp, XVECEXP (x, 0, 1));
30350 fputc (')', fp);
30351 return TRUE;
30352 }
30353 else if (GET_CODE (x) == CONST_VECTOR)
30354 return arm_emit_vector_const (fp, x);
30355
30356 return FALSE;
30357 }
30358
30359 /* Output assembly for a shift instruction.
30360 SET_FLAGS determines how the instruction modifies the condition codes.
30361 0 - Do not set condition codes.
30362 1 - Set condition codes.
30363 2 - Use smallest instruction. */
30364 const char *
30365 arm_output_shift (rtx *operands, int set_flags)
30366 {
30367 char pattern[100];
30368 static const char flag_chars[3] = {'?', '.', '!'};
30369 const char *shift;
30370 HOST_WIDE_INT val;
30371 char c;
30372
30373 c = flag_chars[set_flags];
30374 shift = shift_op (operands[3], &val);
30375 if (shift)
30376 {
30377 if (val != -1)
30378 operands[2] = GEN_INT (val);
30379 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30380 }
30381 else
30382 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30383
30384 output_asm_insn (pattern, operands);
30385 return "";
30386 }
30387
30388 /* Output assembly for a WMMX immediate shift instruction. */
30389 const char *
30390 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30391 {
30392 int shift = INTVAL (operands[2]);
30393 char templ[50];
30394 machine_mode opmode = GET_MODE (operands[0]);
30395
30396 gcc_assert (shift >= 0);
30397
30398 /* Handle shift values in the register versions that exceed the element
30399 width: > 63 (D qualifier), > 31 (W qualifier) or > 15 (H qualifier). */
30400 if (((opmode == V4HImode) && (shift > 15))
30401 || ((opmode == V2SImode) && (shift > 31))
30402 || ((opmode == DImode) && (shift > 63)))
30403 {
30404 if (wror_or_wsra)
30405 {
30406 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30407 output_asm_insn (templ, operands);
30408 if (opmode == DImode)
30409 {
30410 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30411 output_asm_insn (templ, operands);
30412 }
30413 }
30414 else
30415 {
30416 /* The destination register will contain all zeros. */
30417 sprintf (templ, "wzero\t%%0");
30418 output_asm_insn (templ, operands);
30419 }
30420 return "";
30421 }
30422
30423 if ((opmode == DImode) && (shift > 32))
30424 {
30425 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30426 output_asm_insn (templ, operands);
30427 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30428 output_asm_insn (templ, operands);
30429 }
30430 else
30431 {
30432 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30433 output_asm_insn (templ, operands);
30434 }
30435 return "";
30436 }
30437
30438 /* Output assembly for a WMMX tinsr instruction. */
30439 const char *
30440 arm_output_iwmmxt_tinsr (rtx *operands)
30441 {
30442 int mask = INTVAL (operands[3]);
30443 int i;
30444 char templ[50];
30445 int units = mode_nunits[GET_MODE (operands[0])];
30446 gcc_assert ((mask & (mask - 1)) == 0);
30447 for (i = 0; i < units; ++i)
30448 {
30449 if ((mask & 0x01) == 1)
30450 {
30451 break;
30452 }
30453 mask >>= 1;
30454 }
30455 gcc_assert (i < units);
30456 {
30457 switch (GET_MODE (operands[0]))
30458 {
30459 case E_V8QImode:
30460 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30461 break;
30462 case E_V4HImode:
30463 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30464 break;
30465 case E_V2SImode:
30466 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30467 break;
30468 default:
30469 gcc_unreachable ();
30470 break;
30471 }
30472 output_asm_insn (templ, operands);
30473 }
30474 return "";
30475 }
30476
30477 /* Output an ARM casesi dispatch sequence. Used by the arm_casesi_internal insn.
30478 Responsible for the handling of switch statements in ARM state. */
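/* Illustrative sketch only: for a QImode unsigned dispatch table the code
   below emits roughly
	cmp	%0, %1
	bhi	%l3
	ldrb	%4, [%5, %0]
	add	pc, pc, %4, lsl #2
   followed by the internal Lrtx label and a nop before the jump table.  */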
30479 const char *
30480 arm_output_casesi (rtx *operands)
30481 {
30482 char label[100];
30483 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30484 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30485 output_asm_insn ("cmp\t%0, %1", operands);
30486 output_asm_insn ("bhi\t%l3", operands);
30487 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30488 switch (GET_MODE (diff_vec))
30489 {
30490 case E_QImode:
30491 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30492 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30493 else
30494 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30495 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30496 break;
30497 case E_HImode:
30498 if (REGNO (operands[4]) != REGNO (operands[5]))
30499 {
30500 output_asm_insn ("add\t%4, %0, %0", operands);
30501 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30502 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30503 else
30504 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30505 }
30506 else
30507 {
30508 output_asm_insn ("add\t%4, %5, %0", operands);
30509 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30510 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30511 else
30512 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30513 }
30514 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30515 break;
30516 case E_SImode:
30517 if (flag_pic)
30518 {
30519 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30520 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30521 }
30522 else
30523 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30524 break;
30525 default:
30526 gcc_unreachable ();
30527 }
30528 assemble_label (asm_out_file, label);
30529 output_asm_insn ("nop", operands);
30530 return "";
30531 }
30532
30533 /* Output a Thumb-1 casesi dispatch sequence. */
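/* Note (assumption based on libgcc): the %___gnu_thumb1_case_* helpers
   referenced below are provided by libgcc (lib1funcs.S); roughly, each helper
   indexes the dispatch table that follows the call site and adjusts the
   return address to the selected case.  */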
30534 const char *
30535 thumb1_output_casesi (rtx *operands)
30536 {
30537 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30538
30539 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30540
30541 switch (GET_MODE (diff_vec))
30542 {
30543 case E_QImode:
30544 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30545 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30546 case E_HImode:
30547 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30548 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30549 case E_SImode:
30550 return "bl\t%___gnu_thumb1_case_si";
30551 default:
30552 gcc_unreachable ();
30553 }
30554 }
30555
30556 /* Output a Thumb-2 casesi instruction. */
30557 const char *
30558 thumb2_output_casesi (rtx *operands)
30559 {
30560 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30561
30562 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30563
30564 output_asm_insn ("cmp\t%0, %1", operands);
30565 output_asm_insn ("bhi\t%l3", operands);
30566 switch (GET_MODE (diff_vec))
30567 {
30568 case E_QImode:
30569 return "tbb\t[%|pc, %0]";
30570 case E_HImode:
30571 return "tbh\t[%|pc, %0, lsl #1]";
30572 case E_SImode:
30573 if (flag_pic)
30574 {
30575 output_asm_insn ("adr\t%4, %l2", operands);
30576 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30577 output_asm_insn ("add\t%4, %4, %5", operands);
30578 return "bx\t%4";
30579 }
30580 else
30581 {
30582 output_asm_insn ("adr\t%4, %l2", operands);
30583 return "ldr\t%|pc, [%4, %0, lsl #2]";
30584 }
30585 default:
30586 gcc_unreachable ();
30587 }
30588 }
30589
30590 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30591 per-core tuning structs. */
30592 static int
30593 arm_issue_rate (void)
30594 {
30595 return current_tune->issue_rate;
30596 }
30597
30598 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30599 static int
30600 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30601 {
30602 if (DEBUG_INSN_P (insn))
30603 return more;
30604
30605 rtx_code code = GET_CODE (PATTERN (insn));
30606 if (code == USE || code == CLOBBER)
30607 return more;
30608
30609 if (get_attr_type (insn) == TYPE_NO_INSN)
30610 return more;
30611
30612 return more - 1;
30613 }
30614
30615 /* Return how many instructions the scheduler should look ahead to choose
30616 the best one. */
30617 static int
30618 arm_first_cycle_multipass_dfa_lookahead (void)
30619 {
30620 int issue_rate = arm_issue_rate ();
30621
30622 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30623 }
30624
30625 /* Enable modeling of L2 auto-prefetcher. */
30626 static int
30627 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30628 {
30629 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30630 }
30631
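/* Implement TARGET_MANGLE_TYPE. Return the mangled name for types that need
   ARM-specific mangling (the AAPCS va_list, half-precision floating-point
   types and Neon builtin types), or NULL to use the default mangling.  */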
30632 const char *
30633 arm_mangle_type (const_tree type)
30634 {
30635 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30636 has to be mangled as if it is in the "std" namespace. */
30637 if (TARGET_AAPCS_BASED
30638 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30639 return "St9__va_list";
30640
30641 /* Half-precision floating point types. */
30642 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30643 {
30644 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30645 return NULL;
30646 if (TYPE_MODE (type) == BFmode)
30647 return "u6__bf16";
30648 else
30649 return "Dh";
30650 }
30651
30652 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30653 builtin type. */
30654 if (TYPE_NAME (type) != NULL)
30655 return arm_mangle_builtin_type (type);
30656
30657 /* Use the default mangling. */
30658 return NULL;
30659 }
30660
30661 /* Order of allocation of core registers for Thumb: this allocation is
30662 written over the corresponding initial entries of the array
30663 initialized with REG_ALLOC_ORDER. We allocate all low registers
30664 first. Saving and restoring a low register is usually cheaper than
30665 using a call-clobbered high register. */
30666
30667 static const int thumb_core_reg_alloc_order[] =
30668 {
30669 3, 2, 1, 0, 4, 5, 6, 7,
30670 12, 14, 8, 9, 10, 11
30671 };
30672
30673 /* Adjust register allocation order when compiling for Thumb. */
30674
30675 void
30676 arm_order_regs_for_local_alloc (void)
30677 {
30678 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30679 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30680 if (TARGET_THUMB)
30681 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30682 sizeof (thumb_core_reg_alloc_order));
30683 }
30684
30685 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30686
30687 bool
30688 arm_frame_pointer_required (void)
30689 {
30690 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30691 return true;
30692
30693 /* If the function receives nonlocal gotos, it needs to save the frame
30694 pointer in the nonlocal_goto_save_area object. */
30695 if (cfun->has_nonlocal_label)
30696 return true;
30697
30698 /* The frame pointer is required for non-leaf APCS frames. */
30699 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30700 return true;
30701
30702 /* If we are probing the stack in the prologue, we will have a faulting
30703 instruction prior to the stack adjustment and this requires a frame
30704 pointer if we want to catch the exception using the EABI unwinder. */
30705 if (!IS_INTERRUPT (arm_current_func_type ())
30706 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30707 || flag_stack_clash_protection)
30708 && arm_except_unwind_info (&global_options) == UI_TARGET
30709 && cfun->can_throw_non_call_exceptions)
30710 {
30711 HOST_WIDE_INT size = get_frame_size ();
30712
30713 /* That's irrelevant if there is no stack adjustment. */
30714 if (size <= 0)
30715 return false;
30716
30717 /* That's relevant only if there is a stack probe. */
30718 if (crtl->is_leaf && !cfun->calls_alloca)
30719 {
30720 /* We don't have the final size of the frame so adjust. */
30721 size += 32 * UNITS_PER_WORD;
30722 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30723 return true;
30724 }
30725 else
30726 return true;
30727 }
30728
30729 return false;
30730 }
30731
30732 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30733 All modes except THUMB1 have conditional execution.
30734 If we have conditional arithmetic, return false before reload to
30735 enable some ifcvt transformations. */
30736 static bool
30737 arm_have_conditional_execution (void)
30738 {
30739 bool has_cond_exec, enable_ifcvt_trans;
30740
30741 /* Only THUMB1 cannot support conditional execution. */
30742 has_cond_exec = !TARGET_THUMB1;
30743
30744 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30745 before reload. */
30746 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30747
30748 return has_cond_exec && !enable_ifcvt_trans;
30749 }
30750
30751 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30752 static HOST_WIDE_INT
30753 arm_vector_alignment (const_tree type)
30754 {
30755 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30756
30757 if (TARGET_AAPCS_BASED)
30758 align = MIN (align, 64);
30759
30760 return align;
30761 }
30762
30763 static unsigned int
30764 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30765 {
30766 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30767 {
30768 modes->safe_push (V16QImode);
30769 modes->safe_push (V8QImode);
30770 }
30771 return 0;
30772 }
30773
30774 static bool
30775 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30776 {
30777 /* Vectors which aren't in packed structures will not be less aligned than
30778 the natural alignment of their element type, so this is safe. */
30779 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30780 return !is_packed;
30781
30782 return default_builtin_vector_alignment_reachable (type, is_packed);
30783 }
30784
30785 static bool
30786 arm_builtin_support_vector_misalignment (machine_mode mode,
30787 const_tree type, int misalignment,
30788 bool is_packed)
30789 {
30790 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30791 {
30792 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30793
30794 if (is_packed)
30795 return align == 1;
30796
30797 /* If the misalignment is unknown, we should be able to handle the access
30798 so long as it is not to a member of a packed data structure. */
30799 if (misalignment == -1)
30800 return true;
30801
30802 /* Return true if the misalignment is a multiple of the natural alignment
30803 of the vector's element type. This is probably always going to be
30804 true in practice, since we've already established that this isn't a
30805 packed access. */
30806 return ((misalignment % align) == 0);
30807 }
30808
30809 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30810 is_packed);
30811 }
30812
30813 static void
30814 arm_conditional_register_usage (void)
30815 {
30816 int regno;
30817
30818 if (TARGET_THUMB1 && optimize_size)
30819 {
30820 /* When optimizing for size on Thumb-1, it's better not
30821 to use the HI regs, because of the overhead of
30822 stacking them. */
30823 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30824 fixed_regs[regno] = call_used_regs[regno] = 1;
30825 }
30826
30827 /* The link register can be clobbered by any branch insn,
30828 but we have no way to track that at present, so mark
30829 it as unavailable. */
30830 if (TARGET_THUMB1)
30831 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30832
30833 if (TARGET_32BIT && TARGET_VFP_BASE)
30834 {
30835 /* VFPv3 registers are disabled when earlier VFP
30836 versions are selected due to the definition of
30837 LAST_VFP_REGNUM. */
30838 for (regno = FIRST_VFP_REGNUM;
30839 regno <= LAST_VFP_REGNUM; ++ regno)
30840 {
30841 fixed_regs[regno] = 0;
30842 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30843 || regno >= FIRST_VFP_REGNUM + 32;
30844 }
30845 if (TARGET_HAVE_MVE)
30846 fixed_regs[VPR_REGNUM] = 0;
30847 }
30848
30849 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30850 {
30851 regno = FIRST_IWMMXT_GR_REGNUM;
30852 /* The 2002/10/09 revision of the XScale ABI has wCG0
30853 and wCG1 as call-preserved registers. The 2002/11/21
30854 revision changed this so that all wCG registers are
30855 scratch registers. */
30856 for (regno = FIRST_IWMMXT_GR_REGNUM;
30857 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30858 fixed_regs[regno] = 0;
30859 /* The XScale ABI has wR0 - wR9 as scratch registers,
30860 the rest as call-preserved registers. */
30861 for (regno = FIRST_IWMMXT_REGNUM;
30862 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30863 {
30864 fixed_regs[regno] = 0;
30865 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30866 }
30867 }
30868
30869 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30870 {
30871 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30872 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30873 }
30874 else if (TARGET_APCS_STACK)
30875 {
30876 fixed_regs[10] = 1;
30877 call_used_regs[10] = 1;
30878 }
30879 /* -mcaller-super-interworking reserves r11 for calls to
30880 _interwork_r11_call_via_rN(). Making the register global
30881 is an easy way of ensuring that it remains valid for all
30882 calls. */
30883 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30884 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30885 {
30886 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30887 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30888 if (TARGET_CALLER_INTERWORKING)
30889 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30890 }
30891
30892 /* The Q and GE bits are only accessed via special ACLE patterns. */
30893 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30894 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30895
30896 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30897 }
30898
30899 static reg_class_t
30900 arm_preferred_rename_class (reg_class_t rclass)
30901 {
30902 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30903 using GENERAL_REGS, so during the register rename pass we prefer
30904 LO_REGS in order to reduce code size. */
30905 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30906 return LO_REGS;
30907 else
30908 return NO_REGS;
30909 }
30910
30911 /* Compute the attribute "length" of insn "*push_multi".
30912 So this function MUST be kept in sync with that insn pattern. */
30913 int
30914 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30915 {
30916 int i, regno, hi_reg;
30917 int num_saves = XVECLEN (parallel_op, 0);
30918
30919 /* ARM mode. */
30920 if (TARGET_ARM)
30921 return 4;
30922 /* Thumb1 mode. */
30923 if (TARGET_THUMB1)
30924 return 2;
30925
30926 /* Thumb2 mode. */
30927 regno = REGNO (first_op);
30928 /* For PUSH/STM in Thumb-2 mode, a 16-bit encoding can be used if the
30929 register list fits in 8 bits. Normally this means every register in the
30930 list must be in LO_REGS, that is R0-R7. If any HI_REGS register is used,
30931 a 32-bit encoding is required. The one exception is PUSH, where LR (a
30932 HI_REGS register) may still be used with the 16-bit encoding. */
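/* For example (illustrative only), "push {r0-r7, lr}" still has a 16-bit
   encoding and so contributes 2 to the length attribute, while
   "push {r0, r8}" needs the 32-bit encoding and contributes 4.  */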
30933 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30934 for (i = 1; i < num_saves && !hi_reg; i++)
30935 {
30936 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30937 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30938 }
30939
30940 if (!hi_reg)
30941 return 2;
30942 return 4;
30943 }
30944
30945 /* Compute the attribute "length" of insn. Currently, this function is used
30946 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30947 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30948 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P
30949 is true if OPERANDS contains an insn that explicitly updates the base register. */
30950
30951 int
30952 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30953 {
30954 /* ARM mode. */
30955 if (TARGET_ARM)
30956 return 4;
30957 /* Thumb1 mode. */
30958 if (TARGET_THUMB1)
30959 return 2;
30960
30961 rtx parallel_op = operands[0];
30962 /* Start from the last element of the PARALLEL. */
30963 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30964 /* Start with the base register. */
30965 unsigned regno = REGNO (operands[1]);
30966 /* Skip return and write back pattern.
30967 We only need register pop pattern for later analysis. */
30968 unsigned first_indx = 0;
30969 first_indx += return_pc ? 1 : 0;
30970 first_indx += write_back_p ? 1 : 0;
30971
30972 /* A pop operation can be done through LDM or POP. If the base register is
30973 SP and write-back is used, then LDM is an alias of POP. */
30974 bool pop_p = (regno == SP_REGNUM && write_back_p);
30975 bool ldm_p = !pop_p;
30976
30977 /* Check base register for LDM. */
30978 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30979 return 4;
30980
30981 /* Check each register in the list. */
30982 for (; indx >= first_indx; indx--)
30983 {
30984 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30985 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30986 comment in arm_attr_length_push_multi. */
30987 if (REGNO_REG_CLASS (regno) == HI_REGS
30988 && (regno != PC_REGNUM || ldm_p))
30989 return 4;
30990 }
30991
30992 return 2;
30993 }
30994
30995 /* Compute the number of instructions emitted by output_move_double. */
30996 int
30997 arm_count_output_move_double_insns (rtx *operands)
30998 {
30999 int count;
31000 rtx ops[2];
31001 /* output_move_double may modify the operands array, so call it
31002 here on a copy of the array. */
31003 ops[0] = operands[0];
31004 ops[1] = operands[1];
31005 output_move_double (ops, false, &count);
31006 return count;
31007 }
31008
31009 /* Same as above, but operands are a register/memory pair in SImode.
31010 Assumes OPERANDS has the base register in position 0 and the memory
31011 operand in position 2 (the order provided by the arm_{ldrd,strd} patterns). */
31012 int
31013 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31014 {
31015 int count;
31016 rtx ops[2];
31017 int regnum, memnum;
31018 if (load)
31019 regnum = 0, memnum = 1;
31020 else
31021 regnum = 1, memnum = 0;
31022 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31023 ops[memnum] = adjust_address (operands[2], DImode, 0);
31024 output_move_double (ops, false, &count);
31025 return count;
31026 }
31027
31028
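/* If OPERAND is a CONST_DOUBLE equal to 1 / 2^N for some N in the range
   [0, 31], return N; otherwise return 0.  Used to recognize the fixed-point
   "fraction bits" operand of the VFPv3 vcvt conversions.  */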
31029 int
31030 vfp3_const_double_for_fract_bits (rtx operand)
31031 {
31032 REAL_VALUE_TYPE r0;
31033
31034 if (!CONST_DOUBLE_P (operand))
31035 return 0;
31036
31037 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31038 if (exact_real_inverse (DFmode, &r0)
31039 && !REAL_VALUE_NEGATIVE (r0))
31040 {
31041 if (exact_real_truncate (DFmode, &r0))
31042 {
31043 HOST_WIDE_INT value = real_to_integer (&r0);
31044 value = value & 0xffffffff;
31045 if ((value != 0) && ( (value & (value - 1)) == 0))
31046 {
31047 int ret = exact_log2 (value);
31048 gcc_assert (IN_RANGE (ret, 0, 31));
31049 return ret;
31050 }
31051 }
31052 }
31053 return 0;
31054 }
31055
31056 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31057 log2 is in [1, 32], return that log2. Otherwise return -1.
31058 This is used in the patterns for vcvt.s32.f32 floating-point to
31059 fixed-point conversions. */
31060
31061 int
31062 vfp3_const_double_for_bits (rtx x)
31063 {
31064 const REAL_VALUE_TYPE *r;
31065
31066 if (!CONST_DOUBLE_P (x))
31067 return -1;
31068
31069 r = CONST_DOUBLE_REAL_VALUE (x);
31070
31071 if (REAL_VALUE_NEGATIVE (*r)
31072 || REAL_VALUE_ISNAN (*r)
31073 || REAL_VALUE_ISINF (*r)
31074 || !real_isinteger (r, SFmode))
31075 return -1;
31076
31077 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31078
31079 /* The exact_log2 above will have returned -1 if this is
31080 not an exact log2. */
31081 if (!IN_RANGE (hwint, 1, 32))
31082 return -1;
31083
31084 return hwint;
31085 }
31086
31087 \f
31088 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31089
31090 static void
31091 arm_pre_atomic_barrier (enum memmodel model)
31092 {
31093 if (need_atomic_barrier_p (model, true))
31094 emit_insn (gen_memory_barrier ());
31095 }
31096
31097 static void
31098 arm_post_atomic_barrier (enum memmodel model)
31099 {
31100 if (need_atomic_barrier_p (model, false))
31101 emit_insn (gen_memory_barrier ());
31102 }
31103
31104 /* Emit the load-exclusive and store-exclusive instructions.
31105 Use acquire and release versions if necessary. */
31106
31107 static void
31108 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31109 {
31110 rtx (*gen) (rtx, rtx);
31111
31112 if (acq)
31113 {
31114 switch (mode)
31115 {
31116 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31117 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31118 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31119 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31120 default:
31121 gcc_unreachable ();
31122 }
31123 }
31124 else
31125 {
31126 switch (mode)
31127 {
31128 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31129 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31130 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31131 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31132 default:
31133 gcc_unreachable ();
31134 }
31135 }
31136
31137 emit_insn (gen (rval, mem));
31138 }
31139
31140 static void
31141 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31142 rtx mem, bool rel)
31143 {
31144 rtx (*gen) (rtx, rtx, rtx);
31145
31146 if (rel)
31147 {
31148 switch (mode)
31149 {
31150 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31151 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31152 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31153 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31154 default:
31155 gcc_unreachable ();
31156 }
31157 }
31158 else
31159 {
31160 switch (mode)
31161 {
31162 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31163 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31164 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31165 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31166 default:
31167 gcc_unreachable ();
31168 }
31169 }
31170
31171 emit_insn (gen (bval, rval, mem));
31172 }
31173
31174 /* Mark the previous jump instruction as unlikely. */
31175
31176 static void
31177 emit_unlikely_jump (rtx insn)
31178 {
31179 rtx_insn *jump = emit_jump_insn (insn);
31180 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31181 }
31182
31183 /* Expand a compare and swap pattern. */
31184
31185 void
31186 arm_expand_compare_and_swap (rtx operands[])
31187 {
31188 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31189 machine_mode mode, cmp_mode;
31190
31191 bval = operands[0];
31192 rval = operands[1];
31193 mem = operands[2];
31194 oldval = operands[3];
31195 newval = operands[4];
31196 is_weak = operands[5];
31197 mod_s = operands[6];
31198 mod_f = operands[7];
31199 mode = GET_MODE (mem);
31200
31201 /* Normally the succ memory model must be stronger than fail, but in the
31202 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31203 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31204
31205 if (TARGET_HAVE_LDACQ
31206 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31207 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31208 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31209
31210 switch (mode)
31211 {
31212 case E_QImode:
31213 case E_HImode:
31214 /* For narrow modes, we're going to perform the comparison in SImode,
31215 so do the zero-extension now. */
31216 rval = gen_reg_rtx (SImode);
31217 oldval = convert_modes (SImode, mode, oldval, true);
31218 /* FALLTHRU */
31219
31220 case E_SImode:
31221 /* Force the value into a register if needed. We waited until after
31222 the zero-extension above to do this properly. */
31223 if (!arm_add_operand (oldval, SImode))
31224 oldval = force_reg (SImode, oldval);
31225 break;
31226
31227 case E_DImode:
31228 if (!cmpdi_operand (oldval, mode))
31229 oldval = force_reg (mode, oldval);
31230 break;
31231
31232 default:
31233 gcc_unreachable ();
31234 }
31235
31236 if (TARGET_THUMB1)
31237 cmp_mode = E_SImode;
31238 else
31239 cmp_mode = CC_Zmode;
31240
31241 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31242 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31243 oldval, newval, is_weak, mod_s, mod_f));
31244
31245 if (mode == QImode || mode == HImode)
31246 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31247
31248 /* In all cases, we arrange for success to be signaled by Z set.
31249 This arrangement allows for the boolean result to be used directly
31250 in a subsequent branch, post optimization. For Thumb-1 targets, the
31251 boolean negation of the result is also stored in bval because the Thumb-1
31252 backend lacks dependency tracking for the CC flag, as flag-setting is not
31253 represented at the RTL level. */
31254 if (TARGET_THUMB1)
31255 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31256 else
31257 {
31258 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31259 emit_insn (gen_rtx_SET (bval, x));
31260 }
31261 }
31262
31263 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31264 another memory store between the load-exclusive and store-exclusive can
31265 reset the monitor from Exclusive to Open state. This means we must wait
31266 until after reload to split the pattern, lest we get a register spill in
31267 the middle of the atomic sequence. Success of the compare and swap is
31268 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31269 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
31270 by the atomic_compare_and_swapmode standard pattern in operand 0). */
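/* Illustrative sketch only: for a strong SImode compare-and-swap the split
   form expands to roughly
     1:	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	2f
	strex	neg_bval, newval, [mem]
	cmp	neg_bval, #0
	bne	1b
     2:
   with ldaex/stlex and/or barriers substituted as the memory model requires.  */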
31271
31272 void
31273 arm_split_compare_and_swap (rtx operands[])
31274 {
31275 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31276 machine_mode mode;
31277 enum memmodel mod_s, mod_f;
31278 bool is_weak;
31279 rtx_code_label *label1, *label2;
31280 rtx x, cond;
31281
31282 rval = operands[1];
31283 mem = operands[2];
31284 oldval = operands[3];
31285 newval = operands[4];
31286 is_weak = (operands[5] != const0_rtx);
31287 mod_s_rtx = operands[6];
31288 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31289 mod_f = memmodel_from_int (INTVAL (operands[7]));
31290 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31291 mode = GET_MODE (mem);
31292
31293 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31294
31295 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31296 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31297
31298 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31299 a full barrier is emitted after the store-release. */
31300 if (is_armv8_sync)
31301 use_acquire = false;
31302
31303 /* Checks whether a barrier is needed and emits one accordingly. */
31304 if (!(use_acquire || use_release))
31305 arm_pre_atomic_barrier (mod_s);
31306
31307 label1 = NULL;
31308 if (!is_weak)
31309 {
31310 label1 = gen_label_rtx ();
31311 emit_label (label1);
31312 }
31313 label2 = gen_label_rtx ();
31314
31315 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31316
31317 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
31318 oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
31319 if (TARGET_32BIT)
31320 {
31321 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31322 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31323 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31324 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31325 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31326 }
31327 else
31328 {
31329 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31330 if (thumb1_cmpneg_operand (oldval, SImode))
31331 {
31332 rtx src = rval;
31333 if (!satisfies_constraint_L (oldval))
31334 {
31335 gcc_assert (satisfies_constraint_J (oldval));
31336
31337 /* For such immediates, ADDS needs the source and destination regs
31338 to be the same.
31339
31340 Normally this would be handled by RA, but this is all happening
31341 after RA. */
31342 emit_move_insn (neg_bval, rval);
31343 src = neg_bval;
31344 }
31345
31346 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31347 label2, cond));
31348 }
31349 else
31350 {
31351 emit_move_insn (neg_bval, const1_rtx);
31352 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31353 }
31354 }
31355
31356 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31357
31358 /* Weak or strong, we want EQ to be true for success, so that we
31359 match the flags that we got from the compare above. */
31360 if (TARGET_32BIT)
31361 {
31362 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31363 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31364 emit_insn (gen_rtx_SET (cond, x));
31365 }
31366
31367 if (!is_weak)
31368 {
31369 /* Z is set to boolean value of !neg_bval, as required to communicate
31370 with arm_expand_compare_and_swap. */
31371 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31372 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31373 }
31374
31375 if (!is_mm_relaxed (mod_f))
31376 emit_label (label2);
31377
31378 /* Checks whether a barrier is needed and emits one accordingly. */
31379 if (is_armv8_sync
31380 || !(use_acquire || use_release))
31381 arm_post_atomic_barrier (mod_s);
31382
31383 if (is_mm_relaxed (mod_f))
31384 emit_label (label2);
31385 }
31386
31387 /* Split an atomic operation pattern. Operation is given by CODE and is one
31388 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31389 operation). Operation is performed on the content at MEM and on VALUE
31390 following the memory model MODEL_RTX. The content at MEM before and after
31391 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31392 success of the operation is returned in COND. Using a scratch register or
31393 an operand register for these determines what result is returned for that
31394 pattern. */
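/* Illustrative sketch only: for an SImode atomic PLUS this expands to roughly
     1:	ldrex	old_out, [mem]
	add	new_out, old_out, value
	strex	cond, new_out, [mem]
	cmp	cond, #0
	bne	1b
   with acquire/release forms and barriers inserted as MODEL_RTX requires.  */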
31395
31396 void
31397 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31398 rtx value, rtx model_rtx, rtx cond)
31399 {
31400 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31401 machine_mode mode = GET_MODE (mem);
31402 machine_mode wmode = (mode == DImode ? DImode : SImode);
31403 rtx_code_label *label;
31404 bool all_low_regs, bind_old_new;
31405 rtx x;
31406
31407 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31408
31409 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31410 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31411
31412 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31413 a full barrier is emitted after the store-release. */
31414 if (is_armv8_sync)
31415 use_acquire = false;
31416
31417 /* Checks whether a barrier is needed and emits one accordingly. */
31418 if (!(use_acquire || use_release))
31419 arm_pre_atomic_barrier (model);
31420
31421 label = gen_label_rtx ();
31422 emit_label (label);
31423
31424 if (new_out)
31425 new_out = gen_lowpart (wmode, new_out);
31426 if (old_out)
31427 old_out = gen_lowpart (wmode, old_out);
31428 else
31429 old_out = new_out;
31430 value = simplify_gen_subreg (wmode, value, mode, 0);
31431
31432 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31433
31434 /* Does the operation require destination and first operand to use the same
31435 register? This is decided by register constraints of relevant insn
31436 patterns in thumb1.md. */
31437 gcc_assert (!new_out || REG_P (new_out));
31438 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31439 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31440 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31441 bind_old_new =
31442 (TARGET_THUMB1
31443 && code != SET
31444 && code != MINUS
31445 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31446
31447 /* We want to return the old value while putting the result of the operation
31448 in the same register as the old value so copy the old value over to the
31449 destination register and use that register for the operation. */
31450 if (old_out && bind_old_new)
31451 {
31452 emit_move_insn (new_out, old_out);
31453 old_out = new_out;
31454 }
31455
31456 switch (code)
31457 {
31458 case SET:
31459 new_out = value;
31460 break;
31461
31462 case NOT:
31463 x = gen_rtx_AND (wmode, old_out, value);
31464 emit_insn (gen_rtx_SET (new_out, x));
31465 x = gen_rtx_NOT (wmode, new_out);
31466 emit_insn (gen_rtx_SET (new_out, x));
31467 break;
31468
31469 case MINUS:
31470 if (CONST_INT_P (value))
31471 {
31472 value = gen_int_mode (-INTVAL (value), wmode);
31473 code = PLUS;
31474 }
31475 /* FALLTHRU */
31476
31477 case PLUS:
31478 if (mode == DImode)
31479 {
31480 /* DImode plus/minus need to clobber flags. */
31481 /* The adddi3 and subdi3 patterns are incorrectly written so that
31482 they require matching operands, even when we could easily support
31483 three operands. Thankfully, this can be fixed up post-splitting,
31484 as the individual add+adc patterns do accept three operands and
31485 post-reload cprop can make these moves go away. */
31486 emit_move_insn (new_out, old_out);
31487 if (code == PLUS)
31488 x = gen_adddi3 (new_out, new_out, value);
31489 else
31490 x = gen_subdi3 (new_out, new_out, value);
31491 emit_insn (x);
31492 break;
31493 }
31494 /* FALLTHRU */
31495
31496 default:
31497 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31498 emit_insn (gen_rtx_SET (new_out, x));
31499 break;
31500 }
31501
31502 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31503 use_release);
31504
31505 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31506 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31507
31508 /* Checks whether a barrier is needed and emits one accordingly. */
31509 if (is_armv8_sync
31510 || !(use_acquire || use_release))
31511 arm_post_atomic_barrier (model);
31512 }
31513 \f
31514 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31515 opt_machine_mode
31516 arm_mode_to_pred_mode (machine_mode mode)
31517 {
31518 switch (GET_MODE_NUNITS (mode))
31519 {
31520 case 16: return V16BImode;
31521 case 8: return V8BImode;
31522 case 4: return V4BImode;
31523 case 2: return V2QImode;
31524 }
31525 return opt_machine_mode ();
31526 }
31527
31528 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31529 If CAN_INVERT, store either the result or its inverse in TARGET
31530 and return true if TARGET contains the inverse. If !CAN_INVERT,
31531 always store the result in TARGET, never its inverse.
31532
31533 Note that the handling of floating-point comparisons is not
31534 IEEE compliant. */
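/* For example, on Neon an UNLT comparison is handled by emitting the reversed
   GE comparison and then either inverting the mask or reporting the result as
   inverted when CAN_INVERT is true.  */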
31535
31536 bool
31537 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31538 bool can_invert)
31539 {
31540 machine_mode cmp_result_mode = GET_MODE (target);
31541 machine_mode cmp_mode = GET_MODE (op0);
31542
31543 bool inverted;
31544
31545 /* MVE supports more comparisons than Neon. */
31546 if (TARGET_HAVE_MVE)
31547 inverted = false;
31548 else
31549 switch (code)
31550 {
31551 /* For these we need to compute the inverse of the requested
31552 comparison. */
31553 case UNORDERED:
31554 case UNLT:
31555 case UNLE:
31556 case UNGT:
31557 case UNGE:
31558 case UNEQ:
31559 case NE:
31560 code = reverse_condition_maybe_unordered (code);
31561 if (!can_invert)
31562 {
31563 /* Recursively emit the inverted comparison into a temporary
31564 and then store its inverse in TARGET. This avoids reusing
31565 TARGET (which for integer NE could be one of the inputs). */
31566 rtx tmp = gen_reg_rtx (cmp_result_mode);
31567 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31568 gcc_unreachable ();
31569 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31570 return false;
31571 }
31572 inverted = true;
31573 break;
31574
31575 default:
31576 inverted = false;
31577 break;
31578 }
31579
31580 switch (code)
31581 {
31582 /* These are natively supported by Neon for zero comparisons, but otherwise
31583 require the operands to be swapped. For MVE, we can only compare
31584 registers. */
31585 case LE:
31586 case LT:
31587 if (!TARGET_HAVE_MVE)
31588 if (op1 != CONST0_RTX (cmp_mode))
31589 {
31590 code = swap_condition (code);
31591 std::swap (op0, op1);
31592 }
31593 /* Fall through. */
31594
31595 /* These are natively supported by Neon for both register and zero
31596 operands. MVE supports registers only. */
31597 case EQ:
31598 case GE:
31599 case GT:
31600 case NE:
31601 if (TARGET_HAVE_MVE)
31602 {
31603 switch (GET_MODE_CLASS (cmp_mode))
31604 {
31605 case MODE_VECTOR_INT:
31606 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31607 op0, force_reg (cmp_mode, op1)));
31608 break;
31609 case MODE_VECTOR_FLOAT:
31610 if (TARGET_HAVE_MVE_FLOAT)
31611 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31612 op0, force_reg (cmp_mode, op1)));
31613 else
31614 gcc_unreachable ();
31615 break;
31616 default:
31617 gcc_unreachable ();
31618 }
31619 }
31620 else
31621 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31622 return inverted;
31623
31624 /* These are natively supported for register operands only.
31625 Comparisons with zero aren't useful and should be folded
31626 or canonicalized by target-independent code. */
31627 case GEU:
31628 case GTU:
31629 if (TARGET_HAVE_MVE)
31630 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31631 op0, force_reg (cmp_mode, op1)));
31632 else
31633 emit_insn (gen_neon_vc (code, cmp_mode, target,
31634 op0, force_reg (cmp_mode, op1)));
31635 return inverted;
31636
31637 /* These require the operands to be swapped and likewise do not
31638 support comparisons with zero. */
31639 case LEU:
31640 case LTU:
31641 if (TARGET_HAVE_MVE)
31642 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31643 force_reg (cmp_mode, op1), op0));
31644 else
31645 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31646 target, force_reg (cmp_mode, op1), op0));
31647 return inverted;
31648
31649 /* These need a combination of two comparisons. */
31650 case LTGT:
31651 case ORDERED:
31652 {
31653 /* Operands are LTGT iff (a > b || a < b).
31654 Operands are ORDERED iff (a > b || a <= b). */
31655 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31656 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31657 rtx_code alt_code = (code == LTGT ? LT : LE);
31658 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31659 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31660 gcc_unreachable ();
31661 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31662 gt_res, alt_res)));
31663 return inverted;
31664 }
31665
31666 default:
31667 gcc_unreachable ();
31668 }
31669 }
31670
31671 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31672 CMP_RESULT_MODE is the mode of the comparison result. */
31673
31674 void
31675 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31676 {
31677 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31678 arm_expand_vector_compare, and another one here. */
31679 rtx mask;
31680
31681 if (TARGET_HAVE_MVE)
31682 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31683 else
31684 mask = gen_reg_rtx (cmp_result_mode);
31685
31686 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31687 operands[4], operands[5], true);
31688 if (inverted)
31689 std::swap (operands[1], operands[2]);
31690 if (TARGET_NEON)
31691 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31692 mask, operands[1], operands[2]));
31693 else
31694 {
31695 machine_mode cmp_mode = GET_MODE (operands[0]);
31696
31697 switch (GET_MODE_CLASS (cmp_mode))
31698 {
31699 case MODE_VECTOR_INT:
31700 emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
31701 operands[1], operands[2], mask));
31702 break;
31703 case MODE_VECTOR_FLOAT:
31704 if (TARGET_HAVE_MVE_FLOAT)
31705 emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
31706 operands[1], operands[2], mask));
31707 else
31708 gcc_unreachable ();
31709 break;
31710 default:
31711 gcc_unreachable ();
31712 }
31713 }
31714 }
31715 \f
31716 #define MAX_VECT_LEN 16
31717
31718 struct expand_vec_perm_d
31719 {
31720 rtx target, op0, op1;
31721 vec_perm_indices perm;
31722 machine_mode vmode;
31723 bool one_vector_p;
31724 bool testing_p;
31725 };
31726
31727 /* Generate a variable permutation. */
31728
31729 static void
31730 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31731 {
31732 machine_mode vmode = GET_MODE (target);
31733 bool one_vector_p = rtx_equal_p (op0, op1);
31734
31735 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31736 gcc_checking_assert (GET_MODE (op0) == vmode);
31737 gcc_checking_assert (GET_MODE (op1) == vmode);
31738 gcc_checking_assert (GET_MODE (sel) == vmode);
31739 gcc_checking_assert (TARGET_NEON);
31740
31741 if (one_vector_p)
31742 {
31743 if (vmode == V8QImode)
31744 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31745 else
31746 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31747 }
31748 else
31749 {
31750 rtx pair;
31751
31752 if (vmode == V8QImode)
31753 {
31754 pair = gen_reg_rtx (V16QImode);
31755 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31756 pair = gen_lowpart (TImode, pair);
31757 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31758 }
31759 else
31760 {
31761 pair = gen_reg_rtx (OImode);
31762 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31763 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31764 }
31765 }
31766 }
31767
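/* Expand a variable vector permutation: select elements of OP0/OP1 according
   to SEL and store them in TARGET, masking SEL first so that out-of-range
   indexes wrap as VEC_PERM_EXPR requires.  */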
31768 void
31769 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31770 {
31771 machine_mode vmode = GET_MODE (target);
31772 unsigned int nelt = GET_MODE_NUNITS (vmode);
31773 bool one_vector_p = rtx_equal_p (op0, op1);
31774 rtx mask;
31775
31776 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31777 numbering of elements for big-endian, we must reverse the order. */
31778 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31779
31780 /* The VTBL instruction does not use a modulo index, so we must take care
31781 of that ourselves. */
31782 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31783 mask = gen_const_vec_duplicate (vmode, mask);
31784 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31785
31786 arm_expand_vec_perm_1 (target, op0, op1, sel);
31787 }
31788
31789 /* Map lane ordering between the architectural lane order and GCC's lane
31790 order, taking the ABI into account. See the comment above output_move_neon for details. */
31791
31792 static int
31793 neon_endian_lane_map (machine_mode mode, int lane)
31794 {
31795 if (BYTES_BIG_ENDIAN)
31796 {
31797 int nelems = GET_MODE_NUNITS (mode);
31798 /* Reverse lane order. */
31799 lane = (nelems - 1 - lane);
31800 /* Reverse D register order, to match ABI. */
31801 if (GET_MODE_SIZE (mode) == 16)
31802 lane = lane ^ (nelems / 2);
31803 }
31804 return lane;
31805 }
31806
31807 /* Some permutations index into pairs of vectors; this is a helper function
31808 to map indexes into those pairs of vectors. */
31809
31810 static int
31811 neon_pair_endian_lane_map (machine_mode mode, int lane)
31812 {
31813 int nelem = GET_MODE_NUNITS (mode);
31814 if (BYTES_BIG_ENDIAN)
31815 lane =
31816 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31817 return lane;
31818 }
31819
31820 /* Generate or test for an insn that supports a constant permutation. */
31821
31822 /* Recognize patterns for the VUZP insns. */
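/* For example (little-endian, illustrative), the V8QI selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (or its odd counterpart) is matched here and
   implemented with a single VUZP, keeping only the required output.  */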
31823
31824 static bool
31825 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31826 {
31827 unsigned int i, odd, mask, nelt = d->perm.length ();
31828 rtx out0, out1, in0, in1;
31829 int first_elem;
31830 int swap_nelt;
31831
31832 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31833 return false;
31834
31835 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31836 big endian pattern on 64 bit vectors, so we correct for that. */
31837 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31838 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31839
31840 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31841
31842 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31843 odd = 0;
31844 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31845 odd = 1;
31846 else
31847 return false;
31848 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31849
31850 for (i = 0; i < nelt; i++)
31851 {
31852 unsigned elt =
31853 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31854 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31855 return false;
31856 }
31857
31858 /* Success! */
31859 if (d->testing_p)
31860 return true;
31861
31862 in0 = d->op0;
31863 in1 = d->op1;
31864 if (swap_nelt != 0)
31865 std::swap (in0, in1);
31866
31867 out0 = d->target;
31868 out1 = gen_reg_rtx (d->vmode);
31869 if (odd)
31870 std::swap (out0, out1);
31871
31872 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31873 return true;
31874 }
31875
31876 /* Recognize patterns for the VZIP insns. */
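/* For example (little-endian, illustrative), the V8QI selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of the two inputs
   and is matched here as the first output of a VZIP.  */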
31877
31878 static bool
31879 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31880 {
31881 unsigned int i, high, mask, nelt = d->perm.length ();
31882 rtx out0, out1, in0, in1;
31883 int first_elem;
31884 bool is_swapped;
31885
31886 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31887 return false;
31888
31889 is_swapped = BYTES_BIG_ENDIAN;
31890
31891 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31892
31893 high = nelt / 2;
31894 if (first_elem == neon_endian_lane_map (d->vmode, high))
31895 ;
31896 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31897 high = 0;
31898 else
31899 return false;
31900 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31901
31902 for (i = 0; i < nelt / 2; i++)
31903 {
31904 unsigned elt =
31905 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31906 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31907 != elt)
31908 return false;
31909 elt =
31910 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31911 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31912 != elt)
31913 return false;
31914 }
31915
31916 /* Success! */
31917 if (d->testing_p)
31918 return true;
31919
31920 in0 = d->op0;
31921 in1 = d->op1;
31922 if (is_swapped)
31923 std::swap (in0, in1);
31924
31925 out0 = d->target;
31926 out1 = gen_reg_rtx (d->vmode);
31927 if (high)
31928 std::swap (out0, out1);
31929
31930 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31931 return true;
31932 }
31933
31934 /* Recognize patterns for the VREV insns. */
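/* For example (illustrative), the V8QI selector { 3, 2, 1, 0, 7, 6, 5, 4 }
   reverses the bytes within each 32-bit group and is matched here as
   VREV32.8.  */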
31935 static bool
31936 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31937 {
31938 unsigned int i, j, diff, nelt = d->perm.length ();
31939 rtx (*gen) (machine_mode, rtx, rtx);
31940
31941 if (!d->one_vector_p)
31942 return false;
31943
31944 diff = d->perm[0];
31945 switch (diff)
31946 {
31947 case 7:
31948 switch (d->vmode)
31949 {
31950 case E_V16QImode:
31951 case E_V8QImode:
31952 gen = gen_neon_vrev64;
31953 break;
31954 default:
31955 return false;
31956 }
31957 break;
31958 case 3:
31959 switch (d->vmode)
31960 {
31961 case E_V16QImode:
31962 case E_V8QImode:
31963 gen = gen_neon_vrev32;
31964 break;
31965 case E_V8HImode:
31966 case E_V4HImode:
31967 case E_V8HFmode:
31968 case E_V4HFmode:
31969 gen = gen_neon_vrev64;
31970 break;
31971 default:
31972 return false;
31973 }
31974 break;
31975 case 1:
31976 switch (d->vmode)
31977 {
31978 case E_V16QImode:
31979 case E_V8QImode:
31980 gen = gen_neon_vrev16;
31981 break;
31982 case E_V8HImode:
31983 case E_V4HImode:
31984 gen = gen_neon_vrev32;
31985 break;
31986 case E_V4SImode:
31987 case E_V2SImode:
31988 case E_V4SFmode:
31989 case E_V2SFmode:
31990 gen = gen_neon_vrev64;
31991 break;
31992 default:
31993 return false;
31994 }
31995 break;
31996 default:
31997 return false;
31998 }
31999
32000 for (i = 0; i < nelt ; i += diff + 1)
32001 for (j = 0; j <= diff; j += 1)
32002 {
32003 /* This is guaranteed to hold since the value of diff
32004 is one of 7, 3 or 1, so there are enough elements in
32005 the vector to index. Seeing a permutation with any
32006 other value of diff here implies that something has
32007 gone wrong earlier. */
32008 gcc_assert (i + j < nelt);
32009 if (d->perm[i + j] != i + diff - j)
32010 return false;
32011 }
32012
32013 /* Success! */
32014 if (d->testing_p)
32015 return true;
32016
32017 emit_insn (gen (d->vmode, d->target, d->op0));
32018 return true;
32019 }
32020
32021 /* Recognize patterns for the VTRN insns. */
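/* For example (little-endian, illustrative), the V8QI selector
   { 0, 8, 2, 10, 4, 12, 6, 14 } is matched here as the even-lane output of a
   VTRN of the two inputs.  */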
32022
32023 static bool
32024 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32025 {
32026 unsigned int i, odd, mask, nelt = d->perm.length ();
32027 rtx out0, out1, in0, in1;
32028
32029 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32030 return false;
32031
32032 /* Note that these are little-endian tests. Adjust for big-endian later. */
32033 if (d->perm[0] == 0)
32034 odd = 0;
32035 else if (d->perm[0] == 1)
32036 odd = 1;
32037 else
32038 return false;
32039 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32040
32041 for (i = 0; i < nelt; i += 2)
32042 {
32043 if (d->perm[i] != i + odd)
32044 return false;
32045 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32046 return false;
32047 }
32048
32049 /* Success! */
32050 if (d->testing_p)
32051 return true;
32052
32053 in0 = d->op0;
32054 in1 = d->op1;
32055 if (BYTES_BIG_ENDIAN)
32056 {
32057 std::swap (in0, in1);
32058 odd = !odd;
32059 }
32060
32061 out0 = d->target;
32062 out1 = gen_reg_rtx (d->vmode);
32063 if (odd)
32064 std::swap (out0, out1);
32065
32066 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32067 return true;
32068 }
32069
32070 /* Recognize patterns for the VEXT insns. */
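/* For example (little-endian, illustrative), the V8QI selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } extracts a window starting at byte 3 of the
   concatenated inputs and is matched here as VEXT with offset #3.  */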
32071
32072 static bool
32073 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32074 {
32075 unsigned int i, nelt = d->perm.length ();
32076 rtx offset;
32077
32078 unsigned int location;
32079
32080 unsigned int next = d->perm[0] + 1;
32081
32082 /* TODO: Handle GCC's numbering of elements for big-endian. */
32083 if (BYTES_BIG_ENDIAN)
32084 return false;
32085
32086 /* Check if the extracted indexes are increasing by one. */
32087 for (i = 1; i < nelt; next++, i++)
32088 {
32089 /* If we hit the most significant element of the 2nd vector in
32090 the previous iteration, no need to test further. */
32091 if (next == 2 * nelt)
32092 return false;
32093
32094 /* If we are operating on only one vector: it could be a
32095 rotation. If there are only two elements of size < 64, let
32096 arm_evpc_neon_vrev catch it. */
32097 if (d->one_vector_p && (next == nelt))
32098 {
32099 if ((nelt == 2) && (d->vmode != V2DImode))
32100 return false;
32101 else
32102 next = 0;
32103 }
32104
32105 if (d->perm[i] != next)
32106 return false;
32107 }
32108
32109 location = d->perm[0];
32110
32111 /* Success! */
32112 if (d->testing_p)
32113 return true;
32114
32115 offset = GEN_INT (location);
32116
32117 if (d->vmode == E_DImode)
32118 return false;
32119
32120 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32121 return true;
32122 }
32123
32124 /* The NEON VTBL instruction is a fully variable permutation that's even
32125 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32126 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32127 can do slightly better by expanding this as a constant where we don't
32128 have to apply a mask. */
32129
32130 static bool
32131 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32132 {
32133 rtx rperm[MAX_VECT_LEN], sel;
32134 machine_mode vmode = d->vmode;
32135 unsigned int i, nelt = d->perm.length ();
32136
32137 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32138 numbering of elements for big-endian, we must reverse the order. */
32139 if (BYTES_BIG_ENDIAN)
32140 return false;
32141
32142 if (d->testing_p)
32143 return true;
32144
32145 /* Generic code will try constant permutation twice: once with the
32146 original mode and again with the elements lowered to QImode.
32147 So wait and don't do the selector expansion ourselves. */
32148 if (vmode != V8QImode && vmode != V16QImode)
32149 return false;
32150
32151 for (i = 0; i < nelt; ++i)
32152 rperm[i] = GEN_INT (d->perm[i]);
32153 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32154 sel = force_reg (vmode, sel);
32155
32156 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32157 return true;
32158 }
32159
32160 static bool
32161 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32162 {
32163 /* Check if the input mask matches vext before reordering the
32164 operands. */
32165 if (TARGET_NEON)
32166 if (arm_evpc_neon_vext (d))
32167 return true;
32168
32169 /* The pattern matching functions above are written to look for a small
32170 number to begin the sequence (0, 1, N/2). If we begin with an index
32171 from the second operand, we can swap the operands. */
32172 unsigned int nelt = d->perm.length ();
32173 if (d->perm[0] >= nelt)
32174 {
32175 d->perm.rotate_inputs (1);
32176 std::swap (d->op0, d->op1);
32177 }
32178
32179 if (TARGET_NEON)
32180 {
32181 if (arm_evpc_neon_vuzp (d))
32182 return true;
32183 if (arm_evpc_neon_vzip (d))
32184 return true;
32185 if (arm_evpc_neon_vrev (d))
32186 return true;
32187 if (arm_evpc_neon_vtrn (d))
32188 return true;
32189 return arm_evpc_neon_vtbl (d);
32190 }
32191 return false;
32192 }
32193
32194 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32195
32196 static bool
32197 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32198 rtx target, rtx op0, rtx op1,
32199 const vec_perm_indices &sel)
32200 {
32201 if (vmode != op_mode)
32202 return false;
32203
32204 struct expand_vec_perm_d d;
32205 int i, nelt, which;
32206
32207 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32208 return false;
32209
32210 d.target = target;
32211 if (op0)
32212 {
32213 rtx nop0 = force_reg (vmode, op0);
32214 if (op0 == op1)
32215 op1 = nop0;
32216 op0 = nop0;
32217 }
32218 if (op1)
32219 op1 = force_reg (vmode, op1);
32220 d.op0 = op0;
32221 d.op1 = op1;
32222
32223 d.vmode = vmode;
32224 gcc_assert (VECTOR_MODE_P (d.vmode));
32225 d.testing_p = !target;
32226
32227 nelt = GET_MODE_NUNITS (d.vmode);
32228 for (i = which = 0; i < nelt; ++i)
32229 {
32230 int ei = sel[i] & (2 * nelt - 1);
32231 which |= (ei < nelt ? 1 : 2);
32232 }
32233
32234 switch (which)
32235 {
32236 default:
32237 gcc_unreachable ();
32238
32239 case 3:
32240 d.one_vector_p = false;
32241 if (d.testing_p || !rtx_equal_p (op0, op1))
32242 break;
32243
32244 /* The elements of PERM do not suggest that only the first operand
32245 is used, but both operands are identical. Allow easier matching
32246 of the permutation by folding the permutation into the single
32247 input vector. */
32248 /* FALLTHRU */
32249 case 2:
32250 d.op0 = op1;
32251 d.one_vector_p = true;
32252 break;
32253
32254 case 1:
32255 d.op1 = op0;
32256 d.one_vector_p = true;
32257 break;
32258 }
32259
32260 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32261
32262 if (!d.testing_p)
32263 return arm_expand_vec_perm_const_1 (&d);
32264
32265 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32266 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32267 if (!d.one_vector_p)
32268 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32269
32270 start_sequence ();
32271 bool ret = arm_expand_vec_perm_const_1 (&d);
32272 end_sequence ();
32273
32274 return ret;
32275 }
32276
32277 bool
32278 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32279 {
32280 /* If we are soft float and we do not have ldrd
32281 then all auto increment forms are ok. */
32282 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32283 return true;
32284
32285 switch (code)
32286 {
32287 /* Post-increment is supported for all instruction forms; pre-decrement
32288 is supported for all forms except the vector ones. */
32289 case ARM_POST_INC:
32290 case ARM_PRE_DEC:
32291 if (VECTOR_MODE_P (mode))
32292 {
32293 if (code != ARM_PRE_DEC)
32294 return true;
32295 else
32296 return false;
32297 }
32298
32299 return true;
32300
32301 case ARM_POST_DEC:
32302 case ARM_PRE_INC:
32303 /* Without LDRD, and with a mode size greater than
32304 word size, there is no point in auto-incrementing
32305 because ldm and stm will not have these forms. */
32306 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32307 return false;
32308
32309 /* Vector and floating point modes do not support
32310 these auto increment forms. */
32311 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32312 return false;
32313
32314 return true;
32315
32316 default:
32317 return false;
32318
32319 }
32320
32321 return false;
32322 }
32323
32324 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32325 on ARM, since we know that shifts by negative amounts are no-ops.
32326 Additionally, the default expansion code is not available or suitable
32327 for post-reload insn splits (this can occur when the register allocator
32328 chooses not to do a shift in NEON).
32329
32330 This function is used in both initial expand and post-reload splits, and
32331 handles all kinds of 64-bit shifts.
32332
32333 Input requirements:
32334 - It is safe for the input and output to be the same register, but
32335 early-clobber rules apply for the shift amount and scratch registers.
32336 - Shift by register requires both scratch registers. In all other cases
32337 the scratch registers may be NULL.
32338 - Ashiftrt by a register also clobbers the CC register. */
32339 void
32340 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32341 rtx amount, rtx scratch1, rtx scratch2)
32342 {
32343 rtx out_high = gen_highpart (SImode, out);
32344 rtx out_low = gen_lowpart (SImode, out);
32345 rtx in_high = gen_highpart (SImode, in);
32346 rtx in_low = gen_lowpart (SImode, in);
32347
32348 /* Terminology:
32349 in = the register pair containing the input value.
32350 out = the destination register pair.
32351 up = the high- or low-part of each pair.
32352 down = the opposite part to "up".
32353 In a shift, we can consider bits to shift from "up"-stream to
32354 "down"-stream, so in a left-shift "up" is the low-part and "down"
32355 is the high-part of each register pair. */
32356
32357 rtx out_up = code == ASHIFT ? out_low : out_high;
32358 rtx out_down = code == ASHIFT ? out_high : out_low;
32359 rtx in_up = code == ASHIFT ? in_low : in_high;
32360 rtx in_down = code == ASHIFT ? in_high : in_low;
32361
32362 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32363 gcc_assert (out
32364 && (REG_P (out) || SUBREG_P (out))
32365 && GET_MODE (out) == DImode);
32366 gcc_assert (in
32367 && (REG_P (in) || SUBREG_P (in))
32368 && GET_MODE (in) == DImode);
32369 gcc_assert (amount
32370 && (((REG_P (amount) || SUBREG_P (amount))
32371 && GET_MODE (amount) == SImode)
32372 || CONST_INT_P (amount)));
32373 gcc_assert (scratch1 == NULL
32374 || (GET_CODE (scratch1) == SCRATCH)
32375 || (GET_MODE (scratch1) == SImode
32376 && REG_P (scratch1)));
32377 gcc_assert (scratch2 == NULL
32378 || (GET_CODE (scratch2) == SCRATCH)
32379 || (GET_MODE (scratch2) == SImode
32380 && REG_P (scratch2)));
32381 gcc_assert (!REG_P (out) || !REG_P (amount)
32382 || !HARD_REGISTER_P (out)
32383 || (REGNO (out) != REGNO (amount)
32384 && REGNO (out) + 1 != REGNO (amount)));
32385
32386 /* Macros to make following code more readable. */
32387 #define SUB_32(DEST,SRC) \
32388 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32389 #define RSB_32(DEST,SRC) \
32390 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32391 #define SUB_S_32(DEST,SRC) \
32392 gen_addsi3_compare0 ((DEST), (SRC), \
32393 GEN_INT (-32))
32394 #define SET(DEST,SRC) \
32395 gen_rtx_SET ((DEST), (SRC))
32396 #define SHIFT(CODE,SRC,AMOUNT) \
32397 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32398 #define LSHIFT(CODE,SRC,AMOUNT) \
32399 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32400 SImode, (SRC), (AMOUNT))
32401 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32402 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32403 SImode, (SRC), (AMOUNT))
32404 #define ORR(A,B) \
32405 gen_rtx_IOR (SImode, (A), (B))
32406 #define BRANCH(COND,LABEL) \
32407 gen_arm_cond_branch ((LABEL), \
32408 gen_rtx_ ## COND (CCmode, cc_reg, \
32409 const0_rtx), \
32410 cc_reg)
32411
32412 /* Shifts by register and shifts by constant are handled separately. */
32413 if (CONST_INT_P (amount))
32414 {
32415 /* We have a shift-by-constant. */
32416
32417 /* First, handle out-of-range shift amounts.
32418 In both cases we try to match the result that an ARM instruction in a
32419 shift-by-register would give. This helps reduce execution
32420 differences between optimization levels, but it won't stop other
32421 parts of the compiler doing different things. This is "undefined
32422 behavior", in any case. */
32423 if (INTVAL (amount) <= 0)
32424 emit_insn (gen_movdi (out, in));
32425 else if (INTVAL (amount) >= 64)
32426 {
32427 if (code == ASHIFTRT)
32428 {
32429 rtx const31_rtx = GEN_INT (31);
32430 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32431 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32432 }
32433 else
32434 emit_insn (gen_movdi (out, const0_rtx));
32435 }
32436
32437 /* Now handle valid shifts. */
32438 else if (INTVAL (amount) < 32)
32439 {
32440 /* Shifts by a constant less than 32. */
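/* As an illustration (constants chosen for exposition), a DImode left
   shift by 10 emits:
     out_high = in_high << 10;
     out_high |= (unsigned) in_low >> 22;
     out_low = in_low << 10;  */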
32441 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32442
32443 /* Clearing the out register in DImode first avoids lots
32444 of spilling and results in less stack usage.
32445 Later this redundant insn is completely removed.
32446 Do that only if "in" and "out" are different registers. */
32447 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32448 emit_insn (SET (out, const0_rtx));
32449 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32450 emit_insn (SET (out_down,
32451 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32452 out_down)));
32453 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32454 }
32455 else
32456 {
32457 /* Shifts by a constant greater than 31. */
32458 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32459
32460 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32461 emit_insn (SET (out, const0_rtx));
32462 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32463 if (code == ASHIFTRT)
32464 emit_insn (gen_ashrsi3 (out_up, in_up,
32465 GEN_INT (31)));
32466 else
32467 emit_insn (SET (out_up, const0_rtx));
32468 }
32469 }
32470 else
32471 {
32472 /* We have a shift-by-register. */
32473 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32474
32475 /* This alternative requires the scratch registers. */
32476 gcc_assert (scratch1 && REG_P (scratch1));
32477 gcc_assert (scratch2 && REG_P (scratch2));
32478
32479 /* We will need the values "amount-32" and "32-amount" later.
32480 Swapping them around now allows the later code to be more general. */
32481 switch (code)
32482 {
32483 case ASHIFT:
32484 emit_insn (SUB_32 (scratch1, amount));
32485 emit_insn (RSB_32 (scratch2, amount));
32486 break;
32487 case ASHIFTRT:
32488 emit_insn (RSB_32 (scratch1, amount));
32489 /* Also set CC = amount > 32. */
32490 emit_insn (SUB_S_32 (scratch2, amount));
32491 break;
32492 case LSHIFTRT:
32493 emit_insn (RSB_32 (scratch1, amount));
32494 emit_insn (SUB_32 (scratch2, amount));
32495 break;
32496 default:
32497 gcc_unreachable ();
32498 }
32499
32500 /* Emit code like this:
32501
32502 arithmetic-left:
32503 out_down = in_down << amount;
32504 out_down = (in_up << (amount - 32)) | out_down;
32505 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32506 out_up = in_up << amount;
32507
32508 arithmetic-right:
32509 out_down = in_down >> amount;
32510 out_down = (in_up << (32 - amount)) | out_down;
32511 if (amount < 32)
32512 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32513 out_up = in_up << amount;
32514
32515 logical-right:
32516 out_down = in_down >> amount;
32517 out_down = (in_up << (32 - amount)) | out_down;
32518 if (amount < 32)
32519 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32520 out_up = in_up << amount;
32521
32522 The ARM and Thumb2 variants are the same but implemented slightly
32523 differently. If this were only called during expand we could just
32524 use the Thumb2 case and let combine do the right thing, but this
32525 can also be called from post-reload splitters. */
32526
32527 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32528
32529 if (!TARGET_THUMB2)
32530 {
32531 /* Emit code for ARM mode. */
32532 emit_insn (SET (out_down,
32533 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32534 if (code == ASHIFTRT)
32535 {
32536 rtx_code_label *done_label = gen_label_rtx ();
32537 emit_jump_insn (BRANCH (LT, done_label));
32538 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32539 out_down)));
32540 emit_label (done_label);
32541 }
32542 else
32543 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32544 out_down)));
32545 }
32546 else
32547 {
32548 /* Emit code for Thumb2 mode.
32549 Thumb2 can't do shift and or in one insn. */
32550 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32551 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32552
32553 if (code == ASHIFTRT)
32554 {
32555 rtx_code_label *done_label = gen_label_rtx ();
32556 emit_jump_insn (BRANCH (LT, done_label));
32557 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32558 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32559 emit_label (done_label);
32560 }
32561 else
32562 {
32563 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32564 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32565 }
32566 }
32567
32568 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32569 }
32570
32571 #undef SUB_32
32572 #undef RSB_32
32573 #undef SUB_S_32
32574 #undef SET
32575 #undef SHIFT
32576 #undef LSHIFT
32577 #undef REV_LSHIFT
32578 #undef ORR
32579 #undef BRANCH
32580 }
32581
32582 /* Returns true if the pattern is a valid symbolic address, which is either a
32583 symbol_ref or (symbol_ref + addend).
32584
32585 According to the ARM ELF ABI, the initial addend of REL-type relocations
32586 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32587 literal field of the instruction as a 16-bit signed value in the range
32588 -32768 <= A < 32768.
32589
32590 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32591 unsigned range of 0 <= A < 256 as described in the AAELF32
32592 relocation handling documentation: REL-type relocations are encoded
32593 as unsigned in this case. */
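/* For example (illustrative RTL): (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 4))) are accepted, while an
   addend of 0x10000 is rejected because it is outside the signed 16-bit
   range (or the unsigned 8-bit range for Thumb-1 without MOVT).  */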
32594
32595 bool
32596 arm_valid_symbolic_address_p (rtx addr)
32597 {
32598 rtx xop0, xop1 = NULL_RTX;
32599 rtx tmp = addr;
32600
32601 if (target_word_relocations)
32602 return false;
32603
32604 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32605 return true;
32606
32607 /* (const (plus: symbol_ref const_int)) */
32608 if (GET_CODE (addr) == CONST)
32609 tmp = XEXP (addr, 0);
32610
32611 if (GET_CODE (tmp) == PLUS)
32612 {
32613 xop0 = XEXP (tmp, 0);
32614 xop1 = XEXP (tmp, 1);
32615
32616 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32617 {
32618 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32619 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32620 else
32621 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32622 }
32623 }
32624
32625 return false;
32626 }
32627
32628 /* Returns true if *COMPARISON is a valid comparison operation, and
32629 puts the operands into a form that is valid for it. */
32630 bool
32631 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32632 {
32633 enum rtx_code code = GET_CODE (*comparison);
32634 int code_int;
32635 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32636 ? GET_MODE (*op2) : GET_MODE (*op1);
32637
32638 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32639
32640 if (code == UNEQ || code == LTGT)
32641 return false;
32642
32643 code_int = (int)code;
32644 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32645 PUT_CODE (*comparison, (enum rtx_code)code_int);
32646
32647 switch (mode)
32648 {
32649 case E_SImode:
32650 if (!arm_add_operand (*op1, mode))
32651 *op1 = force_reg (mode, *op1);
32652 if (!arm_add_operand (*op2, mode))
32653 *op2 = force_reg (mode, *op2);
32654 return true;
32655
32656 case E_DImode:
32657 /* gen_compare_reg() will sort out any invalid operands. */
32658 return true;
32659
32660 case E_HFmode:
32661 if (!TARGET_VFP_FP16INST)
32662 break;
32663 /* FP16 comparisons are done in SF mode. */
32664 mode = SFmode;
32665 *op1 = convert_to_mode (mode, *op1, 1);
32666 *op2 = convert_to_mode (mode, *op2, 1);
32667 /* Fall through. */
32668 case E_SFmode:
32669 case E_DFmode:
32670 if (!vfp_compare_operand (*op1, mode))
32671 *op1 = force_reg (mode, *op1);
32672 if (!vfp_compare_operand (*op2, mode))
32673 *op2 = force_reg (mode, *op2);
32674 return true;
32675 default:
32676 break;
32677 }
32678
32679 return false;
32680
32681 }
32682
32683 /* Maximum number of instructions to set block of memory. */
32684 static int
32685 arm_block_set_max_insns (void)
32686 {
32687 if (optimize_function_for_size_p (cfun))
32688 return 4;
32689 else
32690 return current_tune->max_insns_inline_memset;
32691 }
32692
32693 /* Return TRUE if it's profitable to set a block of memory in the
32694 non-vectorized case. VAL is the value to set the memory
32695 with. LENGTH is the number of bytes to set. ALIGN is the
32696 alignment of the destination memory in bytes. UNALIGNED_P
32697 is TRUE if we can only set the memory with instructions
32698 meeting alignment requirements. USE_STRD_P is TRUE if we
32699 can use strd to set the memory. */
32700 static bool
32701 arm_block_set_non_vect_profit_p (rtx val,
32702 unsigned HOST_WIDE_INT length,
32703 unsigned HOST_WIDE_INT align,
32704 bool unaligned_p, bool use_strd_p)
32705 {
32706 int num = 0;
32707 /* For a leftover of 0-7 bytes, this table gives the minimum number of
32708 strb/strh/str instructions needed to set it. */
32709 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
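/* An illustrative count (the cost of loading VAL is omitted): LENGTH == 15
   with word alignment and no strd needs (15 >> 2) == 3 str instructions
   plus leftover[3] == 2 (strh + strb), one of which may be saved below by
   merging the trailing strh/strb into a single unaligned str.  */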
32710
32711 if (unaligned_p)
32712 {
32713 num = arm_const_inline_cost (SET, val);
32714 num += length / align + length % align;
32715 }
32716 else if (use_strd_p)
32717 {
32718 num = arm_const_double_inline_cost (val);
32719 num += (length >> 3) + leftover[length & 7];
32720 }
32721 else
32722 {
32723 num = arm_const_inline_cost (SET, val);
32724 num += (length >> 2) + leftover[length & 3];
32725 }
32726
32727 /* We may be able to combine last pair STRH/STRB into a single STR
32728 by shifting one byte back. */
32729 if (unaligned_access && length > 3 && (length & 3) == 3)
32730 num--;
32731
32732 return (num <= arm_block_set_max_insns ());
32733 }
32734
32735 /* Return TRUE if it's profitable to set a block of memory in the
32736 vectorized case. LENGTH is the number of bytes to set.
32737 ALIGN is the alignment of destination memory in bytes.
32738 MODE is the vector mode used to set the memory. */
32739 static bool
32740 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32741 unsigned HOST_WIDE_INT align,
32742 machine_mode mode)
32743 {
32744 int num;
32745 bool unaligned_p = ((align & 3) != 0);
32746 unsigned int nelt = GET_MODE_NUNITS (mode);
32747
32748 /* Instruction loading constant value. */
32749 num = 1;
32750 /* Instructions storing the memory. */
32751 num += (length + nelt - 1) / nelt;
32752 /* Instructions adjusting the address expression. The address only
32753 needs adjusting if the destination is 4-byte aligned and the
32754 leftover bytes can only be stored with a misaligned store. */
32755 if (!unaligned_p && (length & 3) != 0)
32756 num++;
32757
32758 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32759 if (!unaligned_p && mode == V16QImode)
32760 num--;
32761
32762 return (num <= arm_block_set_max_insns ());
32763 }
32764
32765 /* Set a block of memory using vectorization instructions for the
32766 unaligned case. We fill the first LENGTH bytes of the memory
32767 area starting from DSTBASE with byte constant VALUE. ALIGN is
32768 the alignment requirement of memory. Return TRUE if succeeded. */
32769 static bool
32770 arm_block_set_unaligned_vect (rtx dstbase,
32771 unsigned HOST_WIDE_INT length,
32772 unsigned HOST_WIDE_INT value,
32773 unsigned HOST_WIDE_INT align)
32774 {
32775 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32776 rtx dst, mem;
32777 rtx val_vec, reg;
32778 rtx (*gen_func) (rtx, rtx);
32779 machine_mode mode;
32780 unsigned HOST_WIDE_INT v = value;
32781 unsigned int offset = 0;
32782 gcc_assert ((align & 0x3) != 0);
32783 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32784 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32785 if (length >= nelt_v16)
32786 {
32787 mode = V16QImode;
32788 gen_func = gen_movmisalignv16qi;
32789 }
32790 else
32791 {
32792 mode = V8QImode;
32793 gen_func = gen_movmisalignv8qi;
32794 }
32795 nelt_mode = GET_MODE_NUNITS (mode);
32796 gcc_assert (length >= nelt_mode);
32797 /* Skip if it isn't profitable. */
32798 if (!arm_block_set_vect_profit_p (length, align, mode))
32799 return false;
32800
32801 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32802 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32803
32804 v = sext_hwi (v, BITS_PER_WORD);
32805
32806 reg = gen_reg_rtx (mode);
32807 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32808 /* Emit instruction loading the constant value. */
32809 emit_move_insn (reg, val_vec);
32810
32811 /* Handle nelt_mode bytes in a vector. */
32812 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32813 {
32814 emit_insn ((*gen_func) (mem, reg));
32815 if (i + 2 * nelt_mode <= length)
32816 {
32817 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32818 offset += nelt_mode;
32819 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32820 }
32821 }
32822
32823 /* If at least nelt_v8 bytes are left over, we must be in
32824 V16QImode. */
32825 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32826
32827 /* Handle (8, 16) bytes leftover. */
32828 if (i + nelt_v8 < length)
32829 {
32830 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32831 offset += length - i;
32832 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32833
32834 /* We are shifting bytes back, set the alignment accordingly. */
32835 if ((length & 1) != 0 && align >= 2)
32836 set_mem_align (mem, BITS_PER_UNIT);
32837
32838 emit_insn (gen_movmisalignv16qi (mem, reg));
32839 }
32840 /* Handle (0, 8] bytes leftover. */
32841 else if (i < length && i + nelt_v8 >= length)
32842 {
32843 if (mode == V16QImode)
32844 reg = gen_lowpart (V8QImode, reg);
32845
32846 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32847 + (nelt_mode - nelt_v8))));
32848 offset += (length - i) + (nelt_mode - nelt_v8);
32849 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32850
32851 /* We are shifting bytes back, set the alignment accordingly. */
32852 if ((length & 1) != 0 && align >= 2)
32853 set_mem_align (mem, BITS_PER_UNIT);
32854
32855 emit_insn (gen_movmisalignv8qi (mem, reg));
32856 }
32857
32858 return true;
32859 }
32860
32861 /* Set a block of memory using vectorization instructions for the
32862 aligned case. We fill the first LENGTH bytes of the memory area
32863 starting from DSTBASE with byte constant VALUE. ALIGN is the
32864 alignment requirement of memory. Return TRUE if succeeded. */
32865 static bool
32866 arm_block_set_aligned_vect (rtx dstbase,
32867 unsigned HOST_WIDE_INT length,
32868 unsigned HOST_WIDE_INT value,
32869 unsigned HOST_WIDE_INT align)
32870 {
32871 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32872 rtx dst, addr, mem;
32873 rtx val_vec, reg;
32874 machine_mode mode;
32875 unsigned int offset = 0;
32876
32877 gcc_assert ((align & 0x3) == 0);
32878 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32879 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32880 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32881 mode = V16QImode;
32882 else
32883 mode = V8QImode;
32884
32885 nelt_mode = GET_MODE_NUNITS (mode);
32886 gcc_assert (length >= nelt_mode);
32887 /* Skip if it isn't profitable. */
32888 if (!arm_block_set_vect_profit_p (length, align, mode))
32889 return false;
32890
32891 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32892
32893 reg = gen_reg_rtx (mode);
32894 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32895 /* Emit instruction loading the constant value. */
32896 emit_move_insn (reg, val_vec);
32897
32898 i = 0;
32899 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32900 if (mode == V16QImode)
32901 {
32902 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32903 emit_insn (gen_movmisalignv16qi (mem, reg));
32904 i += nelt_mode;
32905 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32906 if (i + nelt_v8 < length && i + nelt_v16 > length)
32907 {
32908 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32909 offset += length - nelt_mode;
32910 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32911 /* We are shifting bytes back, set the alignment accordingly. */
32912 if ((length & 0x3) == 0)
32913 set_mem_align (mem, BITS_PER_UNIT * 4);
32914 else if ((length & 0x1) == 0)
32915 set_mem_align (mem, BITS_PER_UNIT * 2);
32916 else
32917 set_mem_align (mem, BITS_PER_UNIT);
32918
32919 emit_insn (gen_movmisalignv16qi (mem, reg));
32920 return true;
32921 }
32922 /* Fall through for bytes leftover. */
32923 mode = V8QImode;
32924 nelt_mode = GET_MODE_NUNITS (mode);
32925 reg = gen_lowpart (V8QImode, reg);
32926 }
32927
32928 /* Handle 8 bytes in a vector. */
32929 for (; (i + nelt_mode <= length); i += nelt_mode)
32930 {
32931 addr = plus_constant (Pmode, dst, i);
32932 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32933 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32934 emit_move_insn (mem, reg);
32935 else
32936 emit_insn (gen_unaligned_storev8qi (mem, reg));
32937 }
32938
32939 /* Handle single word leftover by shifting 4 bytes back. We can
32940 use aligned access for this case. */
32941 if (i + UNITS_PER_WORD == length)
32942 {
32943 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32944 offset += i - UNITS_PER_WORD;
32945 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32946 /* We are shifting 4 bytes back, set the alignment accordingly. */
32947 if (align > UNITS_PER_WORD)
32948 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32949
32950 emit_insn (gen_unaligned_storev8qi (mem, reg));
32951 }
32952 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32953 We have to use unaligned access for this case. */
32954 else if (i < length)
32955 {
32956 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32957 offset += length - nelt_mode;
32958 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32959 /* We are shifting bytes back, set the alignment accordingly. */
32960 if ((length & 1) == 0)
32961 set_mem_align (mem, BITS_PER_UNIT * 2);
32962 else
32963 set_mem_align (mem, BITS_PER_UNIT);
32964
32965 emit_insn (gen_movmisalignv8qi (mem, reg));
32966 }
32967
32968 return true;
32969 }
32970
32971 /* Set a block of memory using plain strh/strb instructions, only
32972 using instructions allowed by ALIGN on the processor. We fill the
32973 first LENGTH bytes of the memory area starting from DSTBASE
32974 with byte constant VALUE. ALIGN is the alignment requirement
32975 of memory. */
32976 static bool
32977 arm_block_set_unaligned_non_vect (rtx dstbase,
32978 unsigned HOST_WIDE_INT length,
32979 unsigned HOST_WIDE_INT value,
32980 unsigned HOST_WIDE_INT align)
32981 {
32982 unsigned int i;
32983 rtx dst, addr, mem;
32984 rtx val_exp, val_reg, reg;
32985 machine_mode mode;
32986 HOST_WIDE_INT v = value;
32987
32988 gcc_assert (align == 1 || align == 2);
32989
32990 if (align == 2)
32991 v |= (value << BITS_PER_UNIT);
32992
32993 v = sext_hwi (v, BITS_PER_WORD);
32994 val_exp = GEN_INT (v);
32995 /* Skip if it isn't profitable. */
32996 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32997 align, true, false))
32998 return false;
32999
33000 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33001 mode = (align == 2 ? HImode : QImode);
33002 val_reg = force_reg (SImode, val_exp);
33003 reg = gen_lowpart (mode, val_reg);
33004
33005 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33006 {
33007 addr = plus_constant (Pmode, dst, i);
33008 mem = adjust_automodify_address (dstbase, mode, addr, i);
33009 emit_move_insn (mem, reg);
33010 }
33011
33012 /* Handle single byte leftover. */
33013 if (i + 1 == length)
33014 {
33015 reg = gen_lowpart (QImode, val_reg);
33016 addr = plus_constant (Pmode, dst, i);
33017 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33018 emit_move_insn (mem, reg);
33019 i++;
33020 }
33021
33022 gcc_assert (i == length);
33023 return true;
33024 }
33025
33026 /* Set a block of memory using plain strd/str/strh/strb instructions,
33027 to permit unaligned copies on processors which support unaligned
33028 semantics for those instructions. We fill the first LENGTH bytes
33029 of the memory area starting from DSTBASE with byte constant VALUE.
33030 ALIGN is the alignment requirement of memory. */
33031 static bool
33032 arm_block_set_aligned_non_vect (rtx dstbase,
33033 unsigned HOST_WIDE_INT length,
33034 unsigned HOST_WIDE_INT value,
33035 unsigned HOST_WIDE_INT align)
33036 {
33037 unsigned int i;
33038 rtx dst, addr, mem;
33039 rtx val_exp, val_reg, reg;
33040 unsigned HOST_WIDE_INT v;
33041 bool use_strd_p;
33042
33043 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33044 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33045
33046 v = (value | (value << 8) | (value << 16) | (value << 24));
33047 if (length < UNITS_PER_WORD)
33048 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33049
33050 if (use_strd_p)
33051 v |= (v << BITS_PER_WORD);
33052 else
33053 v = sext_hwi (v, BITS_PER_WORD);
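/* E.g. (illustration) VALUE == 0xab yields 0xabababab for word stores,
   or 0xabababababababab when strd double-word stores are used.  */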
33054
33055 val_exp = GEN_INT (v);
33056 /* Skip if it isn't profitable. */
33057 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33058 align, false, use_strd_p))
33059 {
33060 if (!use_strd_p)
33061 return false;
33062
33063 /* Try without strd. */
33064 v = (v >> BITS_PER_WORD);
33065 v = sext_hwi (v, BITS_PER_WORD);
33066 val_exp = GEN_INT (v);
33067 use_strd_p = false;
33068 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33069 align, false, use_strd_p))
33070 return false;
33071 }
33072
33073 i = 0;
33074 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33075 /* Handle double words using strd if possible. */
33076 if (use_strd_p)
33077 {
33078 val_reg = force_reg (DImode, val_exp);
33079 reg = val_reg;
33080 for (; (i + 8 <= length); i += 8)
33081 {
33082 addr = plus_constant (Pmode, dst, i);
33083 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33084 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33085 emit_move_insn (mem, reg);
33086 else
33087 emit_insn (gen_unaligned_storedi (mem, reg));
33088 }
33089 }
33090 else
33091 val_reg = force_reg (SImode, val_exp);
33092
33093 /* Handle words. */
33094 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33095 for (; (i + 4 <= length); i += 4)
33096 {
33097 addr = plus_constant (Pmode, dst, i);
33098 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33099 if ((align & 3) == 0)
33100 emit_move_insn (mem, reg);
33101 else
33102 emit_insn (gen_unaligned_storesi (mem, reg));
33103 }
33104
33105 /* Merge last pair of STRH and STRB into a STR if possible. */
33106 if (unaligned_access && i > 0 && (i + 3) == length)
33107 {
33108 addr = plus_constant (Pmode, dst, i - 1);
33109 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33110 /* We are shifting one byte back, set the alignment accordingly. */
33111 if ((align & 1) == 0)
33112 set_mem_align (mem, BITS_PER_UNIT);
33113
33114 /* Most likely this is an unaligned access, and we can't tell at
33115 compilation time. */
33116 emit_insn (gen_unaligned_storesi (mem, reg));
33117 return true;
33118 }
33119
33120 /* Handle half word leftover. */
33121 if (i + 2 <= length)
33122 {
33123 reg = gen_lowpart (HImode, val_reg);
33124 addr = plus_constant (Pmode, dst, i);
33125 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33126 if ((align & 1) == 0)
33127 emit_move_insn (mem, reg);
33128 else
33129 emit_insn (gen_unaligned_storehi (mem, reg));
33130
33131 i += 2;
33132 }
33133
33134 /* Handle single byte leftover. */
33135 if (i + 1 == length)
33136 {
33137 reg = gen_lowpart (QImode, val_reg);
33138 addr = plus_constant (Pmode, dst, i);
33139 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33140 emit_move_insn (mem, reg);
33141 }
33142
33143 return true;
33144 }
33145
33146 /* Set a block of memory using vectorization instructions for both
33147 aligned and unaligned cases. We fill the first LENGTH bytes of
33148 the memory area starting from DSTBASE with byte constant VALUE.
33149 ALIGN is the alignment requirement of memory. */
33150 static bool
33151 arm_block_set_vect (rtx dstbase,
33152 unsigned HOST_WIDE_INT length,
33153 unsigned HOST_WIDE_INT value,
33154 unsigned HOST_WIDE_INT align)
33155 {
33156 /* Check whether we need to use unaligned store instruction. */
33157 if (((align & 3) != 0 || (length & 3) != 0)
33158 /* Check whether unaligned store instruction is available. */
33159 && (!unaligned_access || BYTES_BIG_ENDIAN))
33160 return false;
33161
33162 if ((align & 3) == 0)
33163 return arm_block_set_aligned_vect (dstbase, length, value, align);
33164 else
33165 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33166 }
33167
33168 /* Expand a string store (memset) operation. First we try to do it using
33169 vectorization instructions, then fall back to ARM unaligned access and
33170 double-word stores if profitable. OPERANDS[0] is the destination,
33171 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33172 initialize the memory with, OPERANDS[3] is the known alignment of the
33173 destination. */
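/* For example (a hypothetical call site): clearing a 15-byte, word-aligned
   buffer arrives here with OPERANDS[1] == 15, OPERANDS[2] == 0 and
   OPERANDS[3] == 4, and is handled either by the NEON path or by
   arm_block_set_aligned_non_vect, depending on the tuning.  */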
33174 bool
33175 arm_gen_setmem (rtx *operands)
33176 {
33177 rtx dstbase = operands[0];
33178 unsigned HOST_WIDE_INT length;
33179 unsigned HOST_WIDE_INT value;
33180 unsigned HOST_WIDE_INT align;
33181
33182 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33183 return false;
33184
33185 length = UINTVAL (operands[1]);
33186 if (length > 64)
33187 return false;
33188
33189 value = (UINTVAL (operands[2]) & 0xFF);
33190 align = UINTVAL (operands[3]);
33191 if (TARGET_NEON && length >= 8
33192 && current_tune->string_ops_prefer_neon
33193 && arm_block_set_vect (dstbase, length, value, align))
33194 return true;
33195
33196 if (!unaligned_access && (align & 3) != 0)
33197 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33198
33199 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33200 }
33201
33202
33203 static bool
33204 arm_macro_fusion_p (void)
33205 {
33206 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33207 }
33208
33209 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33210 for MOVW / MOVT macro fusion. */
33211
33212 static bool
33213 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33214 {
33215 /* We are trying to fuse
33216 movw imm / movt imm
33217 instructions as a group that gets scheduled together. */
33218
33219 rtx set_dest = SET_DEST (curr_set);
33220
33221 if (GET_MODE (set_dest) != SImode)
33222 return false;
33223
33224 /* We are trying to match:
33225 prev (movw) == (set (reg r0) (const_int imm16))
33226 curr (movt) == (set (zero_extract (reg r0)
33227 (const_int 16)
33228 (const_int 16))
33229 (const_int imm16_1))
33230 or
33231 prev (movw) == (set (reg r1)
33232 (high (symbol_ref ("SYM"))))
33233 curr (movt) == (set (reg r0)
33234 (lo_sum (reg r1)
33235 (symbol_ref ("SYM")))) */
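/* In assembly terms this is the usual address-materialization pair,
   e.g. (illustrative only):
     movw r0, #:lower16:SYM
     movt r0, #:upper16:SYM  */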
33236
33237 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33238 {
33239 if (CONST_INT_P (SET_SRC (curr_set))
33240 && CONST_INT_P (SET_SRC (prev_set))
33241 && REG_P (XEXP (set_dest, 0))
33242 && REG_P (SET_DEST (prev_set))
33243 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33244 return true;
33245
33246 }
33247 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33248 && REG_P (SET_DEST (curr_set))
33249 && REG_P (SET_DEST (prev_set))
33250 && GET_CODE (SET_SRC (prev_set)) == HIGH
33251 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33252 return true;
33253
33254 return false;
33255 }
33256
33257 static bool
33258 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33259 {
33260 rtx prev_set = single_set (prev);
33261 rtx curr_set = single_set (curr);
33262
33263 if (!prev_set
33264 || !curr_set)
33265 return false;
33266
33267 if (any_condjump_p (curr))
33268 return false;
33269
33270 if (!arm_macro_fusion_p ())
33271 return false;
33272
33273 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33274 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33275 return true;
33276
33277 return false;
33278 }
33279
33280 /* Return true iff the instruction fusion described by OP is enabled. */
33281 bool
33282 arm_fusion_enabled_p (tune_params::fuse_ops op)
33283 {
33284 return current_tune->fusible_ops & op;
33285 }
33286
33287 /* Return TRUE if the return address signing mechanism is enabled. */
33288 bool
33289 arm_current_function_pac_enabled_p (void)
33290 {
33291 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33292 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33293 && !crtl->is_leaf));
33294 }
33295
33296 /* Raise an error if the current target arch is not BTI compatible. */
33297 void aarch_bti_arch_check (void)
33298 {
33299 if (!arm_arch8m_main)
33300 error ("This architecture does not support branch protection instructions");
33301 }
33302
33303 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33304 bool
33305 aarch_bti_enabled (void)
33306 {
33307 return aarch_enable_bti != 0;
33308 }
33309
33310 /* Check if INSN is a BTI J insn. */
33311 bool
33312 aarch_bti_j_insn_p (rtx_insn *insn)
33313 {
33314 if (!insn || !INSN_P (insn))
33315 return false;
33316
33317 rtx pat = PATTERN (insn);
33318 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33319 }
33320
33321 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33322 bool
33323 aarch_pac_insn_p (rtx x)
33324 {
33325 if (!x || !INSN_P (x))
33326 return false;
33327
33328 rtx pat = PATTERN (x);
33329
33330 if (GET_CODE (pat) == SET)
33331 {
33332 rtx tmp = XEXP (pat, 1);
33333 if (tmp
33334 && ((GET_CODE (tmp) == UNSPEC
33335 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33336 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33337 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33338 return true;
33339 }
33340
33341 return false;
33342 }
33343
33344 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33345 For Arm, both of these map to a simple BTI instruction. */
33346
33347 rtx
33348 aarch_gen_bti_c (void)
33349 {
33350 return gen_bti_nop ();
33351 }
33352
33353 rtx
33354 aarch_gen_bti_j (void)
33355 {
33356 return gen_bti_nop ();
33357 }
33358
33359 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33360 scheduled for speculative execution. Reject the long-running division
33361 and square-root instructions. */
33362
33363 static bool
33364 arm_sched_can_speculate_insn (rtx_insn *insn)
33365 {
33366 switch (get_attr_type (insn))
33367 {
33368 case TYPE_SDIV:
33369 case TYPE_UDIV:
33370 case TYPE_FDIVS:
33371 case TYPE_FDIVD:
33372 case TYPE_FSQRTS:
33373 case TYPE_FSQRTD:
33374 case TYPE_NEON_FP_SQRT_S:
33375 case TYPE_NEON_FP_SQRT_D:
33376 case TYPE_NEON_FP_SQRT_S_Q:
33377 case TYPE_NEON_FP_SQRT_D_Q:
33378 case TYPE_NEON_FP_DIV_S:
33379 case TYPE_NEON_FP_DIV_D:
33380 case TYPE_NEON_FP_DIV_S_Q:
33381 case TYPE_NEON_FP_DIV_D_Q:
33382 return false;
33383 default:
33384 return true;
33385 }
33386 }
33387
33388 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33389
33390 static unsigned HOST_WIDE_INT
33391 arm_asan_shadow_offset (void)
33392 {
33393 return HOST_WIDE_INT_1U << 29;
33394 }
33395
33396
33397 /* This is a temporary fix for PR60655. Ideally we need
33398 to handle most of these cases in the generic part but
33399 currently we reject minus (..) (sym_ref). We try to
33400 ameliorate the case with minus (sym_ref1) (sym_ref2)
33401 where they are in the same section. */
33402
33403 static bool
33404 arm_const_not_ok_for_debug_p (rtx p)
33405 {
33406 tree decl_op0 = NULL;
33407 tree decl_op1 = NULL;
33408
33409 if (GET_CODE (p) == UNSPEC)
33410 return true;
33411 if (GET_CODE (p) == MINUS)
33412 {
33413 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33414 {
33415 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33416 if (decl_op1
33417 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33418 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33419 {
33420 if ((VAR_P (decl_op1)
33421 || TREE_CODE (decl_op1) == CONST_DECL)
33422 && (VAR_P (decl_op0)
33423 || TREE_CODE (decl_op0) == CONST_DECL))
33424 return (get_variable_section (decl_op1, false)
33425 != get_variable_section (decl_op0, false));
33426
33427 if (TREE_CODE (decl_op1) == LABEL_DECL
33428 && TREE_CODE (decl_op0) == LABEL_DECL)
33429 return (DECL_CONTEXT (decl_op1)
33430 != DECL_CONTEXT (decl_op0));
33431 }
33432
33433 return true;
33434 }
33435 }
33436
33437 return false;
33438 }
33439
33440 /* Return TRUE if X is a reference to a value in a constant pool. */
33441 extern bool
33442 arm_is_constant_pool_ref (rtx x)
33443 {
33444 return (MEM_P (x)
33445 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33446 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33447 }
33448
33449 /* Remember the last target of arm_set_current_function. */
33450 static GTY(()) tree arm_previous_fndecl;
33451
33452 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33453
33454 void
33455 save_restore_target_globals (tree new_tree)
33456 {
33457 /* If we have a previous state, use it. */
33458 if (TREE_TARGET_GLOBALS (new_tree))
33459 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33460 else if (new_tree == target_option_default_node)
33461 restore_target_globals (&default_target_globals);
33462 else
33463 {
33464 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33465 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33466 }
33467
33468 arm_option_params_internal ();
33469 }
33470
33471 /* Invalidate arm_previous_fndecl. */
33472
33473 void
33474 arm_reset_previous_fndecl (void)
33475 {
33476 arm_previous_fndecl = NULL_TREE;
33477 }
33478
33479 /* Establish appropriate back-end context for processing the function
33480 FNDECL. The argument might be NULL to indicate processing at top
33481 level, outside of any function scope. */
33482
33483 static void
33484 arm_set_current_function (tree fndecl)
33485 {
33486 if (!fndecl || fndecl == arm_previous_fndecl)
33487 return;
33488
33489 tree old_tree = (arm_previous_fndecl
33490 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33491 : NULL_TREE);
33492
33493 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33494
33495 /* If current function has no attributes but previous one did,
33496 use the default node. */
33497 if (! new_tree && old_tree)
33498 new_tree = target_option_default_node;
33499
33500 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
33501 the default have been handled by save_restore_target_globals from
33502 arm_pragma_target_parse. */
33503 if (old_tree == new_tree)
33504 return;
33505
33506 arm_previous_fndecl = fndecl;
33507
33508 /* First set the target options. */
33509 cl_target_option_restore (&global_options, &global_options_set,
33510 TREE_TARGET_OPTION (new_tree));
33511
33512 save_restore_target_globals (new_tree);
33513
33514 arm_override_options_after_change_1 (&global_options, &global_options_set);
33515 }
33516
33517 /* Implement TARGET_OPTION_PRINT. */
33518
33519 static void
33520 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33521 {
33522 int flags = ptr->x_target_flags;
33523 const char *fpu_name;
33524
33525 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33526 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33527
33528 fprintf (file, "%*sselected isa %s\n", indent, "",
33529 TARGET_THUMB2_P (flags) ? "thumb2" :
33530 TARGET_THUMB_P (flags) ? "thumb1" :
33531 "arm");
33532
33533 if (ptr->x_arm_arch_string)
33534 fprintf (file, "%*sselected architecture %s\n", indent, "",
33535 ptr->x_arm_arch_string);
33536
33537 if (ptr->x_arm_cpu_string)
33538 fprintf (file, "%*sselected CPU %s\n", indent, "",
33539 ptr->x_arm_cpu_string);
33540
33541 if (ptr->x_arm_tune_string)
33542 fprintf (file, "%*sselected tune %s\n", indent, "",
33543 ptr->x_arm_tune_string);
33544
33545 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33546 }
33547
33548 /* Hook to determine if one function can safely inline another. */
33549
33550 static bool
33551 arm_can_inline_p (tree caller, tree callee)
33552 {
33553 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33554 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33555 bool can_inline = true;
33556
33557 struct cl_target_option *caller_opts
33558 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33559 : target_option_default_node);
33560
33561 struct cl_target_option *callee_opts
33562 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33563 : target_option_default_node);
33564
33565 if (callee_opts == caller_opts)
33566 return true;
33567
33568 /* Callee's ISA features should be a subset of the caller's. */
33569 struct arm_build_target caller_target;
33570 struct arm_build_target callee_target;
33571 caller_target.isa = sbitmap_alloc (isa_num_bits);
33572 callee_target.isa = sbitmap_alloc (isa_num_bits);
33573
33574 arm_configure_build_target (&caller_target, caller_opts, false);
33575 arm_configure_build_target (&callee_target, callee_opts, false);
33576 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33577 can_inline = false;
33578
33579 sbitmap_free (caller_target.isa);
33580 sbitmap_free (callee_target.isa);
33581
33582 /* OK to inline between different modes.
33583 Functions with mode-specific instructions, e.g. using asm,
33584 must be explicitly protected with noinline. */
33585 return can_inline;
33586 }
33587
33588 /* Hook to fix function's alignment affected by target attribute. */
33589
33590 static void
33591 arm_relayout_function (tree fndecl)
33592 {
33593 if (DECL_USER_ALIGN (fndecl))
33594 return;
33595
33596 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33597
33598 if (!callee_tree)
33599 callee_tree = target_option_default_node;
33600
33601 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33602 SET_DECL_ALIGN
33603 (fndecl,
33604 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33605 }
33606
33607 /* Inner function to process attribute((target(...))): take an argument and
33608 set the current options from it. If we have a list, recursively
33609 go over the list. */
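/* For example (an illustrative attribute string),
   __attribute__((target("arch=armv7-a,thumb"))) is split at the commas
   below, yielding one "arch=" token and one "thumb" token.  */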
33610
33611 static bool
33612 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33613 {
33614 if (TREE_CODE (args) == TREE_LIST)
33615 {
33616 bool ret = true;
33617
33618 for (; args; args = TREE_CHAIN (args))
33619 if (TREE_VALUE (args)
33620 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33621 ret = false;
33622 return ret;
33623 }
33624
33625 else if (TREE_CODE (args) != STRING_CST)
33626 {
33627 error ("attribute %<target%> argument not a string");
33628 return false;
33629 }
33630
33631 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33632 char *q;
33633
33634 while ((q = strtok (argstr, ",")) != NULL)
33635 {
33636 argstr = NULL;
33637 if (!strcmp (q, "thumb"))
33638 {
33639 opts->x_target_flags |= MASK_THUMB;
33640 if (TARGET_FDPIC && !arm_arch_thumb2)
33641 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33642 }
33643
33644 else if (!strcmp (q, "arm"))
33645 opts->x_target_flags &= ~MASK_THUMB;
33646
33647 else if (!strcmp (q, "general-regs-only"))
33648 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33649
33650 else if (startswith (q, "fpu="))
33651 {
33652 int fpu_index;
33653 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33654 &fpu_index, CL_TARGET))
33655 {
33656 error ("invalid fpu for target attribute or pragma %qs", q);
33657 return false;
33658 }
33659 if (fpu_index == TARGET_FPU_auto)
33660 {
33661 /* This doesn't really make sense until we support
33662 general dynamic selection of the architecture and all
33663 sub-features. */
33664 sorry ("auto fpu selection not currently permitted here");
33665 return false;
33666 }
33667 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33668 }
33669 else if (startswith (q, "arch="))
33670 {
33671 char *arch = q + 5;
33672 const arch_option *arm_selected_arch
33673 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33674
33675 if (!arm_selected_arch)
33676 {
33677 error ("invalid architecture for target attribute or pragma %qs",
33678 q);
33679 return false;
33680 }
33681
33682 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33683 }
33684 else if (q[0] == '+')
33685 {
33686 opts->x_arm_arch_string
33687 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33688 }
33689 else
33690 {
33691 error ("unknown target attribute or pragma %qs", q);
33692 return false;
33693 }
33694 }
33695
33696 return true;
33697 }
33698
33699 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33700
33701 tree
33702 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33703 struct gcc_options *opts_set)
33704 {
33705 struct cl_target_option cl_opts;
33706
33707 if (!arm_valid_target_attribute_rec (args, opts))
33708 return NULL_TREE;
33709
33710 cl_target_option_save (&cl_opts, opts, opts_set);
33711 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33712 arm_option_check_internal (opts);
33713 /* Do any overrides, such as global options arch=xxx.
33714 We do this since arm_active_target was overridden. */
33715 arm_option_reconfigure_globals ();
33716 arm_options_perform_arch_sanity_checks ();
33717 arm_option_override_internal (opts, opts_set);
33718
33719 return build_target_option_node (opts, opts_set);
33720 }
33721
33722 static void
33723 add_attribute (const char * mode, tree *attributes)
33724 {
33725 size_t len = strlen (mode);
33726 tree value = build_string (len, mode);
33727
33728 TREE_TYPE (value) = build_array_type (char_type_node,
33729 build_index_type (size_int (len)));
33730
33731 *attributes = tree_cons (get_identifier ("target"),
33732 build_tree_list (NULL_TREE, value),
33733 *attributes);
33734 }
33735
33736 /* For testing. Insert thumb or arm modes alternately on functions. */
33737
33738 static void
33739 arm_insert_attributes (tree fndecl, tree * attributes)
33740 {
33741 const char *mode;
33742
33743 if (! TARGET_FLIP_THUMB)
33744 return;
33745
33746 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33747 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33748 return;
33749
33750 /* Nested definitions must inherit mode. */
33751 if (current_function_decl)
33752 {
33753 mode = TARGET_THUMB ? "thumb" : "arm";
33754 add_attribute (mode, attributes);
33755 return;
33756 }
33757
33758 /* If there is already a setting don't change it. */
33759 if (lookup_attribute ("target", *attributes) != NULL)
33760 return;
33761
33762 mode = thumb_flipper ? "thumb" : "arm";
33763 add_attribute (mode, attributes);
33764
33765 thumb_flipper = !thumb_flipper;
33766 }
33767
33768 /* Hook to validate attribute((target("string"))). */
33769
33770 static bool
33771 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33772 tree args, int ARG_UNUSED (flags))
33773 {
33774 bool ret = true;
33775 struct gcc_options func_options, func_options_set;
33776 tree cur_tree, new_optimize;
33777 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33778
33779 /* Get the optimization options of the current function. */
33780 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33781
33782 /* If the function changed the optimization levels as well as setting target
33783 options, start with the optimizations specified. */
33784 if (!func_optimize)
33785 func_optimize = optimization_default_node;
33786
33787 /* Init func_options. */
33788 memset (&func_options, 0, sizeof (func_options));
33789 init_options_struct (&func_options, NULL);
33790 lang_hooks.init_options_struct (&func_options);
33791 memset (&func_options_set, 0, sizeof (func_options_set));
33792
33793 /* Initialize func_options to the defaults. */
33794 cl_optimization_restore (&func_options, &func_options_set,
33795 TREE_OPTIMIZATION (func_optimize));
33796
33797 cl_target_option_restore (&func_options, &func_options_set,
33798 TREE_TARGET_OPTION (target_option_default_node));
33799
33800 /* Set func_options flags with new target mode. */
33801 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33802 &func_options_set);
33803
33804 if (cur_tree == NULL_TREE)
33805 ret = false;
33806
33807 new_optimize = build_optimization_node (&func_options, &func_options_set);
33808
33809 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33810
33811 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33812
33813 return ret;
33814 }
33815
33816 /* Match an ISA feature bitmap to a named FPU. We always use the
33817 first entry that exactly matches the feature set, so that we
33818 effectively canonicalize the FPU name for the assembler. */
33819 static const char*
33820 arm_identify_fpu_from_isa (sbitmap isa)
33821 {
33822 auto_sbitmap fpubits (isa_num_bits);
33823 auto_sbitmap cand_fpubits (isa_num_bits);
33824
33825 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33826
33827 /* If there are no ISA feature bits relating to the FPU, we must be
33828 doing soft-float. */
33829 if (bitmap_empty_p (fpubits))
33830 return "softvfp";
33831
33832 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33833 {
33834 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33835 if (bitmap_equal_p (fpubits, cand_fpubits))
33836 return all_fpus[i].name;
33837 }
33838 /* We must find an entry, or things have gone wrong. */
33839 gcc_unreachable ();
33840 }
33841
33842 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33843 by the function FNDECL. */
33844 void
33845 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33846 {
33847 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33848
33849 struct cl_target_option *targ_options;
33850 if (target_parts)
33851 targ_options = TREE_TARGET_OPTION (target_parts);
33852 else
33853 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33854 gcc_assert (targ_options);
33855
33856 arm_print_asm_arch_directives (stream, targ_options);
33857
33858 fprintf (stream, "\t.syntax unified\n");
33859
33860 if (TARGET_THUMB)
33861 {
33862 if (is_called_in_ARM_mode (decl)
33863 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33864 && cfun->is_thunk))
33865 fprintf (stream, "\t.code 32\n");
33866 else if (TARGET_THUMB1)
33867 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33868 else
33869 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33870 }
33871 else
33872 fprintf (stream, "\t.arm\n");
33873
33874 if (TARGET_POKE_FUNCTION_NAME)
33875 arm_poke_function_name (stream, (const char *) name);
33876 }
33877
33878 /* If MEM is in the form of [base+offset], extract the two parts
33879 of the address and store them in BASE and OFFSET; otherwise return
33880 false after clearing BASE and OFFSET. */
33881
33882 static bool
33883 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33884 {
33885 rtx addr;
33886
33887 gcc_assert (MEM_P (mem));
33888
33889 addr = XEXP (mem, 0);
33890
33891 /* Strip off const from addresses like (const (addr)). */
33892 if (GET_CODE (addr) == CONST)
33893 addr = XEXP (addr, 0);
33894
33895 if (REG_P (addr))
33896 {
33897 *base = addr;
33898 *offset = const0_rtx;
33899 return true;
33900 }
33901
33902 if (GET_CODE (addr) == PLUS
33903 && GET_CODE (XEXP (addr, 0)) == REG
33904 && CONST_INT_P (XEXP (addr, 1)))
33905 {
33906 *base = XEXP (addr, 0);
33907 *offset = XEXP (addr, 1);
33908 return true;
33909 }
33910
33911 *base = NULL_RTX;
33912 *offset = NULL_RTX;
33913
33914 return false;
33915 }
33916
33917 /* If INSN is a load or store with an address in the form [base+offset],
33918 extract the two parts into BASE and OFFSET. IS_LOAD is set
33919 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33920 otherwise return FALSE. */
33921
33922 static bool
33923 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33924 {
33925 rtx x, dest, src;
33926
33927 gcc_assert (INSN_P (insn));
33928 x = PATTERN (insn);
33929 if (GET_CODE (x) != SET)
33930 return false;
33931
33932 src = SET_SRC (x);
33933 dest = SET_DEST (x);
33934 if (REG_P (src) && MEM_P (dest))
33935 {
33936 *is_load = false;
33937 extract_base_offset_in_addr (dest, base, offset);
33938 }
33939 else if (MEM_P (src) && REG_P (dest))
33940 {
33941 *is_load = true;
33942 extract_base_offset_in_addr (src, base, offset);
33943 }
33944 else
33945 return false;
33946
33947 return (*base != NULL_RTX && *offset != NULL_RTX);
33948 }
33949
33950 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33951
33952    Currently we only support fusing ldr and str instructions, so FUSION_PRI
33953    and PRI are only calculated for these instructions.  For other instructions,
33954    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
33955    instruction fusion can be supported by returning different priorities.
33956
33957 It's important that irrelevant instructions get the largest FUSION_PRI. */
33958
33959 static void
33960 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33961 int *fusion_pri, int *pri)
33962 {
33963 int tmp, off_val;
33964 bool is_load;
33965 rtx base, offset;
33966
33967 gcc_assert (INSN_P (insn));
33968
33969 tmp = max_pri - 1;
33970 if (!fusion_load_store (insn, &base, &offset, &is_load))
33971 {
33972 *pri = tmp;
33973 *fusion_pri = tmp;
33974 return;
33975 }
33976
33977 /* Load goes first. */
33978 if (is_load)
33979 *fusion_pri = tmp - 1;
33980 else
33981 *fusion_pri = tmp - 2;
33982
33983 tmp /= 2;
33984
33985 /* INSN with smaller base register goes first. */
33986 tmp -= ((REGNO (base) & 0xff) << 20);
33987
33988 /* INSN with smaller offset goes first. */
33989 off_val = (int)(INTVAL (offset));
33990 if (off_val >= 0)
33991 tmp -= (off_val & 0xfffff);
33992 else
33993 tmp += ((- off_val) & 0xfffff);
33994
33995 *pri = tmp;
33996 return;
33997 }
33998
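/* To sketch the calculation above: two loads from the same base register,
   say ldr r2, [r1, #4] and ldr r3, [r1, #8], receive the same FUSION_PRI
   (loads use tmp - 1), and the access with the smaller offset gets the
   larger PRI, which encourages the scheduler to keep the pair adjacent and
   in increasing-offset order so that it can later be combined (e.g. into
   an ldrd).  */
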
33999
34000 /* Construct and return a PARALLEL RTX vector with elements numbering the
34001 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34002 the vector - from the perspective of the architecture. This does not
34003 line up with GCC's perspective on lane numbers, so we end up with
34004    different masks depending on our target endianness.  The diagram
34005    below may help.  We must draw the distinction when building masks
34006    which select one half of the vector.  An instruction selecting
34007    architectural low-lanes for a big-endian target must be described using
34008 a mask selecting GCC high-lanes.
34009
34010 Big-Endian Little-Endian
34011
34012 GCC 0 1 2 3 3 2 1 0
34013 | x | x | x | x | | x | x | x | x |
34014 Architecture 3 2 1 0 3 2 1 0
34015
34016 Low Mask: { 2, 3 } { 0, 1 }
34017 High Mask: { 0, 1 } { 2, 3 }
34018 */
34019
34020 rtx
34021 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34022 {
34023 int nunits = GET_MODE_NUNITS (mode);
34024 rtvec v = rtvec_alloc (nunits / 2);
34025 int high_base = nunits / 2;
34026 int low_base = 0;
34027 int base;
34028 rtx t1;
34029 int i;
34030
34031 if (BYTES_BIG_ENDIAN)
34032 base = high ? low_base : high_base;
34033 else
34034 base = high ? high_base : low_base;
34035
34036 for (i = 0; i < nunits / 2; i++)
34037 RTVEC_ELT (v, i) = GEN_INT (base + i);
34038
34039 t1 = gen_rtx_PARALLEL (mode, v);
34040 return t1;
34041 }
34042
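/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   "High Mask" row of the diagram above.  */
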
34043 /* Check OP for validity as a PARALLEL RTX vector with elements
34044    numbering the lanes of either the high (HIGH == TRUE) or low
34045    (HIGH == FALSE) half, from the perspective of the architecture.  See
34046    the diagram above arm_simd_vect_par_cnst_half for more details.  */
34047
34048 bool
34049 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34050 bool high)
34051 {
34052 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34053 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34054 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34055 int i = 0;
34056
34057 if (!VECTOR_MODE_P (mode))
34058 return false;
34059
34060 if (count_op != count_ideal)
34061 return false;
34062
34063 for (i = 0; i < count_ideal; i++)
34064 {
34065 rtx elt_op = XVECEXP (op, 0, i);
34066 rtx elt_ideal = XVECEXP (ideal, 0, i);
34067
34068 if (!CONST_INT_P (elt_op)
34069 || INTVAL (elt_ideal) != INTVAL (elt_op))
34070 return false;
34071 }
34072 return true;
34073 }
34074
34075 /* We can output an mi_thunk for all cases except for a non-zero
34076    vcall_offset in Thumb1.  */
34077 static bool
34078 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34079 const_tree)
34080 {
34081   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
34082 if (vcall_offset && TARGET_THUMB1)
34083 return false;
34084
34085 /* Otherwise ok. */
34086 return true;
34087 }
34088
34089 /* Generate RTL for a conditional branch with rtx comparison CODE in
34090 mode CC_MODE. The destination of the unlikely conditional branch
34091 is LABEL_REF. */
34092
34093 void
34094 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34095 rtx label_ref)
34096 {
34097 rtx x;
34098 x = gen_rtx_fmt_ee (code, VOIDmode,
34099 gen_rtx_REG (cc_mode, CC_REGNUM),
34100 const0_rtx);
34101
34102 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34103 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34104 pc_rtx);
34105 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34106 }
34107
34108 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34109
34110 For pure-code sections there is no letter code for this attribute, so
34111 output all the section flags numerically when this is needed. */
34112
34113 static bool
34114 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34115 {
34116
34117 if (flags & SECTION_ARM_PURECODE)
34118 {
34119 *num = 0x20000000;
34120
34121 if (!(flags & SECTION_DEBUG))
34122 *num |= 0x2;
34123 if (flags & SECTION_EXCLUDE)
34124 *num |= 0x80000000;
34125 if (flags & SECTION_WRITE)
34126 *num |= 0x1;
34127 if (flags & SECTION_CODE)
34128 *num |= 0x4;
34129 if (flags & SECTION_MERGE)
34130 *num |= 0x10;
34131 if (flags & SECTION_STRINGS)
34132 *num |= 0x20;
34133 if (flags & SECTION_TLS)
34134 *num |= 0x400;
34135 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34136 *num |= 0x200;
34137
34138 return true;
34139 }
34140
34141 return false;
34142 }
34143
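/* For example, a non-writable, non-debug executable pure-code section
   (SECTION_CODE set) is encoded as 0x20000000 | 0x2 | 0x4 = 0x20000006,
   i.e. SHF_ARM_PURECODE + SHF_ALLOC + SHF_EXECINSTR in ELF terms.  */
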
34144 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34145
34146 If pure-code is passed as an option, make sure all functions are in
34147 sections that have the SHF_ARM_PURECODE attribute. */
34148
34149 static section *
34150 arm_function_section (tree decl, enum node_frequency freq,
34151 bool startup, bool exit)
34152 {
34153 const char * section_name;
34154 section * sec;
34155
34156 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34157 return default_function_section (decl, freq, startup, exit);
34158
34159 if (!target_pure_code)
34160 return default_function_section (decl, freq, startup, exit);
34161
34162
34163 section_name = DECL_SECTION_NAME (decl);
34164
34165 /* If a function is not in a named section then it falls under the 'default'
34166 text section, also known as '.text'. We can preserve previous behavior as
34167 the default text section already has the SHF_ARM_PURECODE section
34168 attribute. */
34169 if (!section_name)
34170 {
34171 section *default_sec = default_function_section (decl, freq, startup,
34172 exit);
34173
34174 /* If default_sec is not null, then it must be a special section like for
34175 example .text.startup. We set the pure-code attribute and return the
34176 same section to preserve existing behavior. */
34177 if (default_sec)
34178 default_sec->common.flags |= SECTION_ARM_PURECODE;
34179 return default_sec;
34180 }
34181
34182 /* Otherwise look whether a section has already been created with
34183 'section_name'. */
34184 sec = get_named_section (decl, section_name, 0);
34185 if (!sec)
34186 /* If that is not the case passing NULL as the section's name to
34187 'get_named_section' will create a section with the declaration's
34188 section name. */
34189 sec = get_named_section (decl, NULL, 0);
34190
34191 /* Set the SHF_ARM_PURECODE attribute. */
34192 sec->common.flags |= SECTION_ARM_PURECODE;
34193
34194 return sec;
34195 }
34196
34197 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
34198
34199    If DECL is a function declaration and pure-code is passed as an option
34200    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
34201    section's name and RELOC indicates whether the declaration's initializer may
34202    contain runtime relocations.  */
34203
34204 static unsigned int
34205 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34206 {
34207 unsigned int flags = default_section_type_flags (decl, name, reloc);
34208
34209 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34210 flags |= SECTION_ARM_PURECODE;
34211
34212 return flags;
34213 }
34214
34215 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34216
34217 static void
34218 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34219 rtx op0, rtx op1,
34220 rtx *quot_p, rtx *rem_p)
34221 {
34222 if (mode == SImode)
34223 gcc_assert (!TARGET_IDIV);
34224
34225 scalar_int_mode libval_mode
34226 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34227
34228 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34229 libval_mode, op0, mode, op1, mode);
34230
34231 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34232 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34233 GET_MODE_SIZE (mode));
34234
34235 gcc_assert (quotient);
34236 gcc_assert (remainder);
34237
34238 *quot_p = quotient;
34239 *rem_p = remainder;
34240 }
34241
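/* For example, for MODE == SImode the library call returns its value in
   DImode (the smallest integer mode holding 2 * 32 bits); the quotient is
   then extracted as the subreg at byte offset 0 and the remainder as the
   subreg at byte offset GET_MODE_SIZE (SImode) == 4.  */
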
34242 /* This function checks for the availability of the coprocessor builtin passed
34243    in BUILTIN for the current target.  Returns true if it is available and
34244    false otherwise.  If a BUILTIN is passed for which this function has not
34245    been implemented it will cause an internal compiler error.  */
34246
34247 bool
34248 arm_coproc_builtin_available (enum unspecv builtin)
34249 {
34250 /* None of these builtins are available in Thumb mode if the target only
34251 supports Thumb-1. */
34252 if (TARGET_THUMB1)
34253 return false;
34254
34255 switch (builtin)
34256 {
34257 case VUNSPEC_CDP:
34258 case VUNSPEC_LDC:
34259 case VUNSPEC_LDCL:
34260 case VUNSPEC_STC:
34261 case VUNSPEC_STCL:
34262 case VUNSPEC_MCR:
34263 case VUNSPEC_MRC:
34264 if (arm_arch4)
34265 return true;
34266 break;
34267 case VUNSPEC_CDP2:
34268 case VUNSPEC_LDC2:
34269 case VUNSPEC_LDC2L:
34270 case VUNSPEC_STC2:
34271 case VUNSPEC_STC2L:
34272 case VUNSPEC_MCR2:
34273 case VUNSPEC_MRC2:
34274 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34275 ARMv8-{A,M}. */
34276 if (arm_arch5t)
34277 return true;
34278 break;
34279 case VUNSPEC_MCRR:
34280 case VUNSPEC_MRRC:
34281 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34282 ARMv8-{A,M}. */
34283 if (arm_arch6 || arm_arch5te)
34284 return true;
34285 break;
34286 case VUNSPEC_MCRR2:
34287 case VUNSPEC_MRRC2:
34288 if (arm_arch6)
34289 return true;
34290 break;
34291 default:
34292 gcc_unreachable ();
34293 }
34294 return false;
34295 }
34296
34297 /* This function returns true if OP is a valid memory operand for the ldc and
34298 stc coprocessor instructions and false otherwise. */
34299
34300 bool
34301 arm_coproc_ldc_stc_legitimate_address (rtx op)
34302 {
34303 HOST_WIDE_INT range;
34304 /* Has to be a memory operand. */
34305 if (!MEM_P (op))
34306 return false;
34307
34308 op = XEXP (op, 0);
34309
34310 /* We accept registers. */
34311 if (REG_P (op))
34312 return true;
34313
34314   switch (GET_CODE (op))
34315 {
34316 case PLUS:
34317 {
34318 /* Or registers with an offset. */
34319 if (!REG_P (XEXP (op, 0)))
34320 return false;
34321
34322 op = XEXP (op, 1);
34323
34324 /* The offset must be an immediate though. */
34325 if (!CONST_INT_P (op))
34326 return false;
34327
34328 range = INTVAL (op);
34329
34330 /* Within the range of [-1020,1020]. */
34331 if (!IN_RANGE (range, -1020, 1020))
34332 return false;
34333
34334 /* And a multiple of 4. */
34335 return (range % 4) == 0;
34336 }
34337 case PRE_INC:
34338 case POST_INC:
34339 case PRE_DEC:
34340 case POST_DEC:
34341 return REG_P (XEXP (op, 0));
34342 default:
34343 gcc_unreachable ();
34344 }
34345 return false;
34346 }
34347
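/* For example, (mem (reg r0)) and (mem (plus (reg r0) (const_int -1020)))
   are accepted, whereas an offset of 1022 is rejected (not a multiple of 4)
   and an offset of 1024 is rejected (outside [-1020, 1020]).  */
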
34348 /* Return the diagnostic message string if conversion from FROMTYPE to
34349 TOTYPE is not allowed, NULL otherwise. */
34350
34351 static const char *
34352 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34353 {
34354 if (element_mode (fromtype) != element_mode (totype))
34355 {
34356       /* Do not allow conversions to/from BFmode scalar types.  */
34357 if (TYPE_MODE (fromtype) == BFmode)
34358 return N_("invalid conversion from type %<bfloat16_t%>");
34359 if (TYPE_MODE (totype) == BFmode)
34360 return N_("invalid conversion to type %<bfloat16_t%>");
34361 }
34362
34363 /* Conversion allowed. */
34364 return NULL;
34365 }
34366
34367 /* Return the diagnostic message string if the unary operation OP is
34368 not permitted on TYPE, NULL otherwise. */
34369
34370 static const char *
34371 arm_invalid_unary_op (int op, const_tree type)
34372 {
34373 /* Reject all single-operand operations on BFmode except for &. */
34374 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34375 return N_("operation not permitted on type %<bfloat16_t%>");
34376
34377 /* Operation allowed. */
34378 return NULL;
34379 }
34380
34381 /* Return the diagnostic message string if the binary operation OP is
34382 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34383
34384 static const char *
34385 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34386 const_tree type2)
34387 {
34388 /* Reject all 2-operand operations on BFmode. */
34389 if (element_mode (type1) == BFmode
34390 || element_mode (type2) == BFmode)
34391 return N_("operation not permitted on type %<bfloat16_t%>");
34392
34393 /* Operation allowed. */
34394 return NULL;
34395 }
34396
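/* As a sketch of the effect of the three hooks above (assuming a target and
   options where the __bf16 scalar type is available):

     __bf16 b;
     float f = (float) b;	/* rejected: invalid conversion from bfloat16_t  */
     __bf16 c = b + b;		/* rejected: operation not permitted on bfloat16_t  */

   whereas taking the address of b (&b) remains valid.  */
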
34397 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34398
34399 In VFPv1, VFP registers could only be accessed in the mode they were
34400 set, so subregs would be invalid there. However, we don't support
34401 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34402
34403 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34404 VFP registers in little-endian order. We can't describe that accurately to
34405 GCC, so avoid taking subregs of such values.
34406
34407 The only exception is going from a 128-bit to a 64-bit type. In that
34408 case the data layout happens to be consistent for big-endian, so we
34409 explicitly allow that case. */
34410
34411 static bool
34412 arm_can_change_mode_class (machine_mode from, machine_mode to,
34413 reg_class_t rclass)
34414 {
34415 if (TARGET_BIG_END
34416 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34417 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34418 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34419 && reg_classes_intersect_p (VFP_REGS, rclass))
34420 return false;
34421 return true;
34422 }
34423
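/* For example, on a big-endian target this forbids taking an SImode subreg
   of a DFmode value held in a VFP register (DFmode is wider than a word and
   is stored in little-endian order), but still allows the 128-bit to 64-bit
   case, e.g. viewing a V2DImode value as DImode.  */
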
34424 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34425 strcpy from constants will be faster. */
34426
34427 static HOST_WIDE_INT
34428 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34429 {
34430 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34431 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34432 return MAX (align, BITS_PER_WORD * factor);
34433 return align;
34434 }
34435
34436 /* Emit a speculation barrier on target architectures that do not have
34437 DSB/ISB directly. Such systems probably don't need a barrier
34438 themselves, but if the code is ever run on a later architecture, it
34439 might become a problem. */
34440 void
34441 arm_emit_speculation_barrier_function ()
34442 {
34443 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34444 }
34445
34446 /* Have we recorded an explicit access to the Q bit of APSR?  */
34447 bool
34448 arm_q_bit_access (void)
34449 {
34450 if (cfun && cfun->decl)
34451 return lookup_attribute ("acle qbit",
34452 DECL_ATTRIBUTES (cfun->decl));
34453 return true;
34454 }
34455
34456 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34457 bool
34458 arm_ge_bits_access (void)
34459 {
34460 if (cfun && cfun->decl)
34461 return lookup_attribute ("acle gebits",
34462 DECL_ATTRIBUTES (cfun->decl));
34463 return true;
34464 }
34465
34466 /* Return NULL if insn INSN is valid within a low-overhead loop.
34467    Otherwise return a string explaining why doloop cannot be applied.  */
34468
34469 static const char *
34470 arm_invalid_within_doloop (const rtx_insn *insn)
34471 {
34472 if (!TARGET_HAVE_LOB)
34473 return default_invalid_within_doloop (insn);
34474
34475 if (CALL_P (insn))
34476 return "Function call in the loop.";
34477
34478 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34479 return "LR is used inside loop.";
34480
34481 return NULL;
34482 }
34483
34484 bool
34485 arm_target_insn_ok_for_lob (rtx insn)
34486 {
34487 basic_block bb = BLOCK_FOR_INSN (insn);
34488   /* Make sure the basic block of the target insn is a simple latch
34489      whose single predecessor and single successor are the body of the loop
34490      itself.  Only simple loops with a single basic block as body are
34491      supported for 'low-overhead loops'; this makes sure that the LE target
34492      is above LE itself in the generated code.  */
34493
34494 return single_succ_p (bb)
34495 && single_pred_p (bb)
34496 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34497 && contains_no_active_insn_p (bb);
34498 }
34499
34500 #if CHECKING_P
34501 namespace selftest {
34502
34503 /* Scan the static data tables generated by parsecpu.awk looking for
34504    potential issues with the data.  At present we primarily check for
34505    inconsistencies in the option extensions (extensions that duplicate
34506    others but aren't marked as aliases).  Furthermore, for correct
34507    canonicalization, later options must never be a subset of an earlier
34508    option.  Any extension should also only specify feature bits and never
34509    an architecture bit; the architecture is inferred from the declaration
34510    of the extension.  */
34511 static void
34512 arm_test_cpu_arch_data (void)
34513 {
34514 const arch_option *arch;
34515 const cpu_option *cpu;
34516 auto_sbitmap target_isa (isa_num_bits);
34517 auto_sbitmap isa1 (isa_num_bits);
34518 auto_sbitmap isa2 (isa_num_bits);
34519
34520 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34521 {
34522 const cpu_arch_extension *ext1, *ext2;
34523
34524 if (arch->common.extensions == NULL)
34525 continue;
34526
34527 arm_initialize_isa (target_isa, arch->common.isa_bits);
34528
34529 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34530 {
34531 if (ext1->alias)
34532 continue;
34533
34534 arm_initialize_isa (isa1, ext1->isa_bits);
34535 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34536 {
34537 if (ext2->alias || ext1->remove != ext2->remove)
34538 continue;
34539
34540 arm_initialize_isa (isa2, ext2->isa_bits);
34541 /* If the option is a subset of the parent option, it doesn't
34542 add anything and so isn't useful. */
34543 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34544
34545 /* If the extension specifies any architectural bits then
34546 disallow it. Extensions should only specify feature bits. */
34547 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34548 }
34549 }
34550 }
34551
34552 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34553 {
34554 const cpu_arch_extension *ext1, *ext2;
34555
34556 if (cpu->common.extensions == NULL)
34557 continue;
34558
34559       arm_initialize_isa (target_isa, cpu->common.isa_bits);
34560
34561 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34562 {
34563 if (ext1->alias)
34564 continue;
34565
34566 arm_initialize_isa (isa1, ext1->isa_bits);
34567 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34568 {
34569 if (ext2->alias || ext1->remove != ext2->remove)
34570 continue;
34571
34572 arm_initialize_isa (isa2, ext2->isa_bits);
34573 /* If the option is a subset of the parent option, it doesn't
34574 add anything and so isn't useful. */
34575 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34576
34577 /* If the extension specifies any architectural bits then
34578 disallow it. Extensions should only specify feature bits. */
34579 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34580 }
34581 }
34582 }
34583 }
34584
34585 /* Scan the static data tables generated by parsecpu.awk looking for
34586 potential issues with the data. Here we check for consistency between the
34587 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34588 a feature bit that is not defined by any FPU flag. */
34589 static void
34590 arm_test_fpu_data (void)
34591 {
34592 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34593 auto_sbitmap fpubits (isa_num_bits);
34594 auto_sbitmap tmpset (isa_num_bits);
34595
34596 static const enum isa_feature fpu_bitlist_internal[]
34597 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34598 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34599
34600 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34601 {
34602 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34603 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34604 bitmap_clear (isa_all_fpubits_internal);
34605 bitmap_copy (isa_all_fpubits_internal, tmpset);
34606 }
34607
34608 if (!bitmap_empty_p (isa_all_fpubits_internal))
34609 {
34610       fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34611 " group that are not defined by any FPU.\n"
34612 " Check your arm-cpus.in.\n");
34613 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34614 }
34615 }
34616
34617 static void
34618 arm_run_selftests (void)
34619 {
34620 arm_test_cpu_arch_data ();
34621 arm_test_fpu_data ();
34622 }
34623 } /* Namespace selftest. */
34624
34625 #undef TARGET_RUN_TARGET_SELFTESTS
34626 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34627 #endif /* CHECKING_P */
34628
34629 /* Implement TARGET_STACK_PROTECT_GUARD.  In the case of a
34630    global-variable-based guard, use the default; otherwise
34631    return a null tree.  */
34632 static tree
34633 arm_stack_protect_guard (void)
34634 {
34635 if (arm_stack_protector_guard == SSP_GLOBAL)
34636 return default_stack_protect_guard ();
34637
34638 return NULL_TREE;
34639 }
34640
34641 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34642 Unlike the arm version, we do NOT implement asm flag outputs. */
34643
34644 rtx_insn *
34645 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34646 vec<machine_mode> & /*input_modes*/,
34647 vec<const char *> &constraints,
34648 vec<rtx> &, vec<rtx> & /*clobbers*/,
34649 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34650 {
34651 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34652 if (startswith (constraints[i], "=@cc"))
34653 {
34654 sorry ("%<asm%> flags not supported in thumb1 mode");
34655 break;
34656 }
34657 return NULL;
34658 }
34659
34660 /* Generate code to enable conditional branches in functions over 1 MiB.
34661    Parameters are:
34662    OPERANDS: the operands list of the asm insn (see arm_cond_branch or
34663      arm_cond_branch_reversed).
34664    POS_LABEL: an index into the operands array where operands[pos_label] is
34665      the asm label of the final jump destination.
34666    DEST: a string used to generate the asm label of the intermediate
34667      destination.
34668    BRANCH_FORMAT: a string denoting the intermediate branch format, e.g.
34669      "beq", "bne", etc.  */
34670
34671 const char *
34672 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34673 const char * branch_format)
34674 {
34675 rtx_code_label * tmp_label = gen_label_rtx ();
34676 char label_buf[256];
34677 char buffer[128];
34678   ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34679 			       CODE_LABEL_NUMBER (tmp_label));
34680 const char *label_ptr = arm_strip_name_encoding (label_buf);
34681 rtx dest_label = operands[pos_label];
34682 operands[pos_label] = tmp_label;
34683
34684   snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34685 output_asm_insn (buffer, operands);
34686
34687 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34688 operands[pos_label] = dest_label;
34689 output_asm_insn (buffer, operands);
34690 return "";
34691 }
34692
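/* The output therefore looks roughly like this (label names are
   illustrative; BRANCH_FORMAT here is "beq\t"):

	beq	.Lbcond4
	b	<operands[POS_LABEL]>
   .Lbcond4:

   i.e. a short-range conditional branch around an unconditional branch,
   which is what gives the > 1 MiB reach; the caller supplies the condition
   in BRANCH_FORMAT accordingly.  */
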
34693 /* Return the base register class for a memory access of MODE.  With MVE,
34694    some modes require a low base register (i.e. [Rn] with Rn in LO_REGS).  */
34695 enum reg_class
34696 arm_mode_base_reg_class (machine_mode mode)
34697 {
34698 if (TARGET_HAVE_MVE
34699 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34700 return LO_REGS;
34701
34702 return MODE_BASE_REG_REG_CLASS (mode);
34703 }
34704
34705 struct gcc_target targetm = TARGET_INITIALIZER;
34706
34707 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34708
34709 opt_machine_mode
34710 arm_get_mask_mode (machine_mode mode)
34711 {
34712 if (TARGET_HAVE_MVE)
34713 return arm_mode_to_pred_mode (mode);
34714
34715 return default_get_mask_mode (mode);
34716 }
34717
34718 /* Output assembly to read the thread pointer from the appropriate TPIDR
34719    register into DST.  If PRED_P, also emit the %? that can be used to
34720    output the predication code.  */
34721
34722 const char *
34723 arm_output_load_tpidr (rtx dst, bool pred_p)
34724 {
34725 char buf[64];
34726 int tpidr_coproc_num = -1;
34727 switch (target_thread_pointer)
34728 {
34729 case TP_TPIDRURW:
34730 tpidr_coproc_num = 2;
34731 break;
34732 case TP_TPIDRURO:
34733 tpidr_coproc_num = 3;
34734 break;
34735 case TP_TPIDRPRW:
34736 tpidr_coproc_num = 4;
34737 break;
34738 default:
34739 gcc_unreachable ();
34740 }
34741 snprintf (buf, sizeof (buf),
34742 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34743 pred_p ? "%?" : "", tpidr_coproc_num);
34744 output_asm_insn (buf, &dst);
34745 return "";
34746 }
34747
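/* For example, when the thread pointer lives in TPIDRURO (TP_TPIDRURO) the
   instruction printed is "mrc p15, 0, %0, c13, c0, 3 @ load_tp_hard", and
   when PRED_P the "%?" after "mrc" lets any predication code be emitted.  */
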
34748 #include "gt-arm.h"