gcc/config/arm/arm.cc (thirdparty/gcc.git, as of "[PATCH 10/15] arm: Implement cortex-M return signing address codegen")
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75 #include "aarch-common.h"
76 #include "aarch-common-protos.h"
77
78 /* This file should be included last. */
79 #include "target-def.h"
80
81 /* Forward definitions of types. */
82 typedef struct minipool_node Mnode;
83 typedef struct minipool_fixup Mfix;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
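/* A group of four integers, used by optimal_immediate_sequence (declared
   below) to return the immediates of a constant-building sequence.  */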
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
191 static rtx emit_multi_reg_push (unsigned long, unsigned long);
192 static void arm_emit_multi_reg_pop (unsigned long);
193 static int vfp_emit_fstmd (int, int);
194 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
195 static int arm_arg_partial_bytes (cumulative_args_t,
196 const function_arg_info &);
197 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
198 static void arm_function_arg_advance (cumulative_args_t,
199 const function_arg_info &);
200 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
201 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
202 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
203 const_tree);
204 static rtx aapcs_libcall_value (machine_mode);
205 static int aapcs_select_return_coproc (const_tree, const_tree);
206
207 #ifdef OBJECT_FORMAT_ELF
208 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
209 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
210 #endif
211 #ifndef ARM_PE
212 static void arm_encode_section_info (tree, rtx, int);
213 #endif
214
215 static void arm_file_end (void);
216 static void arm_file_start (void);
217 static void arm_insert_attributes (tree, tree *);
218
219 static void arm_setup_incoming_varargs (cumulative_args_t,
220 const function_arg_info &, int *, int);
221 static bool arm_pass_by_reference (cumulative_args_t,
222 const function_arg_info &);
223 static bool arm_promote_prototypes (const_tree);
224 static bool arm_default_short_enums (void);
225 static bool arm_align_anon_bitfield (void);
226 static bool arm_return_in_msb (const_tree);
227 static bool arm_must_pass_in_stack (const function_arg_info &);
228 static bool arm_return_in_memory (const_tree, const_tree);
229 #if ARM_UNWIND_INFO
230 static void arm_unwind_emit (FILE *, rtx_insn *);
231 static bool arm_output_ttype (rtx);
232 static void arm_asm_emit_except_personality (rtx);
233 #endif
234 static void arm_asm_init_sections (void);
235 static rtx arm_dwarf_register_span (rtx);
236
237 static tree arm_cxx_guard_type (void);
238 static bool arm_cxx_guard_mask_bit (void);
239 static tree arm_get_cookie_size (tree);
240 static bool arm_cookie_has_size (void);
241 static bool arm_cxx_cdtor_returns_this (void);
242 static bool arm_cxx_key_method_may_be_inline (void);
243 static void arm_cxx_determine_class_data_visibility (tree);
244 static bool arm_cxx_class_data_always_comdat (void);
245 static bool arm_cxx_use_aeabi_atexit (void);
246 static void arm_init_libfuncs (void);
247 static tree arm_build_builtin_va_list (void);
248 static void arm_expand_builtin_va_start (tree, rtx);
249 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
250 static void arm_option_override (void);
251 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
252 struct cl_target_option *);
253 static void arm_override_options_after_change (void);
254 static void arm_option_print (FILE *, int, struct cl_target_option *);
255 static void arm_set_current_function (tree);
256 static bool arm_can_inline_p (tree, tree);
257 static void arm_relayout_function (tree);
258 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_sched_can_speculate_insn (rtx_insn *);
261 static bool arm_macro_fusion_p (void);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
265 static int arm_first_cycle_multipass_dfa_lookahead (void);
266 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
267 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
268 static bool arm_output_addr_const_extra (FILE *, rtx);
269 static bool arm_allocate_stack_slots_for_args (void);
270 static bool arm_warn_func_return (tree);
271 static tree arm_promoted_type (const_tree t);
272 static bool arm_scalar_mode_supported_p (scalar_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
300
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
303
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
305
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
309
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
332 \f
333 /* Table of machine attributes. */
334 static const struct attribute_spec arm_attribute_table[] =
335 {
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356 /* ARM/PE has three new attributes:
357 interfacearm - ?
358 dllexport - for exporting a function/variable that will live in a dll
359 dllimport - for importing a function/variable from a dll
360
361 Microsoft allows multiple declspecs in one __declspec, separating
362 them with spaces. We do NOT support this. Instead, use __declspec
363 multiple times.
364 */
365 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
366 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
367 { "interfacearm", 0, 0, true, false, false, false,
368 arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "notshared", 0, 0, false, true, false, false,
375 arm_handle_notshared_attribute, NULL },
376 #endif
377 /* ARMv8-M Security Extensions support. */
378 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379 arm_handle_cmse_nonsecure_entry, NULL },
380 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
381 arm_handle_cmse_nonsecure_call, NULL },
382 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
383 { NULL, 0, 0, false, false, false, false, NULL, NULL }
384 };
385 \f
386 /* Initialize the GCC target structure. */
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
391
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
394
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
397
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
400
401 #undef TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
403
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
408
409 #undef TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
413
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
420
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
423
424 #undef TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
426
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
429
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
432
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
435
436 #undef TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
438
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
441
442 #undef TARGET_OPTION_RESTORE
443 #define TARGET_OPTION_RESTORE arm_option_restore
444
445 #undef TARGET_OPTION_PRINT
446 #define TARGET_OPTION_PRINT arm_option_print
447
448 #undef TARGET_COMP_TYPE_ATTRIBUTES
449 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
450
451 #undef TARGET_SCHED_CAN_SPECULATE_INSN
452 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
453
454 #undef TARGET_SCHED_MACRO_FUSION_P
455 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
456
457 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
458 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
459
460 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
461 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
462
463 #undef TARGET_SCHED_ADJUST_COST
464 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
465
466 #undef TARGET_SET_CURRENT_FUNCTION
467 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
468
469 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
470 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
471
472 #undef TARGET_SCHED_REORDER
473 #define TARGET_SCHED_REORDER arm_sched_reorder
474
475 #undef TARGET_REGISTER_MOVE_COST
476 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
477
478 #undef TARGET_MEMORY_MOVE_COST
479 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
480
481 #undef TARGET_ENCODE_SECTION_INFO
482 #ifdef ARM_PE
483 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
484 #else
485 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
486 #endif
487
488 #undef TARGET_STRIP_NAME_ENCODING
489 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
490
491 #undef TARGET_ASM_INTERNAL_LABEL
492 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
493
494 #undef TARGET_FLOATN_MODE
495 #define TARGET_FLOATN_MODE arm_floatn_mode
496
497 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
498 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
499
500 #undef TARGET_FUNCTION_VALUE
501 #define TARGET_FUNCTION_VALUE arm_function_value
502
503 #undef TARGET_LIBCALL_VALUE
504 #define TARGET_LIBCALL_VALUE arm_libcall_value
505
506 #undef TARGET_FUNCTION_VALUE_REGNO_P
507 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
508
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
513
514 #undef TARGET_RTX_COSTS
515 #define TARGET_RTX_COSTS arm_rtx_costs
516 #undef TARGET_ADDRESS_COST
517 #define TARGET_ADDRESS_COST arm_address_cost
518 #undef TARGET_INSN_COST
519 #define TARGET_INSN_COST arm_insn_cost
520
521 #undef TARGET_SHIFT_TRUNCATION_MASK
522 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
523 #undef TARGET_VECTOR_MODE_SUPPORTED_P
524 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
525 #undef TARGET_ARRAY_MODE_SUPPORTED_P
526 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
527 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
528 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
529 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
530 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
531 arm_autovectorize_vector_modes
532
533 #undef TARGET_MACHINE_DEPENDENT_REORG
534 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
535
536 #undef TARGET_INIT_BUILTINS
537 #define TARGET_INIT_BUILTINS arm_init_builtins
538 #undef TARGET_EXPAND_BUILTIN
539 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
540 #undef TARGET_BUILTIN_DECL
541 #define TARGET_BUILTIN_DECL arm_builtin_decl
542
543 #undef TARGET_INIT_LIBFUNCS
544 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
545
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
548 #undef TARGET_PROMOTE_PROTOTYPES
549 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
550 #undef TARGET_PASS_BY_REFERENCE
551 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
552 #undef TARGET_ARG_PARTIAL_BYTES
553 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
554 #undef TARGET_FUNCTION_ARG
555 #define TARGET_FUNCTION_ARG arm_function_arg
556 #undef TARGET_FUNCTION_ARG_ADVANCE
557 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
558 #undef TARGET_FUNCTION_ARG_PADDING
559 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
560 #undef TARGET_FUNCTION_ARG_BOUNDARY
561 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
562
563 #undef TARGET_SETUP_INCOMING_VARARGS
564 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
565
566 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
567 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
568
569 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
570 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
571 #undef TARGET_TRAMPOLINE_INIT
572 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
573 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
574 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
575
576 #undef TARGET_WARN_FUNC_RETURN
577 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
578
579 #undef TARGET_DEFAULT_SHORT_ENUMS
580 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
581
582 #undef TARGET_ALIGN_ANON_BITFIELD
583 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
584
585 #undef TARGET_NARROW_VOLATILE_BITFIELD
586 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
587
588 #undef TARGET_CXX_GUARD_TYPE
589 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
590
591 #undef TARGET_CXX_GUARD_MASK_BIT
592 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
593
594 #undef TARGET_CXX_GET_COOKIE_SIZE
595 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
596
597 #undef TARGET_CXX_COOKIE_HAS_SIZE
598 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
599
600 #undef TARGET_CXX_CDTOR_RETURNS_THIS
601 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
602
603 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
604 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
605
606 #undef TARGET_CXX_USE_AEABI_ATEXIT
607 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
608
609 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
610 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
611 arm_cxx_determine_class_data_visibility
612
613 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
614 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
615
616 #undef TARGET_RETURN_IN_MSB
617 #define TARGET_RETURN_IN_MSB arm_return_in_msb
618
619 #undef TARGET_RETURN_IN_MEMORY
620 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
621
622 #undef TARGET_MUST_PASS_IN_STACK
623 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
624
625 #if ARM_UNWIND_INFO
626 #undef TARGET_ASM_UNWIND_EMIT
627 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
628
629 /* EABI unwinding tables use a different format for the typeinfo tables. */
630 #undef TARGET_ASM_TTYPE
631 #define TARGET_ASM_TTYPE arm_output_ttype
632
633 #undef TARGET_ARM_EABI_UNWINDER
634 #define TARGET_ARM_EABI_UNWINDER true
635
636 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
637 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
638
639 #endif /* ARM_UNWIND_INFO */
640
641 #undef TARGET_ASM_INIT_SECTIONS
642 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
643
644 #undef TARGET_DWARF_REGISTER_SPAN
645 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
646
647 #undef TARGET_CANNOT_COPY_INSN_P
648 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
649
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_HAVE_TLS
652 #define TARGET_HAVE_TLS true
653 #endif
654
655 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
656 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
657
658 #undef TARGET_LEGITIMATE_CONSTANT_P
659 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
660
661 #undef TARGET_CANNOT_FORCE_CONST_MEM
662 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
663
664 #undef TARGET_MAX_ANCHOR_OFFSET
665 #define TARGET_MAX_ANCHOR_OFFSET 4095
666
667 /* The minimum is set such that the total size of the block
668 for a particular anchor is -4088 + 1 + 4095 bytes, which is
669 divisible by eight, ensuring natural spacing of anchors. */
670 #undef TARGET_MIN_ANCHOR_OFFSET
671 #define TARGET_MIN_ANCHOR_OFFSET -4088
672
673 #undef TARGET_SCHED_ISSUE_RATE
674 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
675
676 #undef TARGET_SCHED_VARIABLE_ISSUE
677 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
678
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
681 arm_first_cycle_multipass_dfa_lookahead
682
683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
685 arm_first_cycle_multipass_dfa_lookahead_guard
686
687 #undef TARGET_MANGLE_TYPE
688 #define TARGET_MANGLE_TYPE arm_mangle_type
689
690 #undef TARGET_INVALID_CONVERSION
691 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
692
693 #undef TARGET_INVALID_UNARY_OP
694 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
695
696 #undef TARGET_INVALID_BINARY_OP
697 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
698
699 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
700 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
701
702 #undef TARGET_BUILD_BUILTIN_VA_LIST
703 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
704 #undef TARGET_EXPAND_BUILTIN_VA_START
705 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
707 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
708
709 #ifdef HAVE_AS_TLS
710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
712 #endif
713
714 #undef TARGET_LEGITIMATE_ADDRESS_P
715 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
716
717 #undef TARGET_PREFERRED_RELOAD_CLASS
718 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
719
720 #undef TARGET_PROMOTED_TYPE
721 #define TARGET_PROMOTED_TYPE arm_promoted_type
722
723 #undef TARGET_SCALAR_MODE_SUPPORTED_P
724 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
725
726 #undef TARGET_COMPUTE_FRAME_LAYOUT
727 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
728
729 #undef TARGET_FRAME_POINTER_REQUIRED
730 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
731
732 #undef TARGET_CAN_ELIMINATE
733 #define TARGET_CAN_ELIMINATE arm_can_eliminate
734
735 #undef TARGET_CONDITIONAL_REGISTER_USAGE
736 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
737
738 #undef TARGET_CLASS_LIKELY_SPILLED_P
739 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
740
741 #undef TARGET_VECTORIZE_BUILTINS
742 #define TARGET_VECTORIZE_BUILTINS
743
744 #undef TARGET_VECTOR_ALIGNMENT
745 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
746
747 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
748 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
749 arm_vector_alignment_reachable
750
751 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
752 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
753 arm_builtin_support_vector_misalignment
754
755 #undef TARGET_PREFERRED_RENAME_CLASS
756 #define TARGET_PREFERRED_RENAME_CLASS \
757 arm_preferred_rename_class
758
759 #undef TARGET_VECTORIZE_VEC_PERM_CONST
760 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
761
762 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
763 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
764 arm_builtin_vectorization_cost
765
766 #undef TARGET_CANONICALIZE_COMPARISON
767 #define TARGET_CANONICALIZE_COMPARISON \
768 arm_canonicalize_comparison
769
770 #undef TARGET_ASAN_SHADOW_OFFSET
771 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
772
773 #undef MAX_INSN_PER_IT_BLOCK
774 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
775
776 #undef TARGET_CAN_USE_DOLOOP_P
777 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
778
779 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
780 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
781
782 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
783 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
784
785 #undef TARGET_SCHED_FUSION_PRIORITY
786 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
787
788 #undef TARGET_ASM_FUNCTION_SECTION
789 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
790
791 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
792 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
793
794 #undef TARGET_SECTION_TYPE_FLAGS
795 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
796
797 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
798 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
799
800 #undef TARGET_C_EXCESS_PRECISION
801 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
802
803 /* Although the architecture reserves bits 0 and 1, only the former is
804 used for ARM/Thumb ISA selection in v7 and earlier versions. */
805 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
806 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
807
808 #undef TARGET_FIXED_CONDITION_CODE_REGS
809 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
810
811 #undef TARGET_HARD_REGNO_NREGS
812 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
813 #undef TARGET_HARD_REGNO_MODE_OK
814 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
815
816 #undef TARGET_MODES_TIEABLE_P
817 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
818
819 #undef TARGET_CAN_CHANGE_MODE_CLASS
820 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
821
822 #undef TARGET_CONSTANT_ALIGNMENT
823 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
824
825 #undef TARGET_INVALID_WITHIN_DOLOOP
826 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
827
828 #undef TARGET_MD_ASM_ADJUST
829 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
830
831 #undef TARGET_STACK_PROTECT_GUARD
832 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
833
834 #undef TARGET_VECTORIZE_GET_MASK_MODE
835 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
836 \f
837 /* Obstack for minipool constant handling. */
838 static struct obstack minipool_obstack;
839 static char * minipool_startobj;
840
841 /* The maximum number of insns skipped which
842 will be conditionalised if possible. */
843 static int max_insns_skipped = 5;
844
845 /* True if we are currently building a constant table. */
846 int making_const_table;
847
848 /* The processor for which instructions should be scheduled. */
849 enum processor_type arm_tune = TARGET_CPU_arm_none;
850
851 /* The current tuning set. */
852 const struct tune_params *current_tune;
853
854 /* Which floating point hardware to schedule for. */
855 int arm_fpu_attr;
856
857 /* Used for Thumb call_via trampolines. */
858 rtx thumb_call_via_label[14];
859 static int thumb_call_reg_needed;
860
861 /* The bits in this mask specify which instruction scheduling options should
862 be used. */
863 unsigned int tune_flags = 0;
864
865 /* The highest ARM architecture version supported by the
866 target. */
867 enum base_architecture arm_base_arch = BASE_ARCH_0;
868
869 /* Active target architecture and tuning. */
870
871 struct arm_build_target arm_active_target;
872
873 /* The following are used in the arm.md file as equivalents to bits
874 in the above two flag variables. */
875
876 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
877 int arm_arch4 = 0;
878
879 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
880 int arm_arch4t = 0;
881
882 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
883 int arm_arch5t = 0;
884
885 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
886 int arm_arch5te = 0;
887
888 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
889 int arm_arch6 = 0;
890
891 /* Nonzero if this chip supports the ARM 6K extensions. */
892 int arm_arch6k = 0;
893
894 /* Nonzero if this chip supports the ARM 6KZ extensions. */
895 int arm_arch6kz = 0;
896
897 /* Nonzero if instructions present in ARMv6-M can be used. */
898 int arm_arch6m = 0;
899
900 /* Nonzero if this chip supports the ARM 7 extensions. */
901 int arm_arch7 = 0;
902
903 /* Nonzero if this chip supports the Large Physical Address Extension. */
904 int arm_arch_lpae = 0;
905
906 /* Nonzero if instructions not present in the 'M' profile can be used. */
907 int arm_arch_notm = 0;
908
909 /* Nonzero if instructions present in ARMv7E-M can be used. */
910 int arm_arch7em = 0;
911
912 /* Nonzero if instructions present in ARMv8 can be used. */
913 int arm_arch8 = 0;
914
915 /* Nonzero if this chip supports the ARMv8.1 extensions. */
916 int arm_arch8_1 = 0;
917
918 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
919 int arm_arch8_2 = 0;
920
921 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
922 int arm_arch8_3 = 0;
923
924 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
925 int arm_arch8_4 = 0;
926
927 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
928 extensions. */
929 int arm_arch8m_main = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932 extensions. */
933 int arm_arch8_1m_main = 0;
934
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936 Architecture 8.2. */
937 int arm_fp16_inst = 0;
938
939 /* Nonzero if this chip can benefit from load scheduling. */
940 int arm_ld_sched = 0;
941
942 /* Nonzero if this chip is a StrongARM. */
943 int arm_tune_strongarm = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX technology. */
946 int arm_arch_iwmmxt = 0;
947
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
949 int arm_arch_iwmmxt2 = 0;
950
951 /* Nonzero if this chip is an XScale. */
952 int arm_arch_xscale = 0;
953
954 /* Nonzero if tuning for XScale */
955 int arm_tune_xscale = 0;
956
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958 This typically means an ARM6 or ARM7 with MMU or MPU. */
959 int arm_tune_wbuf = 0;
960
961 /* Nonzero if tuning for Cortex-A9. */
962 int arm_tune_cortex_a9 = 0;
963
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965 preprocessor.
966 XXX This is a bit of a hack, it's intended to help work around
967 problems in GLD which doesn't understand that armv5t code is
968 interworking clean. */
969 int arm_cpp_interwork = 0;
970
971 /* Nonzero if chip supports Thumb 1. */
972 int arm_arch_thumb1;
973
974 /* Nonzero if chip supports Thumb 2. */
975 int arm_arch_thumb2;
976
977 /* Nonzero if chip supports integer division instruction. */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
980
981 /* Nonzero if chip disallows volatile memory access in IT block. */
982 int arm_arch_no_volatile_ce;
983
984 /* Nonzero if we shouldn't use literal pools. */
985 bool arm_disable_literal_pool = false;
986
987 /* The register number to be used for the PIC offset register. */
988 unsigned arm_pic_register = INVALID_REGNUM;
989
990 enum arm_pcs arm_pcs_default;
991
992 /* For an explanation of these variables, see final_prescan_insn below. */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
995 enum arm_cond_code arm_current_cc;
996
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn. */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002 Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask. */
1005 int arm_condexec_masklen = 0;
1006
1007 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1008 int arm_arch_crc = 0;
1009
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1011 int arm_arch_dotprod = 0;
1012
1013 /* Nonzero if chip supports the ARMv8-M security extensions. */
1014 int arm_arch_cmse = 0;
1015
1016 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1017 int arm_m_profile_small_mul = 0;
1018
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1020 int arm_arch_i8mm = 0;
1021
1022 /* Nonzero if chip supports the BFloat16 instructions. */
1023 int arm_arch_bf16 = 0;
1024
1025 /* Nonzero if chip supports the Custom Datapath Extension. */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
1028 const int arm_arch_cde_coproc_bits[] = {
1029 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1030 };
1031
1032 /* The condition codes of the ARM, and the inverse function. */
1033 static const char * const arm_condition_codes[] =
1034 {
1035 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1037 };
1038
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1040 int arm_regs_in_sequence[] =
1041 {
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1043 };
1044
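/* Table of printable names for the FP system registers, built by
   stringizing each entry of the FP_SYSREGS list via DEF_FP_SYSREG.  */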
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047 FP_SYSREGS
1048 };
1049 #undef DEF_FP_SYSREG
1050
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1053
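/* Mask of the core registers r0-r7 that may be used as work (scratch)
   registers in Thumb-2 code: the hard frame pointer, SP, PC and, when one
   is in use, the PIC offset table register are excluded.  */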
1054 #define THUMB2_WORK_REGS \
1055 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1056 | (1 << SP_REGNUM) \
1057 | (1 << PC_REGNUM) \
1058 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1059 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1060 : 0)))
1061 \f
1062 /* Initialization code. */
1063
1064 struct cpu_tune
1065 {
1066 enum processor_type scheduler;
1067 unsigned int tune_flags;
1068 const struct tune_params *tune;
1069 };
1070
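/* Initializers for the prefetch tuning fields: number of prefetch slots,
   L1 cache size and L1 cache line size (-1 where the value is not
   meaningful, as in the NOT_BENEFICIAL case).  */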
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1073 { \
1074 num_slots, \
1075 l1_size, \
1076 l1_line_size \
1077 }
1078
1079 /* arm generic vectorizer costs. */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082 1, /* scalar_stmt_cost. */
1083 1, /* scalar load_cost. */
1084 1, /* scalar_store_cost. */
1085 1, /* vec_stmt_cost. */
1086 1, /* vec_to_scalar_cost. */
1087 1, /* scalar_to_vec_cost. */
1088 1, /* vec_align_load_cost. */
1089 1, /* vec_unalign_load_cost. */
1090 1, /* vec_unalign_store_cost. */
1091 1, /* vec_store_cost. */
1092 3, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1094 };
1095
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1097 #include "aarch-cost-tables.h"
1098
1099
1100
1101 const struct cpu_cost_table cortexa9_extra_costs =
1102 {
1103 /* ALU */
1104 {
1105 0, /* arith. */
1106 0, /* logical. */
1107 0, /* shift. */
1108 COSTS_N_INSNS (1), /* shift_reg. */
1109 COSTS_N_INSNS (1), /* arith_shift. */
1110 COSTS_N_INSNS (2), /* arith_shift_reg. */
1111 0, /* log_shift. */
1112 COSTS_N_INSNS (1), /* log_shift_reg. */
1113 COSTS_N_INSNS (1), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_arith. */
1115 COSTS_N_INSNS (1), /* bfi. */
1116 COSTS_N_INSNS (1), /* bfx. */
1117 0, /* clz. */
1118 0, /* rev. */
1119 0, /* non_exec. */
1120 true /* non_exec_costs_exec. */
1121 },
1122 {
1123 /* MULT SImode */
1124 {
1125 COSTS_N_INSNS (3), /* simple. */
1126 COSTS_N_INSNS (3), /* flag_setting. */
1127 COSTS_N_INSNS (2), /* extend. */
1128 COSTS_N_INSNS (3), /* add. */
1129 COSTS_N_INSNS (2), /* extend_add. */
1130 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1131 },
1132 /* MULT DImode */
1133 {
1134 0, /* simple (N/A). */
1135 0, /* flag_setting (N/A). */
1136 COSTS_N_INSNS (4), /* extend. */
1137 0, /* add (N/A). */
1138 COSTS_N_INSNS (4), /* extend_add. */
1139 0 /* idiv (N/A). */
1140 }
1141 },
1142 /* LD/ST */
1143 {
1144 COSTS_N_INSNS (2), /* load. */
1145 COSTS_N_INSNS (2), /* load_sign_extend. */
1146 COSTS_N_INSNS (2), /* ldrd. */
1147 COSTS_N_INSNS (2), /* ldm_1st. */
1148 1, /* ldm_regs_per_insn_1st. */
1149 2, /* ldm_regs_per_insn_subsequent. */
1150 COSTS_N_INSNS (5), /* loadf. */
1151 COSTS_N_INSNS (5), /* loadd. */
1152 COSTS_N_INSNS (1), /* load_unaligned. */
1153 COSTS_N_INSNS (2), /* store. */
1154 COSTS_N_INSNS (2), /* strd. */
1155 COSTS_N_INSNS (2), /* stm_1st. */
1156 1, /* stm_regs_per_insn_1st. */
1157 2, /* stm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* storef. */
1159 COSTS_N_INSNS (1), /* stored. */
1160 COSTS_N_INSNS (1), /* store_unaligned. */
1161 COSTS_N_INSNS (1), /* loadv. */
1162 COSTS_N_INSNS (1) /* storev. */
1163 },
1164 {
1165 /* FP SFmode */
1166 {
1167 COSTS_N_INSNS (14), /* div. */
1168 COSTS_N_INSNS (4), /* mult. */
1169 COSTS_N_INSNS (7), /* mult_addsub. */
1170 COSTS_N_INSNS (30), /* fma. */
1171 COSTS_N_INSNS (3), /* addsub. */
1172 COSTS_N_INSNS (1), /* fpconst. */
1173 COSTS_N_INSNS (1), /* neg. */
1174 COSTS_N_INSNS (3), /* compare. */
1175 COSTS_N_INSNS (3), /* widen. */
1176 COSTS_N_INSNS (3), /* narrow. */
1177 COSTS_N_INSNS (3), /* toint. */
1178 COSTS_N_INSNS (3), /* fromint. */
1179 COSTS_N_INSNS (3) /* roundint. */
1180 },
1181 /* FP DFmode */
1182 {
1183 COSTS_N_INSNS (24), /* div. */
1184 COSTS_N_INSNS (5), /* mult. */
1185 COSTS_N_INSNS (8), /* mult_addsub. */
1186 COSTS_N_INSNS (30), /* fma. */
1187 COSTS_N_INSNS (3), /* addsub. */
1188 COSTS_N_INSNS (1), /* fpconst. */
1189 COSTS_N_INSNS (1), /* neg. */
1190 COSTS_N_INSNS (3), /* compare. */
1191 COSTS_N_INSNS (3), /* widen. */
1192 COSTS_N_INSNS (3), /* narrow. */
1193 COSTS_N_INSNS (3), /* toint. */
1194 COSTS_N_INSNS (3), /* fromint. */
1195 COSTS_N_INSNS (3) /* roundint. */
1196 }
1197 },
1198 /* Vector */
1199 {
1200 COSTS_N_INSNS (1), /* alu. */
1201 COSTS_N_INSNS (4), /* mult. */
1202 COSTS_N_INSNS (1), /* movi. */
1203 COSTS_N_INSNS (2), /* dup. */
1204 COSTS_N_INSNS (2) /* extract. */
1205 }
1206 };
1207
1208 const struct cpu_cost_table cortexa8_extra_costs =
1209 {
1210 /* ALU */
1211 {
1212 0, /* arith. */
1213 0, /* logical. */
1214 COSTS_N_INSNS (1), /* shift. */
1215 0, /* shift_reg. */
1216 COSTS_N_INSNS (1), /* arith_shift. */
1217 0, /* arith_shift_reg. */
1218 COSTS_N_INSNS (1), /* log_shift. */
1219 0, /* log_shift_reg. */
1220 0, /* extend. */
1221 0, /* extend_arith. */
1222 0, /* bfi. */
1223 0, /* bfx. */
1224 0, /* clz. */
1225 0, /* rev. */
1226 0, /* non_exec. */
1227 true /* non_exec_costs_exec. */
1228 },
1229 {
1230 /* MULT SImode */
1231 {
1232 COSTS_N_INSNS (1), /* simple. */
1233 COSTS_N_INSNS (1), /* flag_setting. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* add. */
1236 COSTS_N_INSNS (1), /* extend_add. */
1237 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1238 },
1239 /* MULT DImode */
1240 {
1241 0, /* simple (N/A). */
1242 0, /* flag_setting (N/A). */
1243 COSTS_N_INSNS (2), /* extend. */
1244 0, /* add (N/A). */
1245 COSTS_N_INSNS (2), /* extend_add. */
1246 0 /* idiv (N/A). */
1247 }
1248 },
1249 /* LD/ST */
1250 {
1251 COSTS_N_INSNS (1), /* load. */
1252 COSTS_N_INSNS (1), /* load_sign_extend. */
1253 COSTS_N_INSNS (1), /* ldrd. */
1254 COSTS_N_INSNS (1), /* ldm_1st. */
1255 1, /* ldm_regs_per_insn_1st. */
1256 2, /* ldm_regs_per_insn_subsequent. */
1257 COSTS_N_INSNS (1), /* loadf. */
1258 COSTS_N_INSNS (1), /* loadd. */
1259 COSTS_N_INSNS (1), /* load_unaligned. */
1260 COSTS_N_INSNS (1), /* store. */
1261 COSTS_N_INSNS (1), /* strd. */
1262 COSTS_N_INSNS (1), /* stm_1st. */
1263 1, /* stm_regs_per_insn_1st. */
1264 2, /* stm_regs_per_insn_subsequent. */
1265 COSTS_N_INSNS (1), /* storef. */
1266 COSTS_N_INSNS (1), /* stored. */
1267 COSTS_N_INSNS (1), /* store_unaligned. */
1268 COSTS_N_INSNS (1), /* loadv. */
1269 COSTS_N_INSNS (1) /* storev. */
1270 },
1271 {
1272 /* FP SFmode */
1273 {
1274 COSTS_N_INSNS (36), /* div. */
1275 COSTS_N_INSNS (11), /* mult. */
1276 COSTS_N_INSNS (20), /* mult_addsub. */
1277 COSTS_N_INSNS (30), /* fma. */
1278 COSTS_N_INSNS (9), /* addsub. */
1279 COSTS_N_INSNS (3), /* fpconst. */
1280 COSTS_N_INSNS (3), /* neg. */
1281 COSTS_N_INSNS (6), /* compare. */
1282 COSTS_N_INSNS (4), /* widen. */
1283 COSTS_N_INSNS (4), /* narrow. */
1284 COSTS_N_INSNS (8), /* toint. */
1285 COSTS_N_INSNS (8), /* fromint. */
1286 COSTS_N_INSNS (8) /* roundint. */
1287 },
1288 /* FP DFmode */
1289 {
1290 COSTS_N_INSNS (64), /* div. */
1291 COSTS_N_INSNS (16), /* mult. */
1292 COSTS_N_INSNS (25), /* mult_addsub. */
1293 COSTS_N_INSNS (30), /* fma. */
1294 COSTS_N_INSNS (9), /* addsub. */
1295 COSTS_N_INSNS (3), /* fpconst. */
1296 COSTS_N_INSNS (3), /* neg. */
1297 COSTS_N_INSNS (6), /* compare. */
1298 COSTS_N_INSNS (6), /* widen. */
1299 COSTS_N_INSNS (6), /* narrow. */
1300 COSTS_N_INSNS (8), /* toint. */
1301 COSTS_N_INSNS (8), /* fromint. */
1302 COSTS_N_INSNS (8) /* roundint. */
1303 }
1304 },
1305 /* Vector */
1306 {
1307 COSTS_N_INSNS (1), /* alu. */
1308 COSTS_N_INSNS (4), /* mult. */
1309 COSTS_N_INSNS (1), /* movi. */
1310 COSTS_N_INSNS (2), /* dup. */
1311 COSTS_N_INSNS (2) /* extract. */
1312 }
1313 };
1314
1315 const struct cpu_cost_table cortexa5_extra_costs =
1316 {
1317 /* ALU */
1318 {
1319 0, /* arith. */
1320 0, /* logical. */
1321 COSTS_N_INSNS (1), /* shift. */
1322 COSTS_N_INSNS (1), /* shift_reg. */
1323 COSTS_N_INSNS (1), /* arith_shift. */
1324 COSTS_N_INSNS (1), /* arith_shift_reg. */
1325 COSTS_N_INSNS (1), /* log_shift. */
1326 COSTS_N_INSNS (1), /* log_shift_reg. */
1327 COSTS_N_INSNS (1), /* extend. */
1328 COSTS_N_INSNS (1), /* extend_arith. */
1329 COSTS_N_INSNS (1), /* bfi. */
1330 COSTS_N_INSNS (1), /* bfx. */
1331 COSTS_N_INSNS (1), /* clz. */
1332 COSTS_N_INSNS (1), /* rev. */
1333 0, /* non_exec. */
1334 true /* non_exec_costs_exec. */
1335 },
1336
1337 {
1338 /* MULT SImode */
1339 {
1340 0, /* simple. */
1341 COSTS_N_INSNS (1), /* flag_setting. */
1342 COSTS_N_INSNS (1), /* extend. */
1343 COSTS_N_INSNS (1), /* add. */
1344 COSTS_N_INSNS (1), /* extend_add. */
1345 COSTS_N_INSNS (7) /* idiv. */
1346 },
1347 /* MULT DImode */
1348 {
1349 0, /* simple (N/A). */
1350 0, /* flag_setting (N/A). */
1351 COSTS_N_INSNS (1), /* extend. */
1352 0, /* add. */
1353 COSTS_N_INSNS (2), /* extend_add. */
1354 0 /* idiv (N/A). */
1355 }
1356 },
1357 /* LD/ST */
1358 {
1359 COSTS_N_INSNS (1), /* load. */
1360 COSTS_N_INSNS (1), /* load_sign_extend. */
1361 COSTS_N_INSNS (6), /* ldrd. */
1362 COSTS_N_INSNS (1), /* ldm_1st. */
1363 1, /* ldm_regs_per_insn_1st. */
1364 2, /* ldm_regs_per_insn_subsequent. */
1365 COSTS_N_INSNS (2), /* loadf. */
1366 COSTS_N_INSNS (4), /* loadd. */
1367 COSTS_N_INSNS (1), /* load_unaligned. */
1368 COSTS_N_INSNS (1), /* store. */
1369 COSTS_N_INSNS (3), /* strd. */
1370 COSTS_N_INSNS (1), /* stm_1st. */
1371 1, /* stm_regs_per_insn_1st. */
1372 2, /* stm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* storef. */
1374 COSTS_N_INSNS (2), /* stored. */
1375 COSTS_N_INSNS (1), /* store_unaligned. */
1376 COSTS_N_INSNS (1), /* loadv. */
1377 COSTS_N_INSNS (1) /* storev. */
1378 },
1379 {
1380 /* FP SFmode */
1381 {
1382 COSTS_N_INSNS (15), /* div. */
1383 COSTS_N_INSNS (3), /* mult. */
1384 COSTS_N_INSNS (7), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1395 },
1396 /* FP DFmode */
1397 {
1398 COSTS_N_INSNS (30), /* div. */
1399 COSTS_N_INSNS (6), /* mult. */
1400 COSTS_N_INSNS (10), /* mult_addsub. */
1401 COSTS_N_INSNS (7), /* fma. */
1402 COSTS_N_INSNS (3), /* addsub. */
1403 COSTS_N_INSNS (3), /* fpconst. */
1404 COSTS_N_INSNS (3), /* neg. */
1405 COSTS_N_INSNS (3), /* compare. */
1406 COSTS_N_INSNS (3), /* widen. */
1407 COSTS_N_INSNS (3), /* narrow. */
1408 COSTS_N_INSNS (3), /* toint. */
1409 COSTS_N_INSNS (3), /* fromint. */
1410 COSTS_N_INSNS (3) /* roundint. */
1411 }
1412 },
1413 /* Vector */
1414 {
1415 COSTS_N_INSNS (1), /* alu. */
1416 COSTS_N_INSNS (4), /* mult. */
1417 COSTS_N_INSNS (1), /* movi. */
1418 COSTS_N_INSNS (2), /* dup. */
1419 COSTS_N_INSNS (2) /* extract. */
1420 }
1421 };
1422
1423
1424 const struct cpu_cost_table cortexa7_extra_costs =
1425 {
1426 /* ALU */
1427 {
1428 0, /* arith. */
1429 0, /* logical. */
1430 COSTS_N_INSNS (1), /* shift. */
1431 COSTS_N_INSNS (1), /* shift_reg. */
1432 COSTS_N_INSNS (1), /* arith_shift. */
1433 COSTS_N_INSNS (1), /* arith_shift_reg. */
1434 COSTS_N_INSNS (1), /* log_shift. */
1435 COSTS_N_INSNS (1), /* log_shift_reg. */
1436 COSTS_N_INSNS (1), /* extend. */
1437 COSTS_N_INSNS (1), /* extend_arith. */
1438 COSTS_N_INSNS (1), /* bfi. */
1439 COSTS_N_INSNS (1), /* bfx. */
1440 COSTS_N_INSNS (1), /* clz. */
1441 COSTS_N_INSNS (1), /* rev. */
1442 0, /* non_exec. */
1443 true /* non_exec_costs_exec. */
1444 },
1445
1446 {
1447 /* MULT SImode */
1448 {
1449 0, /* simple. */
1450 COSTS_N_INSNS (1), /* flag_setting. */
1451 COSTS_N_INSNS (1), /* extend. */
1452 COSTS_N_INSNS (1), /* add. */
1453 COSTS_N_INSNS (1), /* extend_add. */
1454 COSTS_N_INSNS (7) /* idiv. */
1455 },
1456 /* MULT DImode */
1457 {
1458 0, /* simple (N/A). */
1459 0, /* flag_setting (N/A). */
1460 COSTS_N_INSNS (1), /* extend. */
1461 0, /* add. */
1462 COSTS_N_INSNS (2), /* extend_add. */
1463 0 /* idiv (N/A). */
1464 }
1465 },
1466 /* LD/ST */
1467 {
1468 COSTS_N_INSNS (1), /* load. */
1469 COSTS_N_INSNS (1), /* load_sign_extend. */
1470 COSTS_N_INSNS (3), /* ldrd. */
1471 COSTS_N_INSNS (1), /* ldm_1st. */
1472 1, /* ldm_regs_per_insn_1st. */
1473 2, /* ldm_regs_per_insn_subsequent. */
1474 COSTS_N_INSNS (2), /* loadf. */
1475 COSTS_N_INSNS (2), /* loadd. */
1476 COSTS_N_INSNS (1), /* load_unaligned. */
1477 COSTS_N_INSNS (1), /* store. */
1478 COSTS_N_INSNS (3), /* strd. */
1479 COSTS_N_INSNS (1), /* stm_1st. */
1480 1, /* stm_regs_per_insn_1st. */
1481 2, /* stm_regs_per_insn_subsequent. */
1482 COSTS_N_INSNS (2), /* storef. */
1483 COSTS_N_INSNS (2), /* stored. */
1484 COSTS_N_INSNS (1), /* store_unaligned. */
1485 COSTS_N_INSNS (1), /* loadv. */
1486 COSTS_N_INSNS (1) /* storev. */
1487 },
1488 {
1489 /* FP SFmode */
1490 {
1491 COSTS_N_INSNS (15), /* div. */
1492 COSTS_N_INSNS (3), /* mult. */
1493 COSTS_N_INSNS (7), /* mult_addsub. */
1494 COSTS_N_INSNS (7), /* fma. */
1495 COSTS_N_INSNS (3), /* addsub. */
1496 COSTS_N_INSNS (3), /* fpconst. */
1497 COSTS_N_INSNS (3), /* neg. */
1498 COSTS_N_INSNS (3), /* compare. */
1499 COSTS_N_INSNS (3), /* widen. */
1500 COSTS_N_INSNS (3), /* narrow. */
1501 COSTS_N_INSNS (3), /* toint. */
1502 COSTS_N_INSNS (3), /* fromint. */
1503 COSTS_N_INSNS (3) /* roundint. */
1504 },
1505 /* FP DFmode */
1506 {
1507 COSTS_N_INSNS (30), /* div. */
1508 COSTS_N_INSNS (6), /* mult. */
1509 COSTS_N_INSNS (10), /* mult_addsub. */
1510 COSTS_N_INSNS (7), /* fma. */
1511 COSTS_N_INSNS (3), /* addsub. */
1512 COSTS_N_INSNS (3), /* fpconst. */
1513 COSTS_N_INSNS (3), /* neg. */
1514 COSTS_N_INSNS (3), /* compare. */
1515 COSTS_N_INSNS (3), /* widen. */
1516 COSTS_N_INSNS (3), /* narrow. */
1517 COSTS_N_INSNS (3), /* toint. */
1518 COSTS_N_INSNS (3), /* fromint. */
1519 COSTS_N_INSNS (3) /* roundint. */
1520 }
1521 },
1522 /* Vector */
1523 {
1524 COSTS_N_INSNS (1), /* alu. */
1525 COSTS_N_INSNS (4), /* mult. */
1526 COSTS_N_INSNS (1), /* movi. */
1527 COSTS_N_INSNS (2), /* dup. */
1528 COSTS_N_INSNS (2) /* extract. */
1529 }
1530 };
1531
1532 const struct cpu_cost_table cortexa12_extra_costs =
1533 {
1534 /* ALU */
1535 {
1536 0, /* arith. */
1537 0, /* logical. */
1538 0, /* shift. */
1539 COSTS_N_INSNS (1), /* shift_reg. */
1540 COSTS_N_INSNS (1), /* arith_shift. */
1541 COSTS_N_INSNS (1), /* arith_shift_reg. */
1542 COSTS_N_INSNS (1), /* log_shift. */
1543 COSTS_N_INSNS (1), /* log_shift_reg. */
1544 0, /* extend. */
1545 COSTS_N_INSNS (1), /* extend_arith. */
1546 0, /* bfi. */
1547 COSTS_N_INSNS (1), /* bfx. */
1548 COSTS_N_INSNS (1), /* clz. */
1549 COSTS_N_INSNS (1), /* rev. */
1550 0, /* non_exec. */
1551 true /* non_exec_costs_exec. */
1552 },
1553 /* MULT SImode */
1554 {
1555 {
1556 COSTS_N_INSNS (2), /* simple. */
1557 COSTS_N_INSNS (3), /* flag_setting. */
1558 COSTS_N_INSNS (2), /* extend. */
1559 COSTS_N_INSNS (3), /* add. */
1560 COSTS_N_INSNS (2), /* extend_add. */
1561 COSTS_N_INSNS (18) /* idiv. */
1562 },
1563 /* MULT DImode */
1564 {
1565 0, /* simple (N/A). */
1566 0, /* flag_setting (N/A). */
1567 COSTS_N_INSNS (3), /* extend. */
1568 0, /* add (N/A). */
1569 COSTS_N_INSNS (3), /* extend_add. */
1570 0 /* idiv (N/A). */
1571 }
1572 },
1573 /* LD/ST */
1574 {
1575 COSTS_N_INSNS (3), /* load. */
1576 COSTS_N_INSNS (3), /* load_sign_extend. */
1577 COSTS_N_INSNS (3), /* ldrd. */
1578 COSTS_N_INSNS (3), /* ldm_1st. */
1579 1, /* ldm_regs_per_insn_1st. */
1580 2, /* ldm_regs_per_insn_subsequent. */
1581 COSTS_N_INSNS (3), /* loadf. */
1582 COSTS_N_INSNS (3), /* loadd. */
1583 0, /* load_unaligned. */
1584 0, /* store. */
1585 0, /* strd. */
1586 0, /* stm_1st. */
1587 1, /* stm_regs_per_insn_1st. */
1588 2, /* stm_regs_per_insn_subsequent. */
1589 COSTS_N_INSNS (2), /* storef. */
1590 COSTS_N_INSNS (2), /* stored. */
1591 0, /* store_unaligned. */
1592 COSTS_N_INSNS (1), /* loadv. */
1593 COSTS_N_INSNS (1) /* storev. */
1594 },
1595 {
1596 /* FP SFmode */
1597 {
1598 COSTS_N_INSNS (17), /* div. */
1599 COSTS_N_INSNS (4), /* mult. */
1600 COSTS_N_INSNS (8), /* mult_addsub. */
1601 COSTS_N_INSNS (8), /* fma. */
1602 COSTS_N_INSNS (4), /* addsub. */
1603 COSTS_N_INSNS (2), /* fpconst. */
1604 COSTS_N_INSNS (2), /* neg. */
1605 COSTS_N_INSNS (2), /* compare. */
1606 COSTS_N_INSNS (4), /* widen. */
1607 COSTS_N_INSNS (4), /* narrow. */
1608 COSTS_N_INSNS (4), /* toint. */
1609 COSTS_N_INSNS (4), /* fromint. */
1610 COSTS_N_INSNS (4) /* roundint. */
1611 },
1612 /* FP DFmode */
1613 {
1614 COSTS_N_INSNS (31), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (2), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 }
1628 },
1629 /* Vector */
1630 {
1631 COSTS_N_INSNS (1), /* alu. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (1), /* movi. */
1634 COSTS_N_INSNS (2), /* dup. */
1635 COSTS_N_INSNS (2) /* extract. */
1636 }
1637 };
1638
1639 const struct cpu_cost_table cortexa15_extra_costs =
1640 {
1641 /* ALU */
1642 {
1643 0, /* arith. */
1644 0, /* logical. */
1645 0, /* shift. */
1646 0, /* shift_reg. */
1647 COSTS_N_INSNS (1), /* arith_shift. */
1648 COSTS_N_INSNS (1), /* arith_shift_reg. */
1649 COSTS_N_INSNS (1), /* log_shift. */
1650 COSTS_N_INSNS (1), /* log_shift_reg. */
1651 0, /* extend. */
1652 COSTS_N_INSNS (1), /* extend_arith. */
1653 COSTS_N_INSNS (1), /* bfi. */
1654 0, /* bfx. */
1655 0, /* clz. */
1656 0, /* rev. */
1657 0, /* non_exec. */
1658 true /* non_exec_costs_exec. */
1659 },
1660 /* MULT SImode */
1661 {
1662 {
1663 COSTS_N_INSNS (2), /* simple. */
1664 COSTS_N_INSNS (3), /* flag_setting. */
1665 COSTS_N_INSNS (2), /* extend. */
1666 COSTS_N_INSNS (2), /* add. */
1667 COSTS_N_INSNS (2), /* extend_add. */
1668 COSTS_N_INSNS (18) /* idiv. */
1669 },
1670 /* MULT DImode */
1671 {
1672 0, /* simple (N/A). */
1673 0, /* flag_setting (N/A). */
1674 COSTS_N_INSNS (3), /* extend. */
1675 0, /* add (N/A). */
1676 COSTS_N_INSNS (3), /* extend_add. */
1677 0 /* idiv (N/A). */
1678 }
1679 },
1680 /* LD/ST */
1681 {
1682 COSTS_N_INSNS (3), /* load. */
1683 COSTS_N_INSNS (3), /* load_sign_extend. */
1684 COSTS_N_INSNS (3), /* ldrd. */
1685 COSTS_N_INSNS (4), /* ldm_1st. */
1686 1, /* ldm_regs_per_insn_1st. */
1687 2, /* ldm_regs_per_insn_subsequent. */
1688 COSTS_N_INSNS (4), /* loadf. */
1689 COSTS_N_INSNS (4), /* loadd. */
1690 0, /* load_unaligned. */
1691 0, /* store. */
1692 0, /* strd. */
1693 COSTS_N_INSNS (1), /* stm_1st. */
1694 1, /* stm_regs_per_insn_1st. */
1695 2, /* stm_regs_per_insn_subsequent. */
1696 0, /* storef. */
1697 0, /* stored. */
1698 0, /* store_unaligned. */
1699 COSTS_N_INSNS (1), /* loadv. */
1700 COSTS_N_INSNS (1) /* storev. */
1701 },
1702 {
1703 /* FP SFmode */
1704 {
1705 COSTS_N_INSNS (17), /* div. */
1706 COSTS_N_INSNS (4), /* mult. */
1707 COSTS_N_INSNS (8), /* mult_addsub. */
1708 COSTS_N_INSNS (8), /* fma. */
1709 COSTS_N_INSNS (4), /* addsub. */
1710 COSTS_N_INSNS (2), /* fpconst. */
1711 COSTS_N_INSNS (2), /* neg. */
1712 COSTS_N_INSNS (5), /* compare. */
1713 COSTS_N_INSNS (4), /* widen. */
1714 COSTS_N_INSNS (4), /* narrow. */
1715 COSTS_N_INSNS (4), /* toint. */
1716 COSTS_N_INSNS (4), /* fromint. */
1717 COSTS_N_INSNS (4) /* roundint. */
1718 },
1719 /* FP DFmode */
1720 {
1721 COSTS_N_INSNS (31), /* div. */
1722 COSTS_N_INSNS (4), /* mult. */
1723 COSTS_N_INSNS (8), /* mult_addsub. */
1724 COSTS_N_INSNS (8), /* fma. */
1725 COSTS_N_INSNS (4), /* addsub. */
1726 COSTS_N_INSNS (2), /* fpconst. */
1727 COSTS_N_INSNS (2), /* neg. */
1728 COSTS_N_INSNS (2), /* compare. */
1729 COSTS_N_INSNS (4), /* widen. */
1730 COSTS_N_INSNS (4), /* narrow. */
1731 COSTS_N_INSNS (4), /* toint. */
1732 COSTS_N_INSNS (4), /* fromint. */
1733 COSTS_N_INSNS (4) /* roundint. */
1734 }
1735 },
1736 /* Vector */
1737 {
1738 COSTS_N_INSNS (1), /* alu. */
1739 COSTS_N_INSNS (4), /* mult. */
1740 COSTS_N_INSNS (1), /* movi. */
1741 COSTS_N_INSNS (2), /* dup. */
1742 COSTS_N_INSNS (2) /* extract. */
1743 }
1744 };
1745
1746 const struct cpu_cost_table v7m_extra_costs =
1747 {
1748 /* ALU */
1749 {
1750 0, /* arith. */
1751 0, /* logical. */
1752 0, /* shift. */
1753 0, /* shift_reg. */
1754 0, /* arith_shift. */
1755 COSTS_N_INSNS (1), /* arith_shift_reg. */
1756 0, /* log_shift. */
1757 COSTS_N_INSNS (1), /* log_shift_reg. */
1758 0, /* extend. */
1759 COSTS_N_INSNS (1), /* extend_arith. */
1760 0, /* bfi. */
1761 0, /* bfx. */
1762 0, /* clz. */
1763 0, /* rev. */
1764 COSTS_N_INSNS (1), /* non_exec. */
1765 false /* non_exec_costs_exec. */
1766 },
1767 {
1768 /* MULT SImode */
1769 {
1770 COSTS_N_INSNS (1), /* simple. */
1771 COSTS_N_INSNS (1), /* flag_setting. */
1772 COSTS_N_INSNS (2), /* extend. */
1773 COSTS_N_INSNS (1), /* add. */
1774 COSTS_N_INSNS (3), /* extend_add. */
1775 COSTS_N_INSNS (8) /* idiv. */
1776 },
1777 /* MULT DImode */
1778 {
1779 0, /* simple (N/A). */
1780 0, /* flag_setting (N/A). */
1781 COSTS_N_INSNS (2), /* extend. */
1782 0, /* add (N/A). */
1783 COSTS_N_INSNS (3), /* extend_add. */
1784 0 /* idiv (N/A). */
1785 }
1786 },
1787 /* LD/ST */
1788 {
1789 COSTS_N_INSNS (2), /* load. */
1790 0, /* load_sign_extend. */
1791 COSTS_N_INSNS (3), /* ldrd. */
1792 COSTS_N_INSNS (2), /* ldm_1st. */
1793 1, /* ldm_regs_per_insn_1st. */
1794 1, /* ldm_regs_per_insn_subsequent. */
1795 COSTS_N_INSNS (2), /* loadf. */
1796 COSTS_N_INSNS (3), /* loadd. */
1797 COSTS_N_INSNS (1), /* load_unaligned. */
1798 COSTS_N_INSNS (2), /* store. */
1799 COSTS_N_INSNS (3), /* strd. */
1800 COSTS_N_INSNS (2), /* stm_1st. */
1801 1, /* stm_regs_per_insn_1st. */
1802 1, /* stm_regs_per_insn_subsequent. */
1803 COSTS_N_INSNS (2), /* storef. */
1804 COSTS_N_INSNS (3), /* stored. */
1805 COSTS_N_INSNS (1), /* store_unaligned. */
1806 COSTS_N_INSNS (1), /* loadv. */
1807 COSTS_N_INSNS (1) /* storev. */
1808 },
1809 {
1810 /* FP SFmode */
1811 {
1812 COSTS_N_INSNS (7), /* div. */
1813 COSTS_N_INSNS (2), /* mult. */
1814 COSTS_N_INSNS (5), /* mult_addsub. */
1815 COSTS_N_INSNS (3), /* fma. */
1816 COSTS_N_INSNS (1), /* addsub. */
1817 0, /* fpconst. */
1818 0, /* neg. */
1819 0, /* compare. */
1820 0, /* widen. */
1821 0, /* narrow. */
1822 0, /* toint. */
1823 0, /* fromint. */
1824 0 /* roundint. */
1825 },
1826 /* FP DFmode */
1827 {
1828 COSTS_N_INSNS (15), /* div. */
1829 COSTS_N_INSNS (5), /* mult. */
1830 COSTS_N_INSNS (7), /* mult_addsub. */
1831 COSTS_N_INSNS (7), /* fma. */
1832 COSTS_N_INSNS (3), /* addsub. */
1833 0, /* fpconst. */
1834 0, /* neg. */
1835 0, /* compare. */
1836 0, /* widen. */
1837 0, /* narrow. */
1838 0, /* toint. */
1839 0, /* fromint. */
1840 0 /* roundint. */
1841 }
1842 },
1843 /* Vector */
1844 {
1845 COSTS_N_INSNS (1), /* alu. */
1846 COSTS_N_INSNS (4), /* mult. */
1847 COSTS_N_INSNS (1), /* movi. */
1848 COSTS_N_INSNS (2), /* dup. */
1849 COSTS_N_INSNS (2) /* extract. */
1850 }
1851 };
1852
1853 const struct addr_mode_cost_table generic_addr_mode_costs =
1854 {
1855 /* int. */
1856 {
1857 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1858 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1859 COSTS_N_INSNS (0) /* AMO_WB. */
1860 },
1861 /* float. */
1862 {
1863 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1864 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1865 COSTS_N_INSNS (0) /* AMO_WB. */
1866 },
1867 /* vector. */
1868 {
1869 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1870 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1871 COSTS_N_INSNS (0) /* AMO_WB. */
1872 }
1873 };
1874
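/* Roughly speaking, each entry above records the cost of an operation over
   and above a simple single-cycle instruction.  COSTS_N_INSNS (N) expands to
   (N) * 4 (see rtl.h), so for instance COSTS_N_INSNS (7) == 28, while a
   plain 0 means "no extra cost beyond the baseline instruction".  */
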
1875 const struct tune_params arm_slowmul_tune =
1876 {
1877 &generic_extra_costs, /* Insn extra costs. */
1878 &generic_addr_mode_costs, /* Addressing mode costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 3, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 1, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897 const struct tune_params arm_fastmul_tune =
1898 {
1899 &generic_extra_costs, /* Insn extra costs. */
1900 &generic_addr_mode_costs, /* Addressing mode costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_TRUE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_STRINGOPS_FALSE,
1915 tune_params::FUSE_NOTHING,
1916 tune_params::SCHED_AUTOPREF_OFF
1917 };
1918
1919 /* StrongARM has early execution of branches, so a sequence that is worth
1920 skipping is shorter. Set max_insns_skipped to a lower value. */
1921
1922 const struct tune_params arm_strongarm_tune =
1923 {
1924 &generic_extra_costs, /* Insn extra costs. */
1925 &generic_addr_mode_costs, /* Addressing mode costs. */
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 3, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 1, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_TRUE,
1935 tune_params::PREF_LDRD_FALSE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_NEITHER,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_xscale_tune =
1945 {
1946 &generic_extra_costs, /* Insn extra costs. */
1947 &generic_addr_mode_costs, /* Addressing mode costs. */
1948 xscale_sched_adjust_cost,
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 2, /* Constant limit. */
1952 3, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_TRUE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_STRINGOPS_FALSE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_9e_tune =
1967 {
1968 &generic_extra_costs, /* Insn extra costs. */
1969 &generic_addr_mode_costs, /* Addressing mode costs. */
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 1, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_TRUE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_STRINGOPS_FALSE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_marvell_pj4_tune =
1989 {
1990 &generic_extra_costs, /* Insn extra costs. */
1991 &generic_addr_mode_costs, /* Addressing mode costs. */
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 5, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 2, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_TRUE,
2001 tune_params::PREF_LDRD_FALSE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_NEITHER,
2005 tune_params::PREF_NEON_STRINGOPS_FALSE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_OFF
2008 };
2009
2010 const struct tune_params arm_v6t2_tune =
2011 {
2012 &generic_extra_costs, /* Insn extra costs. */
2013 &generic_addr_mode_costs, /* Addressing mode costs. */
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 1, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_FALSE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_NEITHER,
2027 tune_params::PREF_NEON_STRINGOPS_FALSE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032
2033 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2034 const struct tune_params arm_cortex_tune =
2035 {
2036 &generic_extra_costs,
2037 &generic_addr_mode_costs, /* Addressing mode costs. */
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_STRINGOPS_FALSE,
2052 tune_params::FUSE_NOTHING,
2053 tune_params::SCHED_AUTOPREF_OFF
2054 };
2055
2056 const struct tune_params arm_cortex_a8_tune =
2057 {
2058 &cortexa8_extra_costs,
2059 &generic_addr_mode_costs, /* Addressing mode costs. */
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 5, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_FALSE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 tune_params::FUSE_NOTHING,
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a7_tune =
2079 {
2080 &cortexa7_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_STRINGOPS_TRUE,
2096 tune_params::FUSE_NOTHING,
2097 tune_params::SCHED_AUTOPREF_OFF
2098 };
2099
2100 const struct tune_params arm_cortex_a15_tune =
2101 {
2102 &cortexa15_extra_costs,
2103 &generic_addr_mode_costs, /* Addressing mode costs. */
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 8, /* Memset max inline. */
2110 3, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_FULL
2120 };
2121
2122 const struct tune_params arm_cortex_a35_tune =
2123 {
2124 &cortexa53_extra_costs,
2125 &generic_addr_mode_costs, /* Addressing mode costs. */
2126 NULL, /* Sched adj cost. */
2127 arm_default_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 5, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 1, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_STRINGOPS_TRUE,
2140 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 const struct tune_params arm_cortex_a53_tune =
2145 {
2146 &cortexa53_extra_costs,
2147 &generic_addr_mode_costs, /* Addressing mode costs. */
2148 NULL, /* Sched adj cost. */
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_NOT_BENEFICIAL,
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 const struct tune_params arm_cortex_a57_tune =
2167 {
2168 &cortexa57_extra_costs,
2169 &generic_addr_mode_costs, /* addressing mode costs */
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost,
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 3, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2185 tune_params::SCHED_AUTOPREF_FULL
2186 };
2187
2188 const struct tune_params arm_exynosm1_tune =
2189 {
2190 &exynosm1_extra_costs,
2191 &generic_addr_mode_costs, /* Addressing mode costs. */
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost,
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 3, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_STRINGOPS_TRUE,
2206 tune_params::FUSE_NOTHING,
2207 tune_params::SCHED_AUTOPREF_OFF
2208 };
2209
2210 const struct tune_params arm_xgene1_tune =
2211 {
2212 &xgene1_extra_costs,
2213 &generic_addr_mode_costs, /* Addressing mode costs. */
2214 NULL, /* Sched adj cost. */
2215 arm_default_branch_cost,
2216 &arm_default_vec_cost,
2217 1, /* Constant limit. */
2218 2, /* Max cond insns. */
2219 32, /* Memset max inline. */
2220 4, /* Issue rate. */
2221 ARM_PREFETCH_NOT_BENEFICIAL,
2222 tune_params::PREF_CONST_POOL_FALSE,
2223 tune_params::PREF_LDRD_TRUE,
2224 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2226 tune_params::DISPARAGE_FLAGS_ALL,
2227 tune_params::PREF_NEON_STRINGOPS_FALSE,
2228 tune_params::FUSE_NOTHING,
2229 tune_params::SCHED_AUTOPREF_OFF
2230 };
2231
2232 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2233 less appealing. Set max_insns_skipped to a low value. */
2234
2235 const struct tune_params arm_cortex_a5_tune =
2236 {
2237 &cortexa5_extra_costs,
2238 &generic_addr_mode_costs, /* Addressing mode costs. */
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_a5_branch_cost,
2241 &arm_default_vec_cost,
2242 1, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_FALSE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_STRINGOPS_TRUE,
2253 tune_params::FUSE_NOTHING,
2254 tune_params::SCHED_AUTOPREF_OFF
2255 };
2256
2257 const struct tune_params arm_cortex_a9_tune =
2258 {
2259 &cortexa9_extra_costs,
2260 &generic_addr_mode_costs, /* Addressing mode costs. */
2261 cortex_a9_sched_adjust_cost,
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost,
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 2, /* Issue rate. */
2268 ARM_PREFETCH_BENEFICIAL(4,32,32),
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_STRINGOPS_FALSE,
2275 tune_params::FUSE_NOTHING,
2276 tune_params::SCHED_AUTOPREF_OFF
2277 };
2278
2279 const struct tune_params arm_cortex_a12_tune =
2280 {
2281 &cortexa12_extra_costs,
2282 &generic_addr_mode_costs, /* Addressing mode costs. */
2283 NULL, /* Sched adj cost. */
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost, /* Vectorizer costs. */
2286 1, /* Constant limit. */
2287 2, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_FALSE,
2292 tune_params::PREF_LDRD_TRUE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_ALL,
2296 tune_params::PREF_NEON_STRINGOPS_TRUE,
2297 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2298 tune_params::SCHED_AUTOPREF_OFF
2299 };
2300
2301 const struct tune_params arm_cortex_a73_tune =
2302 {
2303 &cortexa57_extra_costs,
2304 &generic_addr_mode_costs, /* Addressing mode costs. */
2305 NULL, /* Sched adj cost. */
2306 arm_default_branch_cost,
2307 &arm_default_vec_cost, /* Vectorizer costs. */
2308 1, /* Constant limit. */
2309 2, /* Max cond insns. */
2310 8, /* Memset max inline. */
2311 2, /* Issue rate. */
2312 ARM_PREFETCH_NOT_BENEFICIAL,
2313 tune_params::PREF_CONST_POOL_FALSE,
2314 tune_params::PREF_LDRD_TRUE,
2315 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2316 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2317 tune_params::DISPARAGE_FLAGS_ALL,
2318 tune_params::PREF_NEON_STRINGOPS_TRUE,
2319 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2320 tune_params::SCHED_AUTOPREF_FULL
2321 };
2322
2323 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2324 single cycle, so a MOVW/MOVT pair costs two cycles. An LDR from the constant
2325 pool also takes two cycles to execute, but mildly increases pipelining
2326 opportunity (consecutive loads/stores can be pipelined together, saving one
2327 cycle), and may also improve icache utilisation. Hence we prefer the
2328 constant pool for such processors. */
2329
2330 const struct tune_params arm_v7m_tune =
2331 {
2332 &v7m_extra_costs,
2333 &generic_addr_mode_costs, /* Addressing mode costs. */
2334 NULL, /* Sched adj cost. */
2335 arm_cortex_m_branch_cost,
2336 &arm_default_vec_cost,
2337 1, /* Constant limit. */
2338 2, /* Max cond insns. */
2339 8, /* Memset max inline. */
2340 1, /* Issue rate. */
2341 ARM_PREFETCH_NOT_BENEFICIAL,
2342 tune_params::PREF_CONST_POOL_TRUE,
2343 tune_params::PREF_LDRD_FALSE,
2344 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2345 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2346 tune_params::DISPARAGE_FLAGS_NEITHER,
2347 tune_params::PREF_NEON_STRINGOPS_FALSE,
2348 tune_params::FUSE_NOTHING,
2349 tune_params::SCHED_AUTOPREF_OFF
2350 };
2351
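/* As a rough illustration of the constant-pool preference above, materialising
   the (arbitrary) constant 0x12345678 can be done either as

     movw r0, #0x5678   @ 1 cycle
     movt r0, #0x1234   @ 1 cycle

   or as a single literal-pool load

     ldr  r0, .Lconst   @ 2 cycles, but may pipeline with a neighbouring
                        @ load/store

   which is why arm_v7m_tune above selects PREF_CONST_POOL_TRUE.  */
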
2352 /* Cortex-M7 tuning. */
2353
2354 const struct tune_params arm_cortex_m7_tune =
2355 {
2356 &v7m_extra_costs,
2357 &generic_addr_mode_costs, /* Addressing mode costs. */
2358 NULL, /* Sched adj cost. */
2359 arm_cortex_m7_branch_cost,
2360 &arm_default_vec_cost,
2361 0, /* Constant limit. */
2362 1, /* Max cond insns. */
2363 8, /* Memset max inline. */
2364 2, /* Issue rate. */
2365 ARM_PREFETCH_NOT_BENEFICIAL,
2366 tune_params::PREF_CONST_POOL_TRUE,
2367 tune_params::PREF_LDRD_FALSE,
2368 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2369 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2370 tune_params::DISPARAGE_FLAGS_NEITHER,
2371 tune_params::PREF_NEON_STRINGOPS_FALSE,
2372 tune_params::FUSE_NOTHING,
2373 tune_params::SCHED_AUTOPREF_OFF
2374 };
2375
2376 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2377 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2378 cortex-m23. */
2379 const struct tune_params arm_v6m_tune =
2380 {
2381 &generic_extra_costs, /* Insn extra costs. */
2382 &generic_addr_mode_costs, /* Addressing mode costs. */
2383 NULL, /* Sched adj cost. */
2384 arm_default_branch_cost,
2385 &arm_default_vec_cost, /* Vectorizer costs. */
2386 1, /* Constant limit. */
2387 5, /* Max cond insns. */
2388 8, /* Memset max inline. */
2389 1, /* Issue rate. */
2390 ARM_PREFETCH_NOT_BENEFICIAL,
2391 tune_params::PREF_CONST_POOL_FALSE,
2392 tune_params::PREF_LDRD_FALSE,
2393 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2394 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2395 tune_params::DISPARAGE_FLAGS_NEITHER,
2396 tune_params::PREF_NEON_STRINGOPS_FALSE,
2397 tune_params::FUSE_NOTHING,
2398 tune_params::SCHED_AUTOPREF_OFF
2399 };
2400
2401 const struct tune_params arm_fa726te_tune =
2402 {
2403 &generic_extra_costs, /* Insn extra costs. */
2404 &generic_addr_mode_costs, /* Addressing mode costs. */
2405 fa726te_sched_adjust_cost,
2406 arm_default_branch_cost,
2407 &arm_default_vec_cost,
2408 1, /* Constant limit. */
2409 5, /* Max cond insns. */
2410 8, /* Memset max inline. */
2411 2, /* Issue rate. */
2412 ARM_PREFETCH_NOT_BENEFICIAL,
2413 tune_params::PREF_CONST_POOL_TRUE,
2414 tune_params::PREF_LDRD_FALSE,
2415 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2416 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2417 tune_params::DISPARAGE_FLAGS_NEITHER,
2418 tune_params::PREF_NEON_STRINGOPS_FALSE,
2419 tune_params::FUSE_NOTHING,
2420 tune_params::SCHED_AUTOPREF_OFF
2421 };
2422
2423 /* Key type for Pointer Authentication extension. */
2424 enum aarch_key_type aarch_ra_sign_key = AARCH_KEY_A;
2425
2426 char *accepted_branch_protection_string = NULL;
2427
2428 /* Auto-generated CPU, FPU and architecture tables. */
2429 #include "arm-cpu-data.h"
2430
2431 /* The name of the preprocessor macro to define for this architecture. PROFILE
2432 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2433 is thus chosen to be big enough to hold the longest architecture name. */
2434
2435 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2436
2437 /* Supported TLS relocations. */
2438
2439 enum tls_reloc {
2440 TLS_GD32,
2441 TLS_GD32_FDPIC,
2442 TLS_LDM32,
2443 TLS_LDM32_FDPIC,
2444 TLS_LDO32,
2445 TLS_IE32,
2446 TLS_IE32_FDPIC,
2447 TLS_LE32,
2448 TLS_DESCSEQ /* GNU scheme */
2449 };
2450
2451 /* The maximum number of insns to be used when loading a constant. */
2452 inline static int
2453 arm_constant_limit (bool size_p)
2454 {
2455 return size_p ? 1 : current_tune->constant_limit;
2456 }
2457
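/* For example, arm_constant_limit (true) (i.e. optimising for size) always
   returns 1, while arm_constant_limit (false) returns the active tuning's
   constant_limit, e.g. 2 under arm_xscale_tune above.  */
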
2458 /* Emit an insn that's a simple single-set. Both the operands must be known
2459 to be valid. */
2460 inline static rtx_insn *
2461 emit_set_insn (rtx x, rtx y)
2462 {
2463 return emit_insn (gen_rtx_SET (x, y));
2464 }
2465
2466 /* Return the number of bits set in VALUE. */
2467 static unsigned
2468 bit_count (unsigned long value)
2469 {
2470 unsigned long count = 0;
2471
2472 while (value)
2473 {
2474 count++;
2475 value &= value - 1; /* Clear the least-significant set bit. */
2476 }
2477
2478 return count;
2479 }
2480
2481 /* Return the number of bits set in BMAP. */
2482 static unsigned
2483 bitmap_popcount (const sbitmap bmap)
2484 {
2485 unsigned int count = 0;
2486 unsigned int n = 0;
2487 sbitmap_iterator sbi;
2488
2489 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2490 count++;
2491 return count;
2492 }
2493
2494 typedef struct
2495 {
2496 machine_mode mode;
2497 const char *name;
2498 } arm_fixed_mode_set;
2499
2500 /* A small helper for setting fixed-point library libfuncs. */
2501
2502 static void
2503 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2504 const char *funcname, const char *modename,
2505 int num_suffix)
2506 {
2507 char buffer[50];
2508
2509 if (num_suffix == 0)
2510 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2511 else
2512 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2513
2514 set_optab_libfunc (optable, mode, buffer);
2515 }
2516
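/* For instance, the call
     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);
   made from the table-driven loop further down registers the helper name
   "__gnu_addsq3" for signed-fract SQmode addition.  */
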
2517 static void
2518 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2519 machine_mode from, const char *funcname,
2520 const char *toname, const char *fromname)
2521 {
2522 char buffer[50];
2523 const char *maybe_suffix_2 = "";
2524
2525 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2526 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2527 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2528 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2529 maybe_suffix_2 = "2";
2530
2531 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2532 maybe_suffix_2);
2533
2534 set_conv_libfunc (optable, to, from, buffer);
2535 }
2536
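/* Likewise, arm_set_fixed_conv_libfunc (fract_optab, E_SQmode, E_SImode,
   "fract", "sq", "si") registers "__gnu_fractsisq" (no "2" suffix, since
   SImode is not a fixed-point mode), while a conversion between two signed
   fract modes such as SQmode -> DQmode picks up the suffix and becomes
   "__gnu_fractsqdq2".  */
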
2537 static GTY(()) rtx speculation_barrier_libfunc;
2538
2539 /* Record that we have no arithmetic or comparison libfuncs for
2540 machine mode MODE. */
2541
2542 static void
2543 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2544 {
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, mode, NULL);
2547 set_optab_libfunc (sdiv_optab, mode, NULL);
2548 set_optab_libfunc (smul_optab, mode, NULL);
2549 set_optab_libfunc (neg_optab, mode, NULL);
2550 set_optab_libfunc (sub_optab, mode, NULL);
2551
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, mode, NULL);
2554 set_optab_libfunc (ne_optab, mode, NULL);
2555 set_optab_libfunc (lt_optab, mode, NULL);
2556 set_optab_libfunc (le_optab, mode, NULL);
2557 set_optab_libfunc (ge_optab, mode, NULL);
2558 set_optab_libfunc (gt_optab, mode, NULL);
2559 set_optab_libfunc (unord_optab, mode, NULL);
2560 }
2561
2562 /* Set up library functions unique to ARM. */
2563 static void
2564 arm_init_libfuncs (void)
2565 {
2566 machine_mode mode_iter;
2567
2568 /* For Linux, we have access to kernel support for atomic operations. */
2569 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2570 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2571
2572 /* There are no special library functions unless we are using the
2573 ARM BPABI. */
2574 if (!TARGET_BPABI)
2575 return;
2576
2577 /* The functions below are described in Section 4 of the "Run-Time
2578 ABI for the ARM architecture", Version 1.0. */
2579
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2586
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab, DFmode, NULL);
2590 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2595
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2602
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab, SFmode, NULL);
2606 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2611
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2621
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2625
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2635
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2645
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2649
2650 /* The divmod functions are designed so that they can be used for
2651 plain division, even though they return both the quotient and the
2652 remainder. The quotient is returned in the usual location (i.e.,
2653 r0 for SImode, {r0, r1} for DImode), just as would be expected
2654 for an ordinary division routine. Because the AAPCS calling
2655 conventions specify that all of { r0, r1, r2, r3 } are
2656 call-clobbered registers, there is no need to tell the compiler
2657 explicitly that those registers are clobbered by these
2658 routines. */
2659 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2660 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2661
2662 /* For SImode division the ABI provides div-without-mod routines,
2663 which are faster. */
2664 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2665 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2666
2667 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2668 divmod libcalls instead. */
2669 set_optab_libfunc (smod_optab, DImode, NULL);
2670 set_optab_libfunc (umod_optab, DImode, NULL);
2671 set_optab_libfunc (smod_optab, SImode, NULL);
2672 set_optab_libfunc (umod_optab, SImode, NULL);
2673
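/* To illustrate: on a core without hardware divide, "q = a / b" for 32-bit
   ints becomes a call to __aeabi_idiv (operands in r0/r1, quotient in r0),
   while "r = a % b" goes through __aeabi_idivmod, which per the RTABI
   returns the quotient in r0 and the remainder in r1.  */
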
2674 /* Half-precision float operations. The compiler handles all operations
2675 with NULL libfuncs by converting to SFmode. */
2676 switch (arm_fp16_format)
2677 {
2678 case ARM_FP16_FORMAT_IEEE:
2679 case ARM_FP16_FORMAT_ALTERNATIVE:
2680
2681 /* Conversions. */
2682 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2683 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2684 ? "__gnu_f2h_ieee"
2685 : "__gnu_f2h_alternative"));
2686 set_conv_libfunc (sext_optab, SFmode, HFmode,
2687 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2688 ? "__gnu_h2f_ieee"
2689 : "__gnu_h2f_alternative"));
2690
2691 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2692 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2693 ? "__gnu_d2h_ieee"
2694 : "__gnu_d2h_alternative"));
2695
2696 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2697 break;
2698
2699 default:
2700 break;
2701 }
2702
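/* The upshot, absent hardware conversion instructions: narrowing a float to
   __fp16 calls __gnu_f2h_ieee (or __gnu_f2h_alternative for the Arm
   alternative format), widening calls the matching __gnu_h2f_* helper, and
   arithmetic on __fp16 values is performed by promoting to SFmode.  */
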
2703 /* For all possible libcalls in BFmode, record NULL. */
2704 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2705 {
2706 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2707 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2708 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2709 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2710 }
2711 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2712
2713 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2714 {
2715 const arm_fixed_mode_set fixed_arith_modes[] =
2716 {
2717 { E_QQmode, "qq" },
2718 { E_UQQmode, "uqq" },
2719 { E_HQmode, "hq" },
2720 { E_UHQmode, "uhq" },
2721 { E_SQmode, "sq" },
2722 { E_USQmode, "usq" },
2723 { E_DQmode, "dq" },
2724 { E_UDQmode, "udq" },
2725 { E_TQmode, "tq" },
2726 { E_UTQmode, "utq" },
2727 { E_HAmode, "ha" },
2728 { E_UHAmode, "uha" },
2729 { E_SAmode, "sa" },
2730 { E_USAmode, "usa" },
2731 { E_DAmode, "da" },
2732 { E_UDAmode, "uda" },
2733 { E_TAmode, "ta" },
2734 { E_UTAmode, "uta" }
2735 };
2736 const arm_fixed_mode_set fixed_conv_modes[] =
2737 {
2738 { E_QQmode, "qq" },
2739 { E_UQQmode, "uqq" },
2740 { E_HQmode, "hq" },
2741 { E_UHQmode, "uhq" },
2742 { E_SQmode, "sq" },
2743 { E_USQmode, "usq" },
2744 { E_DQmode, "dq" },
2745 { E_UDQmode, "udq" },
2746 { E_TQmode, "tq" },
2747 { E_UTQmode, "utq" },
2748 { E_HAmode, "ha" },
2749 { E_UHAmode, "uha" },
2750 { E_SAmode, "sa" },
2751 { E_USAmode, "usa" },
2752 { E_DAmode, "da" },
2753 { E_UDAmode, "uda" },
2754 { E_TAmode, "ta" },
2755 { E_UTAmode, "uta" },
2756 { E_QImode, "qi" },
2757 { E_HImode, "hi" },
2758 { E_SImode, "si" },
2759 { E_DImode, "di" },
2760 { E_TImode, "ti" },
2761 { E_SFmode, "sf" },
2762 { E_DFmode, "df" }
2763 };
2764 unsigned int i, j;
2765
2766 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2767 {
2768 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2769 "add", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2771 "ssadd", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2773 "usadd", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2775 "sub", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2777 "sssub", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2779 "ussub", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2781 "mul", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2783 "ssmul", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2785 "usmul", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2787 "div", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2789 "udiv", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2791 "ssdiv", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2793 "usdiv", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2795 "neg", fixed_arith_modes[i].name, 2);
2796 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2797 "ssneg", fixed_arith_modes[i].name, 2);
2798 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2799 "usneg", fixed_arith_modes[i].name, 2);
2800 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2801 "ashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2803 "ashr", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2805 "lshr", fixed_arith_modes[i].name, 3);
2806 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2807 "ssashl", fixed_arith_modes[i].name, 3);
2808 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2809 "usashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2811 "cmp", fixed_arith_modes[i].name, 2);
2812 }
2813
2814 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2815 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2816 {
2817 if (i == j
2818 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2819 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2820 continue;
2821
2822 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2823 fixed_conv_modes[j].mode, "fract",
2824 fixed_conv_modes[i].name,
2825 fixed_conv_modes[j].name);
2826 arm_set_fixed_conv_libfunc (satfract_optab,
2827 fixed_conv_modes[i].mode,
2828 fixed_conv_modes[j].mode, "satfract",
2829 fixed_conv_modes[i].name,
2830 fixed_conv_modes[j].name);
2831 arm_set_fixed_conv_libfunc (fractuns_optab,
2832 fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fractuns",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfractuns_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfractuns",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2841 }
2842 }
2843
2844 if (TARGET_AAPCS_BASED)
2845 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2846
2847 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2848 }
2849
2850 /* On AAPCS systems, this is the "struct __va_list". */
2851 static GTY(()) tree va_list_type;
2852
2853 /* Return the type to use as __builtin_va_list. */
2854 static tree
2855 arm_build_builtin_va_list (void)
2856 {
2857 tree va_list_name;
2858 tree ap_field;
2859
2860 if (!TARGET_AAPCS_BASED)
2861 return std_build_builtin_va_list ();
2862
2863 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2864 defined as:
2865
2866 struct __va_list
2867 {
2868 void *__ap;
2869 };
2870
2871 The C Library ABI further reinforces this definition in \S
2872 4.1.
2873
2874 We must follow this definition exactly. The structure tag
2875 name is visible in C++ mangled names, and thus forms a part
2876 of the ABI. The field name may be used by people who
2877 #include <stdarg.h>. */
2878 /* Create the type. */
2879 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2880 /* Give it the required name. */
2881 va_list_name = build_decl (BUILTINS_LOCATION,
2882 TYPE_DECL,
2883 get_identifier ("__va_list"),
2884 va_list_type);
2885 DECL_ARTIFICIAL (va_list_name) = 1;
2886 TYPE_NAME (va_list_type) = va_list_name;
2887 TYPE_STUB_DECL (va_list_type) = va_list_name;
2888 /* Create the __ap field. */
2889 ap_field = build_decl (BUILTINS_LOCATION,
2890 FIELD_DECL,
2891 get_identifier ("__ap"),
2892 ptr_type_node);
2893 DECL_ARTIFICIAL (ap_field) = 1;
2894 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2895 TYPE_FIELDS (va_list_type) = ap_field;
2896 /* Compute its layout. */
2897 layout_type (va_list_type);
2898
2899 return va_list_type;
2900 }
2901
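/* One user-visible consequence of the rule above: on AAPCS targets a C++
   declaration such as "void f (va_list);" mangles as "_Z1fSt9__va_list",
   i.e. the tag behaves as std::__va_list, so the name forms part of the
   ABI and must not change.  */
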
2902 /* Return an expression of type "void *" pointing to the next
2903 available argument in a variable-argument list. VALIST is the
2904 user-level va_list object, of type __builtin_va_list. */
2905 static tree
2906 arm_extract_valist_ptr (tree valist)
2907 {
2908 if (TREE_TYPE (valist) == error_mark_node)
2909 return error_mark_node;
2910
2911 /* On an AAPCS target, the pointer is stored within "struct
2912 va_list". */
2913 if (TARGET_AAPCS_BASED)
2914 {
2915 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2916 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2917 valist, ap_field, NULL_TREE);
2918 }
2919
2920 return valist;
2921 }
2922
2923 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2924 static void
2925 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2926 {
2927 valist = arm_extract_valist_ptr (valist);
2928 std_expand_builtin_va_start (valist, nextarg);
2929 }
2930
2931 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2932 static tree
2933 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2934 gimple_seq *post_p)
2935 {
2936 valist = arm_extract_valist_ptr (valist);
2937 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2938 }
2939
2940 /* Check any incompatible options that the user has specified. */
2941 static void
2942 arm_option_check_internal (struct gcc_options *opts)
2943 {
2944 int flags = opts->x_target_flags;
2945
2946 /* iWMMXt and NEON are incompatible. */
2947 if (TARGET_IWMMXT
2948 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2949 error ("iWMMXt and NEON are incompatible");
2950
2951 /* Make sure that the processor choice does not conflict with any of the
2952 other command line choices. */
2953 if (TARGET_ARM_P (flags)
2954 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2955 error ("target CPU does not support ARM mode");
2956
2957 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2958 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2959 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2960
2961 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2962 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2963
2964 /* If this target is normally configured to use APCS frames, warn if they
2965 are turned off and debugging is turned on. */
2966 if (TARGET_ARM_P (flags)
2967 && write_symbols != NO_DEBUG
2968 && !TARGET_APCS_FRAME
2969 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2970 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2971 "debugging");
2972
2973 /* iWMMXt unsupported under Thumb mode. */
2974 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2975 error ("iWMMXt unsupported under Thumb mode");
2976
2977 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2978 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2979
2980 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2981 {
2982 error ("RTP PIC is incompatible with Thumb");
2983 flag_pic = 0;
2984 }
2985
2986 if (target_pure_code || target_slow_flash_data)
2987 {
2988 const char *flag = (target_pure_code ? "-mpure-code" :
2989 "-mslow-flash-data");
2990 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2991
2992 /* We only support -mslow-flash-data on M-profile targets with
2993 MOVT. */
2994 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2995 error ("%s only supports non-pic code on M-profile targets with the "
2996 "MOVT instruction", flag);
2997
2998 /* We only support -mpure-code on M-profile targets. */
2999 if (target_pure_code && common_unsupported_modes)
3000 error ("%s only supports non-pic code on M-profile targets", flag);
3001
3002 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3003 -mword-relocations forbids relocation of MOVT/MOVW. */
3004 if (target_word_relocations)
3005 error ("%s incompatible with %<-mword-relocations%>", flag);
3006 }
3007 }
3008
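/* For example, "-mcpu=cortex-m4 -mpure-code -mword-relocations" is rejected
   by the last check above: without a literal pool the compiler must build
   addresses with MOVW/MOVT, and -mword-relocations forbids relocating those
   instructions.  */
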
3009 /* Recompute the global settings depending on target attribute options. */
3010
3011 static void
3012 arm_option_params_internal (void)
3013 {
3014 /* If we are not using the default (ARM mode) section anchor offset
3015 ranges, then set the correct ranges now. */
3016 if (TARGET_THUMB1)
3017 {
3018 /* Thumb-1 LDR instructions cannot have negative offsets.
3019 Permissible positive offset ranges are 5-bit (for byte loads),
3020 6-bit (for halfword loads), or 7-bit (for word loads).
3021 Empirical results suggest a 7-bit anchor range gives the best
3022 overall code size. */
3023 targetm.min_anchor_offset = 0;
3024 targetm.max_anchor_offset = 127;
3025 }
3026 else if (TARGET_THUMB2)
3027 {
3028 /* The minimum is set such that the total size of the block
3029 for a particular anchor is 248 + 1 + 4095 bytes, which is
3030 divisible by eight, ensuring natural spacing of anchors. */
3031 targetm.min_anchor_offset = -248;
3032 targetm.max_anchor_offset = 4095;
3033 }
3034 else
3035 {
3036 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3037 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3038 }
3039
3040 /* Increase the number of conditional instructions with -Os. */
3041 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3042
3043 /* For THUMB2, we limit the conditional sequence to one IT block. */
3044 if (TARGET_THUMB2)
3045 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3046
3047 if (TARGET_THUMB1)
3048 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3049 else
3050 targetm.md_asm_adjust = arm_md_asm_adjust;
3051 }
3052
3053 /* True if -mflip-thumb should next add an attribute for the default
3054 mode, false if it should next add an attribute for the opposite mode. */
3055 static GTY(()) bool thumb_flipper;
3056
3057 /* Options after initial target override. */
3058 static GTY(()) tree init_optimize;
3059
3060 static void
3061 arm_override_options_after_change_1 (struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3063 {
3064 /* -falign-functions without argument: supply one. */
3065 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3066 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3067 && opts->x_optimize_size ? "2" : "4";
3068 }
3069
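/* Concretely, a bare -falign-functions becomes -falign-functions=2 when
   compiling Thumb code with -Os, and -falign-functions=4 otherwise.  */
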
3070 /* Implement targetm.override_options_after_change. */
3071
3072 static void
3073 arm_override_options_after_change (void)
3074 {
3075 arm_override_options_after_change_1 (&global_options, &global_options_set);
3076 }
3077
3078 /* Implement TARGET_OPTION_RESTORE. */
3079 static void
3080 arm_option_restore (struct gcc_options */* opts */,
3081 struct gcc_options */* opts_set */,
3082 struct cl_target_option *ptr)
3083 {
3084 arm_configure_build_target (&arm_active_target, ptr, false);
3085 arm_option_reconfigure_globals ();
3086 }
3087
3088 /* Reset options between modes that the user has specified. */
3089 static void
3090 arm_option_override_internal (struct gcc_options *opts,
3091 struct gcc_options *opts_set)
3092 {
3093 arm_override_options_after_change_1 (opts, opts_set);
3094
3095 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3096 {
3097 /* The default is to enable interworking, so this warning message would
3098 be confusing to users who have just compiled with
3099 eg, -march=armv4. */
3100 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3101 opts->x_target_flags &= ~MASK_INTERWORK;
3102 }
3103
3104 if (TARGET_THUMB_P (opts->x_target_flags)
3105 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3106 {
3107 warning (0, "target CPU does not support THUMB instructions");
3108 opts->x_target_flags &= ~MASK_THUMB;
3109 }
3110
3111 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3112 {
3113 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3114 opts->x_target_flags &= ~MASK_APCS_FRAME;
3115 }
3116
3117 /* Callee super interworking implies thumb interworking. Adding
3118 this to the flags here simplifies the logic elsewhere. */
3119 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3120 opts->x_target_flags |= MASK_INTERWORK;
3121
3122 /* We need to remember the initial values so that combinations of options like
3123 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3124 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3125
3126 if (! opts_set->x_arm_restrict_it)
3127 opts->x_arm_restrict_it = arm_arch8;
3128
3129 /* ARM execution state and M profile don't have [restrict] IT. */
3130 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3131 opts->x_arm_restrict_it = 0;
3132
3133 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3134 if (!opts_set->x_arm_restrict_it
3135 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3136 opts->x_arm_restrict_it = 0;
3137
3138 /* Enable -munaligned-access by default for
3139 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3140 i.e. Thumb2 and ARM state only.
3141 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3142 - ARMv8 architecture-based processors.
3143
3144 Disable -munaligned-access by default for
3145 - all pre-ARMv6 architecture-based processors
3146 - ARMv6-M architecture-based processors
3147 - ARMv8-M Baseline processors. */
3148
3149 if (! opts_set->x_unaligned_access)
3150 {
3151 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3152 && arm_arch6 && (arm_arch_notm || arm_arch7));
3153 }
3154 else if (opts->x_unaligned_access == 1
3155 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3156 {
3157 warning (0, "target CPU does not support unaligned accesses");
3158 opts->x_unaligned_access = 0;
3159 }
3160
3161 /* Don't warn since it's on by default in -O2. */
3162 if (TARGET_THUMB1_P (opts->x_target_flags))
3163 opts->x_flag_schedule_insns = 0;
3164 else
3165 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3166
3167 /* Disable shrink-wrap when optimizing function for size, since it tends to
3168 generate additional returns. */
3169 if (optimize_function_for_size_p (cfun)
3170 && TARGET_THUMB2_P (opts->x_target_flags))
3171 opts->x_flag_shrink_wrap = false;
3172 else
3173 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3174
3175 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3176 - epilogue_insns - does not accurately model the corresponding insns
3177 emitted in the asm file. In particular, see the comment in thumb_exit
3178 'Find out how many of the (return) argument registers we can corrupt'.
3179 As a consequence, the epilogue may clobber registers without fipa-ra
3180 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3181 TODO: Accurately model clobbers for epilogue_insns and reenable
3182 fipa-ra. */
3183 if (TARGET_THUMB1_P (opts->x_target_flags))
3184 opts->x_flag_ipa_ra = 0;
3185 else
3186 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3187
3188 /* Thumb2 inline assembly code should always use unified syntax.
3189 This will apply to ARM and Thumb1 eventually. */
3190 if (TARGET_THUMB2_P (opts->x_target_flags))
3191 opts->x_inline_asm_unified = true;
3192
3193 if (arm_stack_protector_guard == SSP_GLOBAL
3194 && opts->x_arm_stack_protector_guard_offset_str)
3195 {
3196 error ("incompatible options %<-mstack-protector-guard=global%> and "
3197 "%<-mstack-protector-guard-offset=%s%>",
3198 arm_stack_protector_guard_offset_str);
3199 }
3200
3201 if (opts->x_arm_stack_protector_guard_offset_str)
3202 {
3203 char *end;
3204 const char *str = arm_stack_protector_guard_offset_str;
3205 errno = 0;
3206 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3207 if (!*str || *end || errno)
3208 error ("%qs is not a valid offset in %qs", str,
3209 "-mstack-protector-guard-offset=");
3210 arm_stack_protector_guard_offset = offs;
3211 }
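/* Illustrative usage (editor's example, not from the original source): a
   per-thread canary at a hypothetical offset of 24 bytes into the thread
   block could be requested with
       -mstack-protector-guard=tls -mstack-protector-guard-offset=24
   which requires a hardware TLS register (see the check in
   arm_option_reconfigure_globals), whereas -mstack-protector-guard=global
   keeps the usual __stack_chk_guard symbol and, as enforced above, does
   not accept an offset.  */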
3212
3213 if (arm_current_function_pac_enabled_p ())
3214 {
3215 if (!arm_arch8m_main)
3216 error ("This architecture does not support branch protection "
3217 "instructions");
3218 if (TARGET_TPCS_FRAME)
3219 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3220 }
3221
3222 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3223 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3224 #endif
3225 }
3226
3227 static sbitmap isa_all_fpubits_internal;
3228 static sbitmap isa_all_fpbits;
3229 static sbitmap isa_quirkbits;
3230
3231 /* Configure a build target TARGET from the user-specified options OPTS.
3232 If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3233 architecture have been specified, but the two are not identical. */
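/* Illustrative example (added by the editor): given
   "-march=armv7-a -mcpu=cortex-m4" the two ISA bitmaps differ, so with
   WARN_COMPATIBLE a conflict warning is emitted; code is then generated
   for armv7-a (-march wins) while cortex-m4 only supplies the default
   tuning (-mcpu wins for tuning), as implemented below.  */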
3234 void
3235 arm_configure_build_target (struct arm_build_target *target,
3236 struct cl_target_option *opts,
3237 bool warn_compatible)
3238 {
3239 const cpu_option *arm_selected_tune = NULL;
3240 const arch_option *arm_selected_arch = NULL;
3241 const cpu_option *arm_selected_cpu = NULL;
3242 const arm_fpu_desc *arm_selected_fpu = NULL;
3243 const char *tune_opts = NULL;
3244 const char *arch_opts = NULL;
3245 const char *cpu_opts = NULL;
3246
3247 bitmap_clear (target->isa);
3248 target->core_name = NULL;
3249 target->arch_name = NULL;
3250
3251 if (opts->x_arm_arch_string)
3252 {
3253 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3254 "-march",
3255 opts->x_arm_arch_string);
3256 arch_opts = strchr (opts->x_arm_arch_string, '+');
3257 }
3258
3259 if (opts->x_arm_cpu_string)
3260 {
3261 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3262 opts->x_arm_cpu_string);
3263 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3264 arm_selected_tune = arm_selected_cpu;
3265 /* If taking the tuning from -mcpu, we don't need to rescan the
3266 options for tuning. */
3267 }
3268
3269 if (opts->x_arm_tune_string)
3270 {
3271 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3272 opts->x_arm_tune_string);
3273 tune_opts = strchr (opts->x_arm_tune_string, '+');
3274 }
3275
3276 if (opts->x_arm_branch_protection_string)
3277 {
3278 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3279
3280 if (aarch_ra_sign_key != AARCH_KEY_A)
3281 {
3282 warning (0, "invalid key type for %<-mbranch-protection=%>");
3283 aarch_ra_sign_key = AARCH_KEY_A;
3284 }
3285 }
3286
3287 if (arm_selected_arch)
3288 {
3289 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3290 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3291 arch_opts);
3292
3293 if (arm_selected_cpu)
3294 {
3295 auto_sbitmap cpu_isa (isa_num_bits);
3296 auto_sbitmap isa_delta (isa_num_bits);
3297
3298 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3299 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3300 cpu_opts);
3301 bitmap_xor (isa_delta, cpu_isa, target->isa);
3302 /* Ignore any bits that are quirk bits. */
3303 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3304 /* If the user (or the default configuration) has specified a
3305 specific FPU, then ignore any bits that depend on the FPU
3306 configuration. Do similarly if using the soft-float
3307 ABI. */
3308 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3309 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3310 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3311
3312 if (!bitmap_empty_p (isa_delta))
3313 {
3314 if (warn_compatible)
3315 warning (0, "switch %<-mcpu=%s%> conflicts "
3316 "with switch %<-march=%s%>",
3317 opts->x_arm_cpu_string,
3318 opts->x_arm_arch_string);
3319
3320 /* -march wins for code generation.
3321 -mcpu wins for default tuning. */
3322 if (!arm_selected_tune)
3323 arm_selected_tune = arm_selected_cpu;
3324
3325 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3326 target->arch_name = arm_selected_arch->common.name;
3327 }
3328 else
3329 {
3330 /* Architecture and CPU are essentially the same.
3331 Prefer the CPU setting. */
3332 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3333 target->core_name = arm_selected_cpu->common.name;
3334 /* Copy the CPU's capabilities, so that we inherit the
3335 appropriate extensions and quirks. */
3336 bitmap_copy (target->isa, cpu_isa);
3337 }
3338 }
3339 else
3340 {
3341 /* Pick a CPU based on the architecture. */
3342 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3343 target->arch_name = arm_selected_arch->common.name;
3344 /* Note: target->core_name is left unset in this path. */
3345 }
3346 }
3347 else if (arm_selected_cpu)
3348 {
3349 target->core_name = arm_selected_cpu->common.name;
3350 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3351 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3352 cpu_opts);
3353 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3354 }
3355 /* If the user did not specify a processor or architecture, choose
3356 one for them. */
3357 else
3358 {
3359 const cpu_option *sel;
3360 auto_sbitmap sought_isa (isa_num_bits);
3361 bitmap_clear (sought_isa);
3362 auto_sbitmap default_isa (isa_num_bits);
3363
3364 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3365 TARGET_CPU_DEFAULT);
3366 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3367 gcc_assert (arm_selected_cpu->common.name);
3368
3369 /* RWE: All of the selection logic below (to the end of this
3370 'if' clause) looks somewhat suspect. It appears to be mostly
3371 there to support forcing thumb support when the default CPU
3372 does not have thumb (somewhat dubious in terms of what the
3373 user might be expecting). I think it should be removed once
3374 support for the pre-thumb era cores is removed. */
3375 sel = arm_selected_cpu;
3376 arm_initialize_isa (default_isa, sel->common.isa_bits);
3377 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3378 cpu_opts);
3379
3380 /* Now check to see if the user has specified any command line
3381 switches that require certain abilities from the cpu. */
3382
3383 if (TARGET_INTERWORK || TARGET_THUMB)
3384 bitmap_set_bit (sought_isa, isa_bit_thumb);
3385
3386 /* If there are such requirements and the default CPU does not
3387 satisfy them, we need to run over the complete list of
3388 cores looking for one that is satisfactory. */
3389 if (!bitmap_empty_p (sought_isa)
3390 && !bitmap_subset_p (sought_isa, default_isa))
3391 {
3392 auto_sbitmap candidate_isa (isa_num_bits);
3393 /* We're only interested in a CPU with at least the
3394 capabilities of the default CPU and the required
3395 additional features. */
3396 bitmap_ior (default_isa, default_isa, sought_isa);
3397
3398 /* Try to locate a CPU type that supports all of the abilities
3399 of the default CPU, plus the extra abilities requested by
3400 the user. */
3401 for (sel = all_cores; sel->common.name != NULL; sel++)
3402 {
3403 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3404 /* An exact match? */
3405 if (bitmap_equal_p (default_isa, candidate_isa))
3406 break;
3407 }
3408
3409 if (sel->common.name == NULL)
3410 {
3411 unsigned current_bit_count = isa_num_bits;
3412 const cpu_option *best_fit = NULL;
3413
3414 /* Ideally we would like to issue an error message here
3415 saying that it was not possible to find a CPU compatible
3416 with the default CPU, but which also supports the command
3417 line options specified by the programmer, and so they
3418 ought to use the -mcpu=<name> command line option to
3419 override the default CPU type.
3420
3421 If we cannot find a CPU that has exactly the
3422 characteristics of the default CPU and the given
3423 command line options we scan the array again looking
3424 for a best match. The best match must have at least
3425 the capabilities of the perfect match. */
3426 for (sel = all_cores; sel->common.name != NULL; sel++)
3427 {
3428 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3429
3430 if (bitmap_subset_p (default_isa, candidate_isa))
3431 {
3432 unsigned count;
3433
3434 bitmap_and_compl (candidate_isa, candidate_isa,
3435 default_isa);
3436 count = bitmap_popcount (candidate_isa);
3437
3438 if (count < current_bit_count)
3439 {
3440 best_fit = sel;
3441 current_bit_count = count;
3442 }
3443 }
3444 }
3445 
3446 gcc_assert (best_fit);
3447 sel = best_fit;
3448 }
3449 arm_selected_cpu = sel;
3450 }
3451
3452 /* Now we know the CPU, we can finally initialize the target
3453 structure. */
3454 target->core_name = arm_selected_cpu->common.name;
3455 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3456 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3457 cpu_opts);
3458 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3459 }
3460
3461 gcc_assert (arm_selected_cpu);
3462 gcc_assert (arm_selected_arch);
3463
3464 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3465 {
3466 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3467 auto_sbitmap fpu_bits (isa_num_bits);
3468
3469 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3470 /* This should clear out ALL bits relating to the FPU/simd
3471 extensions, to avoid potentially invalid combinations later on
3472 that we can't match. At present we only clear out those bits
3473 that can be set by -mfpu. This should be fixed in GCC-12. */
3474 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3475 bitmap_ior (target->isa, target->isa, fpu_bits);
3476 }
3477
3478 /* If we have the soft-float ABI, clear any feature bits relating to use of
3479 floating-point operations. They'll just confuse things later on. */
3480 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3481 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3482
3483 /* There may be implied bits which we still need to enable. These are
3484 non-named features which are needed to complete other sets of features,
3485 but cannot be enabled from arm-cpus.in due to being shared between
3486 multiple fgroups. Each entry in all_implied_fbits is of the form
3487 ante -> cons, meaning that if the feature "ante" is enabled, we should
3488 implicitly enable "cons". */
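/* Hypothetical entry, for illustration only: a row such as
     { isa_bit_mve_float, isa_bit_fp16 }
   would mean "whenever MVE floating point is enabled, also enable the
   FP16 bit"; the loop below simply applies every such rule.  */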
3489 const struct fbit_implication *impl = all_implied_fbits;
3490 while (impl->ante)
3491 {
3492 if (bitmap_bit_p (target->isa, impl->ante))
3493 bitmap_set_bit (target->isa, impl->cons);
3494 impl++;
3495 }
3496
3497 if (!arm_selected_tune)
3498 arm_selected_tune = arm_selected_cpu;
3499 else /* Validate the features passed to -mtune. */
3500 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3501
3502 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3503
3504 /* Finish initializing the target structure. */
3505 if (!target->arch_name)
3506 target->arch_name = arm_selected_arch->common.name;
3507 target->arch_pp_name = arm_selected_arch->arch;
3508 target->base_arch = arm_selected_arch->base_arch;
3509 target->profile = arm_selected_arch->profile;
3510
3511 target->tune_flags = tune_data->tune_flags;
3512 target->tune = tune_data->tune;
3513 target->tune_core = tune_data->scheduler;
3514 }
3515
3516 /* Fix up any incompatible options that the user has specified. */
3517 static void
3518 arm_option_override (void)
3519 {
3520 static const enum isa_feature fpu_bitlist_internal[]
3521 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3522 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3523 static const enum isa_feature fp_bitlist[]
3524 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3525 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3526 cl_target_option opts;
3527
3528 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3529 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3530
3531 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3532 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3533 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3534 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3535
3536 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3537
3538 if (!OPTION_SET_P (arm_fpu_index))
3539 {
3540 bool ok;
3541 int fpu_index;
3542
3543 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3544 CL_TARGET);
3545 gcc_assert (ok);
3546 arm_fpu_index = (enum fpu_type) fpu_index;
3547 }
3548
3549 cl_target_option_save (&opts, &global_options, &global_options_set);
3550 arm_configure_build_target (&arm_active_target, &opts, true);
3551
3552 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3553 SUBTARGET_OVERRIDE_OPTIONS;
3554 #endif
3555
3556 /* Initialize boolean versions of the architectural flags, for use
3557 in the arm.md file and for enabling feature flags. */
3558 arm_option_reconfigure_globals ();
3559
3560 arm_tune = arm_active_target.tune_core;
3561 tune_flags = arm_active_target.tune_flags;
3562 current_tune = arm_active_target.tune;
3563
3564 /* TBD: Dwarf info for apcs frame is not handled yet. */
3565 if (TARGET_APCS_FRAME)
3566 flag_shrink_wrap = false;
3567
3568 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3569 {
3570 warning (0, "%<-mapcs-stack-check%> incompatible with "
3571 "%<-mno-apcs-frame%>");
3572 target_flags |= MASK_APCS_FRAME;
3573 }
3574
3575 if (TARGET_POKE_FUNCTION_NAME)
3576 target_flags |= MASK_APCS_FRAME;
3577
3578 if (TARGET_APCS_REENT && flag_pic)
3579 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3580
3581 if (TARGET_APCS_REENT)
3582 warning (0, "APCS reentrant code not supported. Ignored");
3583
3584 /* Set up some tuning parameters. */
3585 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3586 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3587 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3588 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3589 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3590 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3591
3592 /* For arm2/3 there is no need to do any scheduling if we are doing
3593 software floating-point. */
3594 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3595 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3596
3597 /* Override the default structure alignment for AAPCS ABI. */
3598 if (!OPTION_SET_P (arm_structure_size_boundary))
3599 {
3600 if (TARGET_AAPCS_BASED)
3601 arm_structure_size_boundary = 8;
3602 }
3603 else
3604 {
3605 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3606
3607 if (arm_structure_size_boundary != 8
3608 && arm_structure_size_boundary != 32
3609 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3610 {
3611 if (ARM_DOUBLEWORD_ALIGN)
3612 warning (0,
3613 "structure size boundary can only be set to 8, 32 or 64");
3614 else
3615 warning (0, "structure size boundary can only be set to 8 or 32");
3616 arm_structure_size_boundary
3617 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3618 }
3619 }
3620
3621 if (TARGET_VXWORKS_RTP)
3622 {
3623 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3624 arm_pic_data_is_text_relative = 0;
3625 }
3626 else if (flag_pic
3627 && !arm_pic_data_is_text_relative
3628 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3629 /* When text & data segments don't have a fixed displacement, the
3630 intended use is with a single, read only, pic base register.
3631 Unless the user explicitly requested not to do that, set
3632 it. */
3633 target_flags |= MASK_SINGLE_PIC_BASE;
3634
3635 /* If stack checking is disabled, we can use r10 as the PIC register,
3636 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3637 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3638 {
3639 if (TARGET_VXWORKS_RTP)
3640 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3641 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3642 }
3643
3644 if (flag_pic && TARGET_VXWORKS_RTP)
3645 arm_pic_register = 9;
3646
3647 /* If in FDPIC mode then force arm_pic_register to be r9. */
3648 if (TARGET_FDPIC)
3649 {
3650 arm_pic_register = FDPIC_REGNUM;
3651 if (TARGET_THUMB1)
3652 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3653 }
3654
3655 if (arm_pic_register_string != NULL)
3656 {
3657 int pic_register = decode_reg_name (arm_pic_register_string);
3658
3659 if (!flag_pic)
3660 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3661
3662 /* Prevent the user from choosing an obviously stupid PIC register. */
3663 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3664 || pic_register == HARD_FRAME_POINTER_REGNUM
3665 || pic_register == STACK_POINTER_REGNUM
3666 || pic_register >= PC_REGNUM
3667 || (TARGET_VXWORKS_RTP
3668 && (unsigned int) pic_register != arm_pic_register))
3669 error ("unable to use %qs for PIC register", arm_pic_register_string);
3670 else
3671 arm_pic_register = pic_register;
3672 }
3673
3674 if (flag_pic)
3675 target_word_relocations = 1;
3676
3677 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3678 if (fix_cm3_ldrd == 2)
3679 {
3680 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3681 fix_cm3_ldrd = 1;
3682 else
3683 fix_cm3_ldrd = 0;
3684 }
3685
3686 /* Enable fix_vlldm by default if required. */
3687 if (fix_vlldm == 2)
3688 {
3689 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3690 fix_vlldm = 1;
3691 else
3692 fix_vlldm = 0;
3693 }
3694
3695 /* Enable fix_aes by default if required. */
3696 if (fix_aes_erratum_1742098 == 2)
3697 {
3698 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3699 fix_aes_erratum_1742098 = 1;
3700 else
3701 fix_aes_erratum_1742098 = 0;
3702 }
3703
3704 /* Hot/Cold partitioning is not currently supported, since we can't
3705 handle literal pool placement in that case. */
3706 if (flag_reorder_blocks_and_partition)
3707 {
3708 inform (input_location,
3709 "%<-freorder-blocks-and-partition%> not supported "
3710 "on this architecture");
3711 flag_reorder_blocks_and_partition = 0;
3712 flag_reorder_blocks = 1;
3713 }
3714
3715 if (flag_pic)
3716 /* Hoisting PIC address calculations more aggressively provides a small,
3717 but measurable, size reduction for PIC code. Therefore, we decrease
3718 the bar for unrestricted expression hoisting to the cost of PIC address
3719 calculation, which is 2 instructions. */
3720 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3721 param_gcse_unrestricted_cost, 2);
3722
3723 /* ARM EABI defaults to strict volatile bitfields. */
3724 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3725 && abi_version_at_least(2))
3726 flag_strict_volatile_bitfields = 1;
3727
3728 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3729 for which we have deemed it beneficial (signified by setting
3730 prefetch.num_slots to 1 or more). */
3731 if (flag_prefetch_loop_arrays < 0
3732 && HAVE_prefetch
3733 && optimize >= 3
3734 && current_tune->prefetch.num_slots > 0)
3735 flag_prefetch_loop_arrays = 1;
3736
3737 /* Set up parameters to be used in prefetching algorithm. Do not
3738 override the defaults unless we are tuning for a core we have
3739 researched values for. */
3740 if (current_tune->prefetch.num_slots > 0)
3741 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3742 param_simultaneous_prefetches,
3743 current_tune->prefetch.num_slots);
3744 if (current_tune->prefetch.l1_cache_line_size >= 0)
3745 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3746 param_l1_cache_line_size,
3747 current_tune->prefetch.l1_cache_line_size);
3748 if (current_tune->prefetch.l1_cache_line_size >= 0)
3749 {
3750 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3751 param_destruct_interfere_size,
3752 current_tune->prefetch.l1_cache_line_size);
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_construct_interfere_size,
3755 current_tune->prefetch.l1_cache_line_size);
3756 }
3757 else
3758 {
3759 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3760 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3761 constructive? */
3762 /* More recent Cortex chips have a 64-byte cache line, but are marked
3763 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3764 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3765 param_destruct_interfere_size, 64);
3766 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3767 param_construct_interfere_size, 64);
3768 }
3769
3770 if (current_tune->prefetch.l1_cache_size >= 0)
3771 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3772 param_l1_cache_size,
3773 current_tune->prefetch.l1_cache_size);
3774
3775 /* Look through ready list and all of queue for instructions
3776 relevant for L2 auto-prefetcher. */
3777 int sched_autopref_queue_depth;
3778
3779 switch (current_tune->sched_autopref)
3780 {
3781 case tune_params::SCHED_AUTOPREF_OFF:
3782 sched_autopref_queue_depth = -1;
3783 break;
3784
3785 case tune_params::SCHED_AUTOPREF_RANK:
3786 sched_autopref_queue_depth = 0;
3787 break;
3788
3789 case tune_params::SCHED_AUTOPREF_FULL:
3790 sched_autopref_queue_depth = max_insn_queue_index + 1;
3791 break;
3792
3793 default:
3794 gcc_unreachable ();
3795 }
3796
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_sched_autopref_queue_depth,
3799 sched_autopref_queue_depth);
3800
3801 /* Currently, for slow flash data, we just disable literal pools. We also
3802 disable them for pure-code. */
3803 if (target_slow_flash_data || target_pure_code)
3804 arm_disable_literal_pool = true;
3805
3806 /* Disable scheduling fusion by default unless the target is an ARMv7
3807 processor whose tuning prefers ldrd/strd. */
3808 if (flag_schedule_fusion == 2
3809 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3810 flag_schedule_fusion = 0;
3811
3812 /* Need to remember initial options before they are overridden. */
3813 init_optimize = build_optimization_node (&global_options,
3814 &global_options_set);
3815
3816 arm_options_perform_arch_sanity_checks ();
3817 arm_option_override_internal (&global_options, &global_options_set);
3818 arm_option_check_internal (&global_options);
3819 arm_option_params_internal ();
3820
3821 /* Create the default target_options structure. */
3822 target_option_default_node = target_option_current_node
3823 = build_target_option_node (&global_options, &global_options_set);
3824
3825 /* Register global variables with the garbage collector. */
3826 arm_add_gc_roots ();
3827
3828 /* Init initial mode for testing. */
3829 thumb_flipper = TARGET_THUMB;
3830 }
3831
3832
3833 /* Reconfigure global status flags from the active_target.isa. */
3834 void
3835 arm_option_reconfigure_globals (void)
3836 {
3837 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3838 arm_base_arch = arm_active_target.base_arch;
3839
3840 /* Initialize boolean versions of the architectural flags, for use
3841 in the arm.md file. */
3842 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3843 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3844 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3845 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3846 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3847 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3848 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3849 arm_arch6m = arm_arch6 && !arm_arch_notm;
3850 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3851 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3852 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3853 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3854 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3855 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3856 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3857 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3858 isa_bit_armv8_1m_main);
3859 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3860 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3861 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3862 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3863 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3864 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3865 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3866 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3867 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3868 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3869 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3870 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3871 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3872
3873 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3874 if (arm_fp16_inst)
3875 {
3876 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3877 error ("selected fp16 options are incompatible");
3878 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3879 }
3880
3881 arm_arch_cde = 0;
3882 arm_arch_cde_coproc = 0;
3883 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3884 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3885 isa_bit_cdecp6, isa_bit_cdecp7};
3886 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3887 {
3888 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3889 if (cde_bit)
3890 {
3891 arm_arch_cde |= cde_bit;
3892 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3893 }
3894 }
3895
3896 /* And finally, set up some quirks. */
3897 arm_arch_no_volatile_ce
3898 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3899 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3900 isa_bit_quirk_armv6kz);
3901
3902 /* Use the cp15 method if it is available. */
3903 if (target_thread_pointer == TP_AUTO)
3904 {
3905 if (arm_arch6k && !TARGET_THUMB1)
3906 target_thread_pointer = TP_CP15;
3907 else
3908 target_thread_pointer = TP_SOFT;
3909 }
3910
3911 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3912 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3913 }
3914
3915 /* Perform some validation between the desired architecture and the rest of the
3916 options. */
3917 void
3918 arm_options_perform_arch_sanity_checks (void)
3919 {
3920 /* V5T code we generate is completely interworking capable, so we turn off
3921 TARGET_INTERWORK here to avoid many tests later on. */
3922
3923 /* XXX However, we must pass the right pre-processor defines to CPP
3924 or GLD can get confused. This is a hack. */
3925 if (TARGET_INTERWORK)
3926 arm_cpp_interwork = 1;
3927
3928 if (arm_arch5t)
3929 target_flags &= ~MASK_INTERWORK;
3930
3931 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3932 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3933
3934 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3935 error ("iwmmxt abi requires an iwmmxt capable cpu");
3936
3937 /* BPABI targets use linker tricks to allow interworking on cores
3938 without thumb support. */
3939 if (TARGET_INTERWORK
3940 && !TARGET_BPABI
3941 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3942 {
3943 warning (0, "target CPU does not support interworking");
3944 target_flags &= ~MASK_INTERWORK;
3945 }
3946
3947 /* If soft-float is specified then don't use FPU. */
3948 if (TARGET_SOFT_FLOAT)
3949 arm_fpu_attr = FPU_NONE;
3950 else
3951 arm_fpu_attr = FPU_VFP;
3952
3953 if (TARGET_AAPCS_BASED)
3954 {
3955 if (TARGET_CALLER_INTERWORKING)
3956 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3957 else
3958 if (TARGET_CALLEE_INTERWORKING)
3959 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3960 }
3961
3962 /* __fp16 support currently assumes the core has ldrh. */
3963 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3964 sorry ("%<__fp16%> and no ldrh");
3965
3966 if (use_cmse && !arm_arch_cmse)
3967 error ("target CPU does not support ARMv8-M Security Extensions");
3968
3969 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3970 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3971 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3972 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3973
3974
3975 if (TARGET_AAPCS_BASED)
3976 {
3977 if (arm_abi == ARM_ABI_IWMMXT)
3978 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3979 else if (TARGET_HARD_FLOAT_ABI)
3980 {
3981 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3982 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3983 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3984 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3985 }
3986 else
3987 arm_pcs_default = ARM_PCS_AAPCS;
3988 }
3989 else
3990 {
3991 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3992 sorry ("%<-mfloat-abi=hard%> and VFP");
3993
3994 if (arm_abi == ARM_ABI_APCS)
3995 arm_pcs_default = ARM_PCS_APCS;
3996 else
3997 arm_pcs_default = ARM_PCS_ATPCS;
3998 }
3999 }
4000
4001 /* Test whether a local function descriptor is canonical, i.e.,
4002 whether we can use GOTOFFFUNCDESC to compute the address of the
4003 function. */
4004 static bool
4005 arm_fdpic_local_funcdesc_p (rtx fnx)
4006 {
4007 tree fn;
4008 enum symbol_visibility vis;
4009 bool ret;
4010
4011 if (!TARGET_FDPIC)
4012 return true;
4013
4014 if (! SYMBOL_REF_LOCAL_P (fnx))
4015 return false;
4016
4017 fn = SYMBOL_REF_DECL (fnx);
4018
4019 if (! fn)
4020 return false;
4021
4022 vis = DECL_VISIBILITY (fn);
4023
4024 if (vis == VISIBILITY_PROTECTED)
4025 /* Private function descriptors for protected functions are not
4026 canonical. Temporarily change the visibility to global so that
4027 we can ensure uniqueness of funcdesc pointers. */
4028 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4029
4030 ret = default_binds_local_p_1 (fn, flag_pic);
4031
4032 DECL_VISIBILITY (fn) = vis;
4033
4034 return ret;
4035 }
4036
4037 static void
4038 arm_add_gc_roots (void)
4039 {
4040 gcc_obstack_init(&minipool_obstack);
4041 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4042 }
4043 \f
4044 /* A table of known ARM exception types.
4045 For use with the interrupt function attribute. */
4046
4047 typedef struct
4048 {
4049 const char *const arg;
4050 const unsigned long return_value;
4051 }
4052 isr_attribute_arg;
4053
4054 static const isr_attribute_arg isr_attribute_args [] =
4055 {
4056 { "IRQ", ARM_FT_ISR },
4057 { "irq", ARM_FT_ISR },
4058 { "FIQ", ARM_FT_FIQ },
4059 { "fiq", ARM_FT_FIQ },
4060 { "ABORT", ARM_FT_ISR },
4061 { "abort", ARM_FT_ISR },
4062 { "UNDEF", ARM_FT_EXCEPTION },
4063 { "undef", ARM_FT_EXCEPTION },
4064 { "SWI", ARM_FT_EXCEPTION },
4065 { "swi", ARM_FT_EXCEPTION },
4066 { NULL, ARM_FT_NORMAL }
4067 };
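/* Usage sketch (illustrative, not part of the original source): the table
   above maps the string given to the "isr"/"interrupt" function attribute,
   e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void undef_handler (void) __attribute__ ((isr ("UNDEF")));

   A string not listed here falls through to the NULL sentinel and makes
   arm_isr_value below return ARM_FT_UNKNOWN.  */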
4068
4069 /* Returns the (interrupt) function type of the current
4070 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4071
4072 static unsigned long
4073 arm_isr_value (tree argument)
4074 {
4075 const isr_attribute_arg * ptr;
4076 const char * arg;
4077
4078 if (!arm_arch_notm)
4079 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4080
4081 /* No argument - default to IRQ. */
4082 if (argument == NULL_TREE)
4083 return ARM_FT_ISR;
4084
4085 /* Get the value of the argument. */
4086 if (TREE_VALUE (argument) == NULL_TREE
4087 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4088 return ARM_FT_UNKNOWN;
4089
4090 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4091
4092 /* Check it against the list of known arguments. */
4093 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4094 if (streq (arg, ptr->arg))
4095 return ptr->return_value;
4096
4097 /* An unrecognized interrupt type. */
4098 return ARM_FT_UNKNOWN;
4099 }
4100
4101 /* Computes the type of the current function. */
4102
4103 static unsigned long
4104 arm_compute_func_type (void)
4105 {
4106 unsigned long type = ARM_FT_UNKNOWN;
4107 tree a;
4108 tree attr;
4109
4110 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4111
4112 /* Decide if the current function is volatile. Such functions
4113 never return, and many memory cycles can be saved by not storing
4114 register values that will never be needed again. This optimization
4115 was added to speed up context switching in a kernel application. */
4116 if (optimize > 0
4117 && (TREE_NOTHROW (current_function_decl)
4118 || !(flag_unwind_tables
4119 || (flag_exceptions
4120 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4121 && TREE_THIS_VOLATILE (current_function_decl))
4122 type |= ARM_FT_VOLATILE;
4123
4124 if (cfun->static_chain_decl != NULL)
4125 type |= ARM_FT_NESTED;
4126
4127 attr = DECL_ATTRIBUTES (current_function_decl);
4128
4129 a = lookup_attribute ("naked", attr);
4130 if (a != NULL_TREE)
4131 type |= ARM_FT_NAKED;
4132
4133 a = lookup_attribute ("isr", attr);
4134 if (a == NULL_TREE)
4135 a = lookup_attribute ("interrupt", attr);
4136
4137 if (a == NULL_TREE)
4138 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4139 else
4140 type |= arm_isr_value (TREE_VALUE (a));
4141
4142 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4143 type |= ARM_FT_CMSE_ENTRY;
4144
4145 return type;
4146 }
4147
4148 /* Returns the type of the current function. */
4149
4150 unsigned long
4151 arm_current_func_type (void)
4152 {
4153 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4154 cfun->machine->func_type = arm_compute_func_type ();
4155
4156 return cfun->machine->func_type;
4157 }
4158
4159 bool
4160 arm_allocate_stack_slots_for_args (void)
4161 {
4162 /* Naked functions should not allocate stack slots for arguments. */
4163 return !IS_NAKED (arm_current_func_type ());
4164 }
4165
4166 static bool
4167 arm_warn_func_return (tree decl)
4168 {
4169 /* Naked functions are implemented entirely in assembly, including the
4170 return sequence, so suppress warnings about this. */
4171 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4172 }
4173
4174 \f
4175 /* Output assembler code for a block containing the constant parts
4176 of a trampoline, leaving space for the variable parts.
4177
4178 On the ARM, (if r8 is the static chain regnum, and remembering that
4179 referencing pc adds an offset of 8) the trampoline looks like:
4180 ldr r8, [pc, #0]
4181 ldr pc, [pc]
4182 .word static chain value
4183 .word function's address
4184 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4185
4186 In FDPIC mode, the trampoline looks like:
4187 .word trampoline address
4188 .word trampoline GOT address
4189 ldr r12, [pc, #8] ; #4 for Arm mode
4190 ldr r9, [pc, #8] ; #4 for Arm mode
4191 ldr pc, [pc, #8] ; #4 for Arm mode
4192 .word static chain value
4193 .word GOT address
4194 .word function's address
4195 */
4196
4197 static void
4198 arm_asm_trampoline_template (FILE *f)
4199 {
4200 fprintf (f, "\t.syntax unified\n");
4201
4202 if (TARGET_FDPIC)
4203 {
4204 /* The first two words are a function descriptor pointing to the
4205 trampoline code just below. */
4206 if (TARGET_ARM)
4207 fprintf (f, "\t.arm\n");
4208 else if (TARGET_THUMB2)
4209 fprintf (f, "\t.thumb\n");
4210 else
4211 /* Only ARM and Thumb-2 are supported. */
4212 gcc_unreachable ();
4213
4214 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4215 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4216 /* Trampoline code which sets the static chain register but also
4217 PIC register before jumping into real code. */
4218 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4219 STATIC_CHAIN_REGNUM, PC_REGNUM,
4220 TARGET_THUMB2 ? 8 : 4);
4221 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4222 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4223 TARGET_THUMB2 ? 8 : 4);
4224 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4225 PC_REGNUM, PC_REGNUM,
4226 TARGET_THUMB2 ? 8 : 4);
4227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4228 }
4229 else if (TARGET_ARM)
4230 {
4231 fprintf (f, "\t.arm\n");
4232 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4233 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4234 }
4235 else if (TARGET_THUMB2)
4236 {
4237 fprintf (f, "\t.thumb\n");
4238 /* The Thumb-2 trampoline is similar to the arm implementation.
4239 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4240 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4241 STATIC_CHAIN_REGNUM, PC_REGNUM);
4242 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4243 }
4244 else
4245 {
4246 ASM_OUTPUT_ALIGN (f, 2);
4247 fprintf (f, "\t.code\t16\n");
4248 fprintf (f, ".Ltrampoline_start:\n");
4249 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4250 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4251 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4252 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4253 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4254 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4255 }
4256 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4257 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4258 }
4259
4260 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4261
4262 static void
4263 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4264 {
4265 rtx fnaddr, mem, a_tramp;
4266
4267 emit_block_move (m_tramp, assemble_trampoline_template (),
4268 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4269
4270 if (TARGET_FDPIC)
4271 {
4272 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4273 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4274 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4275 /* The function start address is at offset 8, but in Thumb mode
4276 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4277 below. */
4278 rtx trampoline_code_start
4279 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4280
4281 /* Write initial funcdesc which points to the trampoline. */
4282 mem = adjust_address (m_tramp, SImode, 0);
4283 emit_move_insn (mem, trampoline_code_start);
4284 mem = adjust_address (m_tramp, SImode, 4);
4285 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4286 /* Setup static chain. */
4287 mem = adjust_address (m_tramp, SImode, 20);
4288 emit_move_insn (mem, chain_value);
4289 /* GOT + real function entry point. */
4290 mem = adjust_address (m_tramp, SImode, 24);
4291 emit_move_insn (mem, gotaddr);
4292 mem = adjust_address (m_tramp, SImode, 28);
4293 emit_move_insn (mem, fnaddr);
4294 }
4295 else
4296 {
4297 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4298 emit_move_insn (mem, chain_value);
4299
4300 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4301 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4302 emit_move_insn (mem, fnaddr);
4303 }
4304
4305 a_tramp = XEXP (m_tramp, 0);
4306 maybe_emit_call_builtin___clear_cache (a_tramp,
4307 plus_constant (ptr_mode,
4308 a_tramp,
4309 TRAMPOLINE_SIZE));
4310 }
4311
4312 /* Thumb trampolines should be entered in thumb mode, so set
4313 the bottom bit of the address. */
4314
4315 static rtx
4316 arm_trampoline_adjust_address (rtx addr)
4317 {
4318 /* For FDPIC don't fix trampoline address since it's a function
4319 descriptor and not a function address. */
4320 if (TARGET_THUMB && !TARGET_FDPIC)
4321 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4322 NULL, 0, OPTAB_LIB_WIDEN);
4323 return addr;
4324 }
4325 \f
4326 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4327 includes call-clobbered registers too. If this is a leaf function
4328 we can just examine the registers used by the RTL, but otherwise we
4329 have to assume that whatever function is called might clobber
4330 anything, and so we have to save all the call-clobbered registers
4331 as well. */
4332 static inline bool reg_needs_saving_p (unsigned reg)
4333 {
4334 unsigned long func_type = arm_current_func_type ();
4335
4336 if (IS_INTERRUPT (func_type))
4337 if (df_regs_ever_live_p (reg)
4338 /* Save call-clobbered core registers. */
4339 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4340 return true;
4341 else
4342 return false;
4343 else
4344 if (!df_regs_ever_live_p (reg)
4345 || call_used_or_fixed_reg_p (reg))
4346 return false;
4347 else
4348 return true;
4349 }
4350
4351 /* Return 1 if it is possible to return using a single instruction.
4352 If SIBLING is non-null, this is a test for a return before a sibling
4353 call. SIBLING is the call insn, so we can examine its register usage. */
4354
4355 int
4356 use_return_insn (int iscond, rtx sibling)
4357 {
4358 int regno;
4359 unsigned int func_type;
4360 unsigned long saved_int_regs;
4361 unsigned HOST_WIDE_INT stack_adjust;
4362 arm_stack_offsets *offsets;
4363
4364 /* Never use a return instruction before reload has run. */
4365 if (!reload_completed)
4366 return 0;
4367
4368 /* Never use a return instruction when return address signing
4369 mechanism is enabled as it requires more than one
4370 instruction. */
4371 if (arm_current_function_pac_enabled_p ())
4372 return 0;
4373
4374 func_type = arm_current_func_type ();
4375
4376 /* Naked, volatile and stack alignment functions need special
4377 consideration. */
4378 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4379 return 0;
4380
4381 /* So do interrupt functions that use the frame pointer and Thumb
4382 interrupt functions. */
4383 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4384 return 0;
4385
4386 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4387 && !optimize_function_for_size_p (cfun))
4388 return 0;
4389
4390 offsets = arm_get_frame_offsets ();
4391 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4392
4393 /* As do variadic functions. */
4394 if (crtl->args.pretend_args_size
4395 || cfun->machine->uses_anonymous_args
4396 /* Or if the function calls __builtin_eh_return () */
4397 || crtl->calls_eh_return
4398 /* Or if the function calls alloca */
4399 || cfun->calls_alloca
4400 /* Or if there is a stack adjustment. However, if the stack pointer
4401 is saved on the stack, we can use a pre-incrementing stack load. */
4402 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4403 && stack_adjust == 4))
4404 /* Or if the static chain register was saved above the frame, under the
4405 assumption that the stack pointer isn't saved on the stack. */
4406 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4407 && arm_compute_static_chain_stack_bytes() != 0))
4408 return 0;
4409
4410 saved_int_regs = offsets->saved_regs_mask;
4411
4412 /* Unfortunately, the insn
4413
4414 ldmib sp, {..., sp, ...}
4415
4416 triggers a bug on most SA-110 based devices, such that the stack
4417 pointer won't be correctly restored if the instruction takes a
4418 page fault. We work around this problem by popping r3 along with
4419 the other registers, since that is never slower than executing
4420 another instruction.
4421
4422 We test for !arm_arch5t here, because code for any architecture
4423 less than this could potentially be run on one of the buggy
4424 chips. */
4425 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4426 {
4427 /* Validate that r3 is a call-clobbered register (always true in
4428 the default abi) ... */
4429 if (!call_used_or_fixed_reg_p (3))
4430 return 0;
4431
4432 /* ... that it isn't being used for a return value ... */
4433 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4434 return 0;
4435
4436 /* ... or for a tail-call argument ... */
4437 if (sibling)
4438 {
4439 gcc_assert (CALL_P (sibling));
4440
4441 if (find_regno_fusage (sibling, USE, 3))
4442 return 0;
4443 }
4444
4445 /* ... and that there are no call-saved registers in r0-r2
4446 (always true in the default ABI). */
4447 if (saved_int_regs & 0x7)
4448 return 0;
4449 }
4450
4451 /* Can't be done if interworking with Thumb, and any registers have been
4452 stacked. */
4453 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4454 return 0;
4455
4456 /* On StrongARM, conditional returns are expensive if they aren't
4457 taken and multiple registers have been stacked. */
4458 if (iscond && arm_tune_strongarm)
4459 {
4460 /* Conditional return when just the LR is stored is a simple
4461 conditional-load instruction, that's not expensive. */
4462 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4463 return 0;
4464
4465 if (flag_pic
4466 && arm_pic_register != INVALID_REGNUM
4467 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4468 return 0;
4469 }
4470
4471 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4472 several instructions if anything needs to be popped. Armv8.1-M Mainline
4473 also needs several instructions to save and restore FP context. */
4474 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4475 return 0;
4476
4477 /* If there are saved registers but the LR isn't saved, then we need
4478 two instructions for the return. */
4479 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4480 return 0;
4481
4482 /* Can't be done if any of the VFP regs are pushed,
4483 since this also requires an insn. */
4484 if (TARGET_VFP_BASE)
4485 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4486 if (reg_needs_saving_p (regno))
4487 return 0;
4488
4489 if (TARGET_REALLY_IWMMXT)
4490 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4491 if (reg_needs_saving_p (regno))
4492 return 0;
4493
4494 return 1;
4495 }
4496
4497 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4498 shrink-wrapping if possible. This is the case if we need to emit a
4499 prologue, which we can test by looking at the offsets. */
4500 bool
4501 use_simple_return_p (void)
4502 {
4503 arm_stack_offsets *offsets;
4504
4505 /* Note this function can be called before or after reload. */
4506 if (!reload_completed)
4507 arm_compute_frame_layout ();
4508
4509 offsets = arm_get_frame_offsets ();
4510 return offsets->outgoing_args != 0;
4511 }
4512
4513 /* Return TRUE if int I is a valid immediate ARM constant. */
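/* Worked examples (added for illustration; the reasoning is the editor's):
   an ARM-mode immediate is an 8-bit value rotated right by an even amount,
   so 0x000000ff, 0x00ff0000 and 0xff000000 are valid, 0x000003fc is valid
   (0xff shifted left by 2, an even rotation), 0x000001fe is not (it would
   need an odd rotation), and 0x00ff00ff is rejected here in ARM mode but
   accepted below for Thumb-2 as the replicated pattern 0x00XY00XY.  */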
4514
4515 int
4516 const_ok_for_arm (HOST_WIDE_INT i)
4517 {
4518 int lowbit;
4519
4520 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4521 be all zero, or all one. */
4522 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4523 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4524 != ((~(unsigned HOST_WIDE_INT) 0)
4525 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4526 return FALSE;
4527
4528 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4529
4530 /* Fast return for 0 and small values. We must do this for zero, since
4531 the code below can't handle that one case. */
4532 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4533 return TRUE;
4534
4535 /* Get the number of trailing zeros. */
4536 lowbit = ffs((int) i) - 1;
4537
4538 /* Only even shifts are allowed in ARM mode so round down to the
4539 nearest even number. */
4540 if (TARGET_ARM)
4541 lowbit &= ~1;
4542
4543 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4544 return TRUE;
4545
4546 if (TARGET_ARM)
4547 {
4548 /* Allow rotated constants in ARM mode. */
4549 if (lowbit <= 4
4550 && ((i & ~0xc000003f) == 0
4551 || (i & ~0xf000000f) == 0
4552 || (i & ~0xfc000003) == 0))
4553 return TRUE;
4554 }
4555 else if (TARGET_THUMB2)
4556 {
4557 HOST_WIDE_INT v;
4558
4559 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4560 v = i & 0xff;
4561 v |= v << 16;
4562 if (i == v || i == (v | (v << 8)))
4563 return TRUE;
4564
4565 /* Allow repeated pattern 0xXY00XY00. */
4566 v = i & 0xff00;
4567 v |= v << 16;
4568 if (i == v)
4569 return TRUE;
4570 }
4571 else if (TARGET_HAVE_MOVT)
4572 {
4573 /* Thumb-1 Targets with MOVT. */
4574 if (i > 0xffff)
4575 return FALSE;
4576 else
4577 return TRUE;
4578 }
4579
4580 return FALSE;
4581 }
4582
4583 /* Return true if I is a valid constant for the operation CODE. */
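/* Worked example (illustrative): 0xffffff00 is not itself a valid ARM
   immediate, but for SET it is accepted because its complement 0x000000ff
   is, so a single MVN can load it; similarly the PLUS case accepts values
   whose negation is encodable, letting an ADD be emitted as SUB.  */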
4584 int
4585 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4586 {
4587 if (const_ok_for_arm (i))
4588 return 1;
4589
4590 switch (code)
4591 {
4592 case SET:
4593 /* See if we can use movw. */
4594 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4595 return 1;
4596 else
4597 /* Otherwise, try mvn. */
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599
4600 case PLUS:
4601 /* See if we can use addw or subw. */
4602 if (TARGET_THUMB2
4603 && ((i & 0xfffff000) == 0
4604 || ((-i) & 0xfffff000) == 0))
4605 return 1;
4606 /* Fall through. */
4607 case COMPARE:
4608 case EQ:
4609 case NE:
4610 case GT:
4611 case LE:
4612 case LT:
4613 case GE:
4614 case GEU:
4615 case LTU:
4616 case GTU:
4617 case LEU:
4618 case UNORDERED:
4619 case ORDERED:
4620 case UNEQ:
4621 case UNGE:
4622 case UNLT:
4623 case UNGT:
4624 case UNLE:
4625 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4626
4627 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4628 case XOR:
4629 return 0;
4630
4631 case IOR:
4632 if (TARGET_THUMB2)
4633 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4634 return 0;
4635
4636 case AND:
4637 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4638
4639 default:
4640 gcc_unreachable ();
4641 }
4642 }
4643
4644 /* Return true if I is a valid di mode constant for the operation CODE. */
4645 int
4646 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4647 {
4648 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4649 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4650 rtx hi = GEN_INT (hi_val);
4651 rtx lo = GEN_INT (lo_val);
4652
4653 if (TARGET_THUMB1)
4654 return 0;
4655
4656 switch (code)
4657 {
4658 case AND:
4659 case IOR:
4660 case XOR:
4661 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4662 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4663 case PLUS:
4664 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4665
4666 default:
4667 return 0;
4668 }
4669 }
4670
4671 /* Emit a sequence of insns to handle a large constant.
4672 CODE is the code of the operation required, it can be any of SET, PLUS,
4673 IOR, AND, XOR, MINUS;
4674 MODE is the mode in which the operation is being performed;
4675 VAL is the integer to operate on;
4676 SOURCE is the other operand (a register, or a null-pointer for SET);
4677 SUBTARGETS means it is safe to create scratch registers if that will
4678 either produce a simpler sequence, or we will want to cse the values.
4679 Return value is the number of insns emitted. */
4680
4681 /* ??? Tweak this for thumb2. */
4682 int
4683 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4684 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4685 {
4686 rtx cond;
4687
4688 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4689 cond = COND_EXEC_TEST (PATTERN (insn));
4690 else
4691 cond = NULL_RTX;
4692
4693 if (subtargets || code == SET
4694 || (REG_P (target) && REG_P (source)
4695 && REGNO (target) != REGNO (source)))
4696 {
4697 /* After arm_reorg has been called, we can't fix up expensive
4698 constants by pushing them into memory so we must synthesize
4699 them in-line, regardless of the cost. This is only likely to
4700 be more costly on chips that have load delay slots and we are
4701 compiling without running the scheduler (so no splitting
4702 occurred before the final instruction emission).
4703
4704 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4705 */
4706 if (!cfun->machine->after_arm_reorg
4707 && !cond
4708 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4709 1, 0)
4710 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4711 + (code != SET))))
4712 {
4713 if (code == SET)
4714 {
4715 /* Currently SET is the only monadic value for CODE; all
4716 the rest are dyadic. */
4717 if (TARGET_USE_MOVT)
4718 arm_emit_movpair (target, GEN_INT (val));
4719 else
4720 emit_set_insn (target, GEN_INT (val));
4721
4722 return 1;
4723 }
4724 else
4725 {
4726 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4727
4728 if (TARGET_USE_MOVT)
4729 arm_emit_movpair (temp, GEN_INT (val));
4730 else
4731 emit_set_insn (temp, GEN_INT (val));
4732
4733 /* For MINUS, the value is subtracted from, since we never
4734 have subtraction of a constant. */
4735 if (code == MINUS)
4736 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4737 else
4738 emit_set_insn (target,
4739 gen_rtx_fmt_ee (code, mode, source, temp));
4740 return 2;
4741 }
4742 }
4743 }
4744
4745 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4746 1);
4747 }
4748
4749 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4750 ARM/Thumb-2 immediates and add up to VAL.
4751 The function return value gives the number of insns required. */
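/* For example (editor's illustration): with CODE == SET and
   VAL == 0x00ff00ff, ARM mode needs two elements (0x000000ff then
   0x00ff0000, i.e. a MOV followed by an ORR), whereas Thumb-2 encodes the
   whole value as the single replicated constant 0x00ff00ff.  */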
4752 static int
4753 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4754 struct four_ints *return_sequence)
4755 {
4756 int best_consecutive_zeros = 0;
4757 int i;
4758 int best_start = 0;
4759 int insns1, insns2;
4760 struct four_ints tmp_sequence;
4761
4762 /* If we aren't targeting ARM, the best place to start is always at
4763 the bottom, otherwise look more closely. */
4764 if (TARGET_ARM)
4765 {
4766 for (i = 0; i < 32; i += 2)
4767 {
4768 int consecutive_zeros = 0;
4769
4770 if (!(val & (3 << i)))
4771 {
4772 while ((i < 32) && !(val & (3 << i)))
4773 {
4774 consecutive_zeros += 2;
4775 i += 2;
4776 }
4777 if (consecutive_zeros > best_consecutive_zeros)
4778 {
4779 best_consecutive_zeros = consecutive_zeros;
4780 best_start = i - consecutive_zeros;
4781 }
4782 i -= 2;
4783 }
4784 }
4785 }
4786
4787 /* So long as it won't require any more insns to do so, it's
4788 desirable to emit a small constant (in bits 0...9) in the last
4789 insn. This way there is more chance that it can be combined with
4790 a later addressing insn to form a pre-indexed load or store
4791 operation. Consider:
4792
4793 *((volatile int *)0xe0000100) = 1;
4794 *((volatile int *)0xe0000110) = 2;
4795
4796 We want this to wind up as:
4797
4798 mov rA, #0xe0000000
4799 mov rB, #1
4800 str rB, [rA, #0x100]
4801 mov rB, #2
4802 str rB, [rA, #0x110]
4803
4804 rather than having to synthesize both large constants from scratch.
4805
4806 Therefore, we calculate how many insns would be required to emit
4807 the constant starting from `best_start', and also starting from
4808 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4809 yield a shorter sequence, we may as well use zero. */
4810 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4811 if (best_start != 0
4812 && ((HOST_WIDE_INT_1U << best_start) < val))
4813 {
4814 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4815 if (insns2 <= insns1)
4816 {
4817 *return_sequence = tmp_sequence;
4818 insns1 = insns2;
4819 }
4820 }
4821
4822 return insns1;
4823 }
4824
4825 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4826 static int
4827 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4828 struct four_ints *return_sequence, int i)
4829 {
4830 int remainder = val & 0xffffffff;
4831 int insns = 0;
4832
4833 /* Try and find a way of doing the job in either two or three
4834 instructions.
4835
4836 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4837 location. We start at position I. This may be the MSB, or
4838 optimal_immediate_sequence may have positioned it at the largest block
4839 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4840 wrapping around to the top of the word when we drop off the bottom.
4841 In the worst case this code should produce no more than four insns.
4842
4843 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4844 constants, shifted to any arbitrary location. We should always start
4845 at the MSB. */
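/* Illustration (rN below is just a placeholder scratch register): in ARM
mode 0x12340000 cannot be encoded as a single rotated 8-bit immediate,
since its set bits span 11 bit positions, but it splits into two valid
immediates, 0x12000000 and 0x00340000, giving e.g.
mov rN, #0x12000000
add rN, rN, #0x00340000
In Thumb-2 mode a replicated constant such as 0x00ff00ff is itself a
single immediate, so fewer insns may be needed there. */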
4846 do
4847 {
4848 int end;
4849 unsigned int b1, b2, b3, b4;
4850 unsigned HOST_WIDE_INT result;
4851 int loc;
4852
4853 gcc_assert (insns < 4);
4854
4855 if (i <= 0)
4856 i += 32;
4857
4858 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4859 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4860 {
4861 loc = i;
4862 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4863 /* We can use addw/subw for the last 12 bits. */
4864 result = remainder;
4865 else
4866 {
4867 /* Use an 8-bit shifted/rotated immediate. */
4868 end = i - 8;
4869 if (end < 0)
4870 end += 32;
4871 result = remainder & ((0x0ff << end)
4872 | ((i < end) ? (0xff >> (32 - end))
4873 : 0));
4874 i -= 8;
4875 }
4876 }
4877 else
4878 {
4879 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4880 arbitrary shifts. */
4881 i -= TARGET_ARM ? 2 : 1;
4882 continue;
4883 }
4884
4885 /* Next, see if we can do a better job with a thumb2 replicated
4886 constant.
4887
4888 We do it this way around to catch the cases like 0x01F001E0 where
4889 two 8-bit immediates would work, but a replicated constant would
4890 make it worse.
4891
4892 TODO: 16-bit constants that don't clear all the bits, but still win.
4893 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
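/* Illustration of the ordering: for 0x01F001E0 the bytes are 0x01, 0xF0,
0x01 and 0xE0, so b1 == b3 and a 16-bit replicated constant could clear
0x01000100, but that leaves 0x00F000E0 and needs two further insns
(three in total), whereas the two 8-bit immediates 0x01F00000 and
0x000001E0 finish the job in two. */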
4894 if (TARGET_THUMB2)
4895 {
4896 b1 = (remainder & 0xff000000) >> 24;
4897 b2 = (remainder & 0x00ff0000) >> 16;
4898 b3 = (remainder & 0x0000ff00) >> 8;
4899 b4 = remainder & 0xff;
4900
4901 if (loc > 24)
4902 {
4903 /* The 8-bit immediate already found clears b1 (and maybe b2),
4904 but must leave b3 and b4 alone. */
4905
4906 /* First try to find a 32-bit replicated constant that clears
4907 almost everything. We can assume that we can't do it in one,
4908 or else we wouldn't be here. */
4909 unsigned int tmp = b1 & b2 & b3 & b4;
4910 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4911 + (tmp << 24);
4912 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4913 + (tmp == b3) + (tmp == b4);
4914 if (tmp
4915 && (matching_bytes >= 3
4916 || (matching_bytes == 2
4917 && const_ok_for_op (remainder & ~tmp2, code))))
4918 {
4919 /* At least 3 of the bytes match, and the fourth has at
4920 least as many bits set, or two of the bytes match
4921 and it will only require one more insn to finish. */
4922 result = tmp2;
4923 i = tmp != b1 ? 32
4924 : tmp != b2 ? 24
4925 : tmp != b3 ? 16
4926 : 8;
4927 }
4928
4929 /* Second, try to find a 16-bit replicated constant that can
4930 leave three of the bytes clear. If b2 or b4 is already
4931 zero, then we can. If the 8-bit from above would not
4932 clear b2 anyway, then we still win. */
4933 else if (b1 == b3 && (!b2 || !b4
4934 || (remainder & 0x00ff0000 & ~result)))
4935 {
4936 result = remainder & 0xff00ff00;
4937 i = 24;
4938 }
4939 }
4940 else if (loc > 16)
4941 {
4942 /* The 8-bit immediate already found clears b2 (and maybe b3)
4943 and we don't get here unless b1 is already clear, but it will
4944 leave b4 unchanged. */
4945
4946 /* If we can clear b2 and b4 at once, then we win, since the
4947 8-bits couldn't possibly reach that far. */
4948 if (b2 == b4)
4949 {
4950 result = remainder & 0x00ff00ff;
4951 i = 16;
4952 }
4953 }
4954 }
4955
4956 return_sequence->i[insns++] = result;
4957 remainder &= ~result;
4958
4959 if (code == SET || code == MINUS)
4960 code = PLUS;
4961 }
4962 while (remainder);
4963
4964 return insns;
4965 }
4966
4967 /* Emit an instruction with the indicated PATTERN. If COND is
4968 non-NULL, conditionalize the execution of the instruction on COND
4969 being true. */
4970
4971 static void
4972 emit_constant_insn (rtx cond, rtx pattern)
4973 {
4974 if (cond)
4975 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4976 emit_insn (pattern);
4977 }
4978
4979 /* As above, but extra parameter GENERATE which, if clear, suppresses
4980 RTL generation. */
4981
4982 static int
4983 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4984 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4985 int subtargets, int generate)
4986 {
4987 int can_invert = 0;
4988 int can_negate = 0;
4989 int final_invert = 0;
4990 int i;
4991 int set_sign_bit_copies = 0;
4992 int clear_sign_bit_copies = 0;
4993 int clear_zero_bit_copies = 0;
4994 int set_zero_bit_copies = 0;
4995 int insns = 0, neg_insns, inv_insns;
4996 unsigned HOST_WIDE_INT temp1, temp2;
4997 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4998 struct four_ints *immediates;
4999 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5000
5001 /* Find out which operations are safe for a given CODE. Also do a quick
5002 check for degenerate cases; these can occur when DImode operations
5003 are split. */
5004 switch (code)
5005 {
5006 case SET:
5007 can_invert = 1;
5008 break;
5009
5010 case PLUS:
5011 can_negate = 1;
5012 break;
5013
5014 case IOR:
5015 if (remainder == 0xffffffff)
5016 {
5017 if (generate)
5018 emit_constant_insn (cond,
5019 gen_rtx_SET (target,
5020 GEN_INT (ARM_SIGN_EXTEND (val))));
5021 return 1;
5022 }
5023
5024 if (remainder == 0)
5025 {
5026 if (reload_completed && rtx_equal_p (target, source))
5027 return 0;
5028
5029 if (generate)
5030 emit_constant_insn (cond, gen_rtx_SET (target, source));
5031 return 1;
5032 }
5033 break;
5034
5035 case AND:
5036 if (remainder == 0)
5037 {
5038 if (generate)
5039 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5040 return 1;
5041 }
5042 if (remainder == 0xffffffff)
5043 {
5044 if (reload_completed && rtx_equal_p (target, source))
5045 return 0;
5046 if (generate)
5047 emit_constant_insn (cond, gen_rtx_SET (target, source));
5048 return 1;
5049 }
5050 can_invert = 1;
5051 break;
5052
5053 case XOR:
5054 if (remainder == 0)
5055 {
5056 if (reload_completed && rtx_equal_p (target, source))
5057 return 0;
5058 if (generate)
5059 emit_constant_insn (cond, gen_rtx_SET (target, source));
5060 return 1;
5061 }
5062
5063 if (remainder == 0xffffffff)
5064 {
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 gen_rtx_NOT (mode, source)));
5069 return 1;
5070 }
5071 final_invert = 1;
5072 break;
5073
5074 case MINUS:
5075 /* We treat MINUS as (val - source), since (source - val) is always
5076 passed as (source + (-val)). */
5077 if (remainder == 0)
5078 {
5079 if (generate)
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (target,
5082 gen_rtx_NEG (mode, source)));
5083 return 1;
5084 }
5085 if (const_ok_for_arm (val))
5086 {
5087 if (generate)
5088 emit_constant_insn (cond,
5089 gen_rtx_SET (target,
5090 gen_rtx_MINUS (mode, GEN_INT (val),
5091 source)));
5092 return 1;
5093 }
5094
5095 break;
5096
5097 default:
5098 gcc_unreachable ();
5099 }
5100
5101 /* If we can do it in one insn get out quickly. */
5102 if (const_ok_for_op (val, code))
5103 {
5104 if (generate)
5105 emit_constant_insn (cond,
5106 gen_rtx_SET (target,
5107 (source
5108 ? gen_rtx_fmt_ee (code, mode, source,
5109 GEN_INT (val))
5110 : GEN_INT (val))));
5111 return 1;
5112 }
5113
5114 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5115 insn. */
5116 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5117 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5118 {
5119 if (generate)
5120 {
5121 if (mode == SImode && i == 16)
5122 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5123 smaller insn. */
5124 emit_constant_insn (cond,
5125 gen_zero_extendhisi2
5126 (target, gen_lowpart (HImode, source)));
5127 else
5128 /* Extzv only supports SImode, but we can coerce the operands
5129 into that mode. */
5130 emit_constant_insn (cond,
5131 gen_extzv_t2 (gen_lowpart (SImode, target),
5132 gen_lowpart (SImode, source),
5133 GEN_INT (i), const0_rtx));
5134 }
5135
5136 return 1;
5137 }
5138
5139 /* Calculate a few attributes that may be useful for specific
5140 optimizations. */
5141 /* Count number of leading zeros. */
5142 for (i = 31; i >= 0; i--)
5143 {
5144 if ((remainder & (1 << i)) == 0)
5145 clear_sign_bit_copies++;
5146 else
5147 break;
5148 }
5149
5150 /* Count number of leading 1's. */
5151 for (i = 31; i >= 0; i--)
5152 {
5153 if ((remainder & (1 << i)) != 0)
5154 set_sign_bit_copies++;
5155 else
5156 break;
5157 }
5158
5159 /* Count number of trailing zeros. */
5160 for (i = 0; i <= 31; i++)
5161 {
5162 if ((remainder & (1 << i)) == 0)
5163 clear_zero_bit_copies++;
5164 else
5165 break;
5166 }
5167
5168 /* Count number of trailing 1's. */
5169 for (i = 0; i <= 31; i++)
5170 {
5171 if ((remainder & (1 << i)) != 0)
5172 set_zero_bit_copies++;
5173 else
5174 break;
5175 }
5176
5177 switch (code)
5178 {
5179 case SET:
5180 /* See if we can do this by sign_extending a constant that is known
5181 to be negative. This is a good way of doing it, since the shift
5182 may well merge into a subsequent insn. */
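/* For example (illustrative; rN is a scratch register): 0xfffffe10 has
23 leading 1s, and 0xfffffe10 << 22 == 0x84000000 is a valid immediate,
so
mov rN, #0x84000000
asr rN, rN, #22
recreates 0xfffffe10 in two insns. */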
5183 if (set_sign_bit_copies > 1)
5184 {
5185 if (const_ok_for_arm
5186 (temp1 = ARM_SIGN_EXTEND (remainder
5187 << (set_sign_bit_copies - 1))))
5188 {
5189 if (generate)
5190 {
5191 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5192 emit_constant_insn (cond,
5193 gen_rtx_SET (new_src, GEN_INT (temp1)));
5194 emit_constant_insn (cond,
5195 gen_ashrsi3 (target, new_src,
5196 GEN_INT (set_sign_bit_copies - 1)));
5197 }
5198 return 2;
5199 }
5200 /* For an inverted constant, we will need to set the low bits;
5201 these will be shifted out of harm's way. */
5202 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5203 if (const_ok_for_arm (~temp1))
5204 {
5205 if (generate)
5206 {
5207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5208 emit_constant_insn (cond,
5209 gen_rtx_SET (new_src, GEN_INT (temp1)));
5210 emit_constant_insn (cond,
5211 gen_ashrsi3 (target, new_src,
5212 GEN_INT (set_sign_bit_copies - 1)));
5213 }
5214 return 2;
5215 }
5216 }
5217
5218 /* See if we can calculate the value as the difference between two
5219 valid immediates. */
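/* E.g. (illustrative; rN is a scratch register) 0x00fffff0 is not a
valid immediate, but it equals 0x01000000 - 0x10, both of which are, so
mov rN, #0x01000000
sub target, rN, #0x10
does it in two insns instead of the three a bytewise split would need. */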
5220 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5221 {
5222 int topshift = clear_sign_bit_copies & ~1;
5223
5224 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5225 & (0xff000000 >> topshift));
5226
5227 /* If temp1 is zero, then that means the 9 most significant
5228 bits of remainder were 1 and we've caused it to overflow.
5229 When topshift is 0 we don't need to do anything since we
5230 can borrow from 'bit 32'. */
5231 if (temp1 == 0 && topshift != 0)
5232 temp1 = 0x80000000 >> (topshift - 1);
5233
5234 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5235
5236 if (const_ok_for_arm (temp2))
5237 {
5238 if (generate)
5239 {
5240 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5241 emit_constant_insn (cond,
5242 gen_rtx_SET (new_src, GEN_INT (temp1)));
5243 emit_constant_insn (cond,
5244 gen_addsi3 (target, new_src,
5245 GEN_INT (-temp2)));
5246 }
5247
5248 return 2;
5249 }
5250 }
5251
5252 /* See if we can generate this by setting the bottom (or the top)
5253 16 bits, and then shifting these into the other half of the
5254 word. We only look for the simplest cases, to do more would cost
5255 too much. Be careful, however, not to generate this when the
5256 alternative would take fewer insns. */
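/* E.g. (illustrative, ARM mode; rN is a scratch register) 0x01010101:
the low half 0x0101 is not a valid immediate, but the remainder equals
0x0101 | (0x0101 << 16), so we can emit something like
mov rN, #0x0100
add rN, rN, #0x01
orr target, rN, rN, lsl #16
for three insns instead of four. (Thumb-2 can instead load 0x01010101
as a single replicated-byte immediate.) */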
5257 if (val & 0xffff0000)
5258 {
5259 temp1 = remainder & 0xffff0000;
5260 temp2 = remainder & 0x0000ffff;
5261
5262 /* Overlaps outside this range are best done using other methods. */
5263 for (i = 9; i < 24; i++)
5264 {
5265 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5266 && !const_ok_for_arm (temp2))
5267 {
5268 rtx new_src = (subtargets
5269 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5270 : target);
5271 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5272 source, subtargets, generate);
5273 source = new_src;
5274 if (generate)
5275 emit_constant_insn
5276 (cond,
5277 gen_rtx_SET
5278 (target,
5279 gen_rtx_IOR (mode,
5280 gen_rtx_ASHIFT (mode, source,
5281 GEN_INT (i)),
5282 source)));
5283 return insns + 1;
5284 }
5285 }
5286
5287 /* Don't duplicate cases already considered. */
5288 for (i = 17; i < 24; i++)
5289 {
5290 if (((temp1 | (temp1 >> i)) == remainder)
5291 && !const_ok_for_arm (temp1))
5292 {
5293 rtx new_src = (subtargets
5294 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5295 : target);
5296 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5297 source, subtargets, generate);
5298 source = new_src;
5299 if (generate)
5300 emit_constant_insn
5301 (cond,
5302 gen_rtx_SET (target,
5303 gen_rtx_IOR
5304 (mode,
5305 gen_rtx_LSHIFTRT (mode, source,
5306 GEN_INT (i)),
5307 source)));
5308 return insns + 1;
5309 }
5310 }
5311 }
5312 break;
5313
5314 case IOR:
5315 case XOR:
5316 /* If we have IOR or XOR, and the constant can be loaded in a
5317 single instruction, and we can find a temporary to put it in,
5318 then this can be done in two instructions instead of 3-4. */
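/* E.g. (illustrative, ARM mode; rT is the temporary) x = y ^ 0xffffff00:
~0xffffff00 == 0xff is a valid immediate, so
mvn rT, #0xff @ rT = 0xffffff00
eor x, y, rT
takes two insns, rather than splitting the constant across three EORs. */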
5319 if (subtargets
5320 /* TARGET can't be NULL if SUBTARGETS is 0 */
5321 || (reload_completed && !reg_mentioned_p (target, source)))
5322 {
5323 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5324 {
5325 if (generate)
5326 {
5327 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5328
5329 emit_constant_insn (cond,
5330 gen_rtx_SET (sub, GEN_INT (val)));
5331 emit_constant_insn (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_fmt_ee (code, mode,
5334 source, sub)));
5335 }
5336 return 2;
5337 }
5338 }
5339
5340 if (code == XOR)
5341 break;
5342
5343 /* Convert
5344 x = y | constant (where the constant consists of set_sign_bit_copies
5345 leading 1s followed by 0s, e.g. 0xfff00000) into
5346 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5347
5348 This can be done in 2 instructions by using shifts with mov or mvn.
5349 e.g. for
5350 x = x | 0xfff00000;
5351 we generate:
5352 mvn r0, r0, asl #12
5353 mvn r0, r0, lsr #12 */
5354 if (set_sign_bit_copies > 8
5355 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360 rtx shift = GEN_INT (set_sign_bit_copies);
5361
5362 emit_constant_insn
5363 (cond,
5364 gen_rtx_SET (sub,
5365 gen_rtx_NOT (mode,
5366 gen_rtx_ASHIFT (mode,
5367 source,
5368 shift))));
5369 emit_constant_insn
5370 (cond,
5371 gen_rtx_SET (target,
5372 gen_rtx_NOT (mode,
5373 gen_rtx_LSHIFTRT (mode, sub,
5374 shift))));
5375 }
5376 return 2;
5377 }
5378
5379 /* Convert
5380 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5381 to
5382 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5383
5384 E.g. for r0 = r0 | 0xfff
5385 mvn r0, r0, lsr #12
5386 mvn r0, r0, asl #12
5387
5388 */
5389 if (set_zero_bit_copies > 8
5390 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5391 {
5392 if (generate)
5393 {
5394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5395 rtx shift = GEN_INT (set_zero_bit_copies);
5396
5397 emit_constant_insn
5398 (cond,
5399 gen_rtx_SET (sub,
5400 gen_rtx_NOT (mode,
5401 gen_rtx_LSHIFTRT (mode,
5402 source,
5403 shift))));
5404 emit_constant_insn
5405 (cond,
5406 gen_rtx_SET (target,
5407 gen_rtx_NOT (mode,
5408 gen_rtx_ASHIFT (mode, sub,
5409 shift))));
5410 }
5411 return 2;
5412 }
5413
5414 /* This will never be reached for Thumb2 because orn is a valid
5415 instruction. This is for Thumb1 and the ARM 32 bit cases.
5416
5417 x = y | constant (such that ~constant is a valid constant)
5418 Transform this to
5419 x = ~(~y & ~constant).
5420 */
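/* E.g. (illustrative, ARM mode; rT is the temporary) x = y | 0xffffff0f:
~0xffffff0f == 0xf0 is a valid immediate, so
mvn rT, y
and rT, rT, #0xf0
mvn x, rT
gives x = ~(~y & 0xf0) = y | 0xffffff0f in three insns, where ORRing
the constant in directly would need four immediates. */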
5421 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5422 {
5423 if (generate)
5424 {
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 emit_constant_insn (cond,
5427 gen_rtx_SET (sub,
5428 gen_rtx_NOT (mode, source)));
5429 source = sub;
5430 if (subtargets)
5431 sub = gen_reg_rtx (mode);
5432 emit_constant_insn (cond,
5433 gen_rtx_SET (sub,
5434 gen_rtx_AND (mode, source,
5435 GEN_INT (temp1))));
5436 emit_constant_insn (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode, sub)));
5439 }
5440 return 3;
5441 }
5442 break;
5443
5444 case AND:
5445 /* See if two shifts will do 2 or more insns' worth of work. */
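/* E.g. (illustrative; rN is a scratch register) on a core without
Thumb-2/UBFX, x & 0xfff has 20 clear sign bits, so
lsl rN, x, #20
lsr target, rN, #20
does the masking in two insns, where BICing away the upper 20 bits
would need three immediates. */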
5446 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5447 {
5448 HOST_WIDE_INT shift_mask = ((0xffffffff
5449 << (32 - clear_sign_bit_copies))
5450 & 0xffffffff);
5451
5452 if ((remainder | shift_mask) != 0xffffffff)
5453 {
5454 HOST_WIDE_INT new_val
5455 = ARM_SIGN_EXTEND (remainder | shift_mask);
5456
5457 if (generate)
5458 {
5459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5460 insns = arm_gen_constant (AND, SImode, cond, new_val,
5461 new_src, source, subtargets, 1);
5462 source = new_src;
5463 }
5464 else
5465 {
5466 rtx targ = subtargets ? NULL_RTX : target;
5467 insns = arm_gen_constant (AND, mode, cond, new_val,
5468 targ, source, subtargets, 0);
5469 }
5470 }
5471
5472 if (generate)
5473 {
5474 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5475 rtx shift = GEN_INT (clear_sign_bit_copies);
5476
5477 emit_insn (gen_ashlsi3 (new_src, source, shift));
5478 emit_insn (gen_lshrsi3 (target, new_src, shift));
5479 }
5480
5481 return insns + 2;
5482 }
5483
5484 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5485 {
5486 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5487
5488 if ((remainder | shift_mask) != 0xffffffff)
5489 {
5490 HOST_WIDE_INT new_val
5491 = ARM_SIGN_EXTEND (remainder | shift_mask);
5492 if (generate)
5493 {
5494 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5495
5496 insns = arm_gen_constant (AND, mode, cond, new_val,
5497 new_src, source, subtargets, 1);
5498 source = new_src;
5499 }
5500 else
5501 {
5502 rtx targ = subtargets ? NULL_RTX : target;
5503
5504 insns = arm_gen_constant (AND, mode, cond, new_val,
5505 targ, source, subtargets, 0);
5506 }
5507 }
5508
5509 if (generate)
5510 {
5511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5512 rtx shift = GEN_INT (clear_zero_bit_copies);
5513
5514 emit_insn (gen_lshrsi3 (new_src, source, shift));
5515 emit_insn (gen_ashlsi3 (target, new_src, shift));
5516 }
5517
5518 return insns + 2;
5519 }
5520
5521 break;
5522
5523 default:
5524 break;
5525 }
5526
5527 /* Calculate what the instruction sequences would be if we generated it
5528 normally, negated, or inverted. */
5529 if (code == AND)
5530 /* AND cannot be split into multiple insns, so invert and use BIC. */
5531 insns = 99;
5532 else
5533 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5534
5535 if (can_negate)
5536 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5537 &neg_immediates);
5538 else
5539 neg_insns = 99;
5540
5541 if (can_invert || final_invert)
5542 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5543 &inv_immediates);
5544 else
5545 inv_insns = 99;
5546
5547 immediates = &pos_immediates;
5548
5549 /* Is the negated immediate sequence more efficient? */
5550 if (neg_insns < insns && neg_insns <= inv_insns)
5551 {
5552 insns = neg_insns;
5553 immediates = &neg_immediates;
5554 }
5555 else
5556 can_negate = 0;
5557
5558 /* Is the inverted immediate sequence more efficient?
5559 We must allow for an extra NOT instruction for XOR operations, although
5560 there is some chance that the final 'mvn' will get optimized later. */
5561 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5562 {
5563 insns = inv_insns;
5564 immediates = &inv_immediates;
5565 }
5566 else
5567 {
5568 can_invert = 0;
5569 final_invert = 0;
5570 }
5571
5572 /* Now output the chosen sequence as instructions. */
5573 if (generate)
5574 {
5575 for (i = 0; i < insns; i++)
5576 {
5577 rtx new_src, temp1_rtx;
5578
5579 temp1 = immediates->i[i];
5580
5581 if (code == SET || code == MINUS)
5582 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5583 else if ((final_invert || i < (insns - 1)) && subtargets)
5584 new_src = gen_reg_rtx (mode);
5585 else
5586 new_src = target;
5587
5588 if (can_invert)
5589 temp1 = ~temp1;
5590 else if (can_negate)
5591 temp1 = -temp1;
5592
5593 temp1 = trunc_int_for_mode (temp1, mode);
5594 temp1_rtx = GEN_INT (temp1);
5595
5596 if (code == SET)
5597 ;
5598 else if (code == MINUS)
5599 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5600 else
5601 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5602
5603 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5604 source = new_src;
5605
5606 if (code == SET)
5607 {
5608 can_negate = can_invert;
5609 can_invert = 0;
5610 code = PLUS;
5611 }
5612 else if (code == MINUS)
5613 code = PLUS;
5614 }
5615 }
5616
5617 if (final_invert)
5618 {
5619 if (generate)
5620 emit_constant_insn (cond, gen_rtx_SET (target,
5621 gen_rtx_NOT (mode, source)));
5622 insns++;
5623 }
5624
5625 return insns;
5626 }
5627
5628 /* Return TRUE if op is a constant where both the low and top words are
5629 suitable for RSB/RSC instructions. This is never true for Thumb, since
5630 we do not have RSC in that case. */
5631 static bool
5632 arm_const_double_prefer_rsbs_rsc (rtx op)
5633 {
5634 /* Thumb lacks RSC, so we never prefer that sequence. */
5635 if (TARGET_THUMB || !CONST_INT_P (op))
5636 return false;
5637 HOST_WIDE_INT hi, lo;
5638 lo = UINTVAL (op) & 0xffffffffULL;
5639 hi = UINTVAL (op) >> 32;
5640 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5641 }
5642
5643 /* Canonicalize a comparison so that we are more likely to recognize it.
5644 This can be done for a few constant compares, where we can make the
5645 immediate value easier to load. */
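/* E.g. (illustrative, SImode) x > 0xfff: neither 0xfff nor -0xfff is a
valid immediate, but 0x1000 is, so the comparison is rewritten as
x >= 0x1000 and the constant needs no extra insns to synthesize. */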
5646
5647 static void
5648 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5649 bool op0_preserve_value)
5650 {
5651 machine_mode mode;
5652 unsigned HOST_WIDE_INT i, maxval;
5653
5654 mode = GET_MODE (*op0);
5655 if (mode == VOIDmode)
5656 mode = GET_MODE (*op1);
5657
5658 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5659
5660 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5661 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5662 either reversed or (for constant OP1) adjusted to GE/LT.
5663 Similarly for GTU/LEU in Thumb mode. */
5664 if (mode == DImode)
5665 {
5666
5667 if (*code == GT || *code == LE
5668 || *code == GTU || *code == LEU)
5669 {
5670 /* Missing comparison. First try to use an available
5671 comparison. */
5672 if (CONST_INT_P (*op1))
5673 {
5674 i = INTVAL (*op1);
5675 switch (*code)
5676 {
5677 case GT:
5678 case LE:
5679 if (i != maxval)
5680 {
5681 /* Try to convert to GE/LT, unless that would be more
5682 expensive. */
5683 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5684 && arm_const_double_prefer_rsbs_rsc (*op1))
5685 return;
5686 *op1 = GEN_INT (i + 1);
5687 *code = *code == GT ? GE : LT;
5688 }
5689 else
5690 {
5691 /* GT maxval is always false, LE maxval is always true.
5692 We can't fold that away here as we must make a
5693 comparison, but we can fold them to comparisons
5694 with the same result that can be handled:
5695 op0 GT maxval -> op0 LT minval
5696 op0 LE maxval -> op0 GE minval
5697 where minval = (-maxval - 1). */
5698 *op1 = GEN_INT (-maxval - 1);
5699 *code = *code == GT ? LT : GE;
5700 }
5701 return;
5702
5703 case GTU:
5704 case LEU:
5705 if (i != ~((unsigned HOST_WIDE_INT) 0))
5706 {
5707 /* Try to convert to GEU/LTU, unless that would
5708 be more expensive. */
5709 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5710 && arm_const_double_prefer_rsbs_rsc (*op1))
5711 return;
5712 *op1 = GEN_INT (i + 1);
5713 *code = *code == GTU ? GEU : LTU;
5714 }
5715 else
5716 {
5717 /* GTU ~0 is always false, LEU ~0 is always true.
5718 We can't fold that away here as we must make a
5719 comparison, but we can fold them to comparisons
5720 with the same result that can be handled:
5721 op0 GTU ~0 -> op0 LTU 0
5722 op0 LEU ~0 -> op0 GEU 0. */
5723 *op1 = const0_rtx;
5724 *code = *code == GTU ? LTU : GEU;
5725 }
5726 return;
5727
5728 default:
5729 gcc_unreachable ();
5730 }
5731 }
5732
5733 if (!op0_preserve_value)
5734 {
5735 std::swap (*op0, *op1);
5736 *code = (int)swap_condition ((enum rtx_code)*code);
5737 }
5738 }
5739 return;
5740 }
5741
5742 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5743 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5744 to facilitate possible combining with a cmp into 'ands'. */
5745 if (mode == SImode
5746 && GET_CODE (*op0) == ZERO_EXTEND
5747 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5748 && GET_MODE (XEXP (*op0, 0)) == QImode
5749 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5750 && subreg_lowpart_p (XEXP (*op0, 0))
5751 && *op1 == const0_rtx)
5752 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5753 GEN_INT (255));
5754
5755 /* Comparisons smaller than DImode. Only adjust comparisons against
5756 an out-of-range constant. */
5757 if (!CONST_INT_P (*op1)
5758 || const_ok_for_arm (INTVAL (*op1))
5759 || const_ok_for_arm (- INTVAL (*op1)))
5760 return;
5761
5762 i = INTVAL (*op1);
5763
5764 switch (*code)
5765 {
5766 case EQ:
5767 case NE:
5768 return;
5769
5770 case GT:
5771 case LE:
5772 if (i != maxval
5773 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5774 {
5775 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5776 *code = *code == GT ? GE : LT;
5777 return;
5778 }
5779 break;
5780
5781 case GE:
5782 case LT:
5783 if (i != ~maxval
5784 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5785 {
5786 *op1 = GEN_INT (i - 1);
5787 *code = *code == GE ? GT : LE;
5788 return;
5789 }
5790 break;
5791
5792 case GTU:
5793 case LEU:
5794 if (i != ~((unsigned HOST_WIDE_INT) 0)
5795 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5796 {
5797 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5798 *code = *code == GTU ? GEU : LTU;
5799 return;
5800 }
5801 break;
5802
5803 case GEU:
5804 case LTU:
5805 if (i != 0
5806 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5807 {
5808 *op1 = GEN_INT (i - 1);
5809 *code = *code == GEU ? GTU : LEU;
5810 return;
5811 }
5812 break;
5813
5814 default:
5815 gcc_unreachable ();
5816 }
5817 }
5818
5819
5820 /* Define how to find the value returned by a function. */
5821
5822 static rtx
5823 arm_function_value(const_tree type, const_tree func,
5824 bool outgoing ATTRIBUTE_UNUSED)
5825 {
5826 machine_mode mode;
5827 int unsignedp ATTRIBUTE_UNUSED;
5828 rtx r ATTRIBUTE_UNUSED;
5829
5830 mode = TYPE_MODE (type);
5831
5832 if (TARGET_AAPCS_BASED)
5833 return aapcs_allocate_return_reg (mode, type, func);
5834
5835 /* Promote integer types. */
5836 if (INTEGRAL_TYPE_P (type))
5837 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5838
5839 /* Promote small structs returned in a register to full-word size
5840 for big-endian AAPCS. */
5841 if (arm_return_in_msb (type))
5842 {
5843 HOST_WIDE_INT size = int_size_in_bytes (type);
5844 if (size % UNITS_PER_WORD != 0)
5845 {
5846 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5847 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5848 }
5849 }
5850
5851 return arm_libcall_value_1 (mode);
5852 }
5853
5854 /* libcall hashtable helpers. */
5855
5856 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5857 {
5858 static inline hashval_t hash (const rtx_def *);
5859 static inline bool equal (const rtx_def *, const rtx_def *);
5860 static inline void remove (rtx_def *);
5861 };
5862
5863 inline bool
5864 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5865 {
5866 return rtx_equal_p (p1, p2);
5867 }
5868
5869 inline hashval_t
5870 libcall_hasher::hash (const rtx_def *p1)
5871 {
5872 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5873 }
5874
5875 typedef hash_table<libcall_hasher> libcall_table_type;
5876
5877 static void
5878 add_libcall (libcall_table_type *htab, rtx libcall)
5879 {
5880 *htab->find_slot (libcall, INSERT) = libcall;
5881 }
5882
5883 static bool
5884 arm_libcall_uses_aapcs_base (const_rtx libcall)
5885 {
5886 static bool init_done = false;
5887 static libcall_table_type *libcall_htab = NULL;
5888
5889 if (!init_done)
5890 {
5891 init_done = true;
5892
5893 libcall_htab = new libcall_table_type (31);
5894 add_libcall (libcall_htab,
5895 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5896 add_libcall (libcall_htab,
5897 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5898 add_libcall (libcall_htab,
5899 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5900 add_libcall (libcall_htab,
5901 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5902
5903 add_libcall (libcall_htab,
5904 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5905 add_libcall (libcall_htab,
5906 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5907 add_libcall (libcall_htab,
5908 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5909 add_libcall (libcall_htab,
5910 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5911
5912 add_libcall (libcall_htab,
5913 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5914 add_libcall (libcall_htab,
5915 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5916 add_libcall (libcall_htab,
5917 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5918 add_libcall (libcall_htab,
5919 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5920 add_libcall (libcall_htab,
5921 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5922 add_libcall (libcall_htab,
5923 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5924 add_libcall (libcall_htab,
5925 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5926 add_libcall (libcall_htab,
5927 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5928 add_libcall (libcall_htab,
5929 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5930 add_libcall (libcall_htab,
5931 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5932
5933 /* Values from double-precision helper functions are returned in core
5934 registers if the selected core only supports single-precision
5935 arithmetic, even if we are using the hard-float ABI. The same is
5936 true for single-precision helpers except in case of MVE, because in
5937 MVE we will be using the hard-float ABI on a CPU which doesn't support
5938 single-precision operations in hardware. In MVE the following check
5939 enables use of emulation for the single-precision arithmetic
5940 operations. */
5941 if (TARGET_HAVE_MVE)
5942 {
5943 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5944 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5945 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5946 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5947 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5948 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5949 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5950 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5951 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5952 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5953 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5954 }
5955 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5956 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5957 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5958 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5959 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5960 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5961 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5962 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5963 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5964 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5965 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5966 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5967 SFmode));
5968 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5969 DFmode));
5970 add_libcall (libcall_htab,
5971 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5972 }
5973
5974 return libcall && libcall_htab->find (libcall) != NULL;
5975 }
5976
5977 static rtx
5978 arm_libcall_value_1 (machine_mode mode)
5979 {
5980 if (TARGET_AAPCS_BASED)
5981 return aapcs_libcall_value (mode);
5982 else if (TARGET_IWMMXT_ABI
5983 && arm_vector_mode_supported_p (mode))
5984 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5985 else
5986 return gen_rtx_REG (mode, ARG_REGISTER (1));
5987 }
5988
5989 /* Define how to find the value returned by a library function
5990 assuming the value has mode MODE. */
5991
5992 static rtx
5993 arm_libcall_value (machine_mode mode, const_rtx libcall)
5994 {
5995 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5996 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5997 {
5998 /* The following libcalls return their result in integer registers,
5999 even though they return a floating point value. */
6000 if (arm_libcall_uses_aapcs_base (libcall))
6001 return gen_rtx_REG (mode, ARG_REGISTER(1));
6002
6003 }
6004
6005 return arm_libcall_value_1 (mode);
6006 }
6007
6008 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6009
6010 static bool
6011 arm_function_value_regno_p (const unsigned int regno)
6012 {
6013 if (regno == ARG_REGISTER (1)
6014 || (TARGET_32BIT
6015 && TARGET_AAPCS_BASED
6016 && TARGET_HARD_FLOAT
6017 && regno == FIRST_VFP_REGNUM)
6018 || (TARGET_IWMMXT_ABI
6019 && regno == FIRST_IWMMXT_REGNUM))
6020 return true;
6021
6022 return false;
6023 }
6024
6025 /* Determine the amount of memory needed to store the possible return
6026 registers of an untyped call. */
6027 int
6028 arm_apply_result_size (void)
6029 {
6030 int size = 16;
6031
6032 if (TARGET_32BIT)
6033 {
6034 if (TARGET_HARD_FLOAT_ABI)
6035 size += 32;
6036 if (TARGET_IWMMXT_ABI)
6037 size += 8;
6038 }
6039
6040 return size;
6041 }
6042
6043 /* Decide whether TYPE should be returned in memory (true)
6044 or in a register (false). FNTYPE is the type of the function making
6045 the call. */
6046 static bool
6047 arm_return_in_memory (const_tree type, const_tree fntype)
6048 {
6049 HOST_WIDE_INT size;
6050
6051 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6052
6053 if (TARGET_AAPCS_BASED)
6054 {
6055 /* Simple, non-aggregate types (i.e. not including vectors and
6056 complex) are always returned in a register (or registers).
6057 We don't care about which register here, so we can short-cut
6058 some of the detail. */
6059 if (!AGGREGATE_TYPE_P (type)
6060 && TREE_CODE (type) != VECTOR_TYPE
6061 && TREE_CODE (type) != COMPLEX_TYPE)
6062 return false;
6063
6064 /* Any return value that is no larger than one word can be
6065 returned in r0. */
6066 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6067 return false;
6068
6069 /* Check any available co-processors to see if they accept the
6070 type as a register candidate (VFP, for example, can return
6071 some aggregates in consecutive registers). These aren't
6072 available if the call is variadic. */
6073 if (aapcs_select_return_coproc (type, fntype) >= 0)
6074 return false;
6075
6076 /* Vector values should be returned using ARM registers, not
6077 memory (unless they're over 16 bytes, which will break since
6078 we only have four call-clobbered registers to play with). */
6079 if (TREE_CODE (type) == VECTOR_TYPE)
6080 return (size < 0 || size > (4 * UNITS_PER_WORD));
6081
6082 /* The rest go in memory. */
6083 return true;
6084 }
6085
6086 if (TREE_CODE (type) == VECTOR_TYPE)
6087 return (size < 0 || size > (4 * UNITS_PER_WORD));
6088
6089 if (!AGGREGATE_TYPE_P (type) &&
6090 (TREE_CODE (type) != VECTOR_TYPE))
6091 /* All simple types are returned in registers. */
6092 return false;
6093
6094 if (arm_abi != ARM_ABI_APCS)
6095 {
6096 /* ATPCS and later return aggregate types in memory only if they are
6097 larger than a word (or are variable size). */
6098 return (size < 0 || size > UNITS_PER_WORD);
6099 }
6100
6101 /* For the arm-wince targets we choose to be compatible with Microsoft's
6102 ARM and Thumb compilers, which always return aggregates in memory. */
6103 #ifndef ARM_WINCE
6104 /* All structures/unions bigger than one word are returned in memory.
6105 Also catch the case where int_size_in_bytes returns -1. In this case
6106 the aggregate is either huge or of variable size, and in either case
6107 we will want to return it via memory and not in a register. */
6108 if (size < 0 || size > UNITS_PER_WORD)
6109 return true;
6110
6111 if (TREE_CODE (type) == RECORD_TYPE)
6112 {
6113 tree field;
6114
6115 /* For a struct the APCS says that we only return in a register
6116 if the type is 'integer like' and every addressable element
6117 has an offset of zero. For practical purposes this means
6118 that the structure can have at most one non bit-field element
6119 and that this element must be the first one in the structure. */
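/* Illustrative examples under these APCS rules: struct { int a; } can be
returned in r0, but struct { short a; char b; } cannot (the second
member is addressable) and struct { float f; } cannot (floats are
rejected below). */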
6120
6121 /* Find the first field, ignoring non FIELD_DECL things which will
6122 have been created by C++. */
6123 /* NOTE: This code is deprecated and has not been updated to handle
6124 DECL_FIELD_ABI_IGNORED. */
6125 for (field = TYPE_FIELDS (type);
6126 field && TREE_CODE (field) != FIELD_DECL;
6127 field = DECL_CHAIN (field))
6128 continue;
6129
6130 if (field == NULL)
6131 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6132
6133 /* Check that the first field is valid for returning in a register. */
6134
6135 /* ... Floats are not allowed */
6136 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6137 return true;
6138
6139 /* ... Aggregates that are not themselves valid for returning in
6140 a register are not allowed. */
6141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6142 return true;
6143
6144 /* Now check the remaining fields, if any. Only bitfields are allowed,
6145 since they are not addressable. */
6146 for (field = DECL_CHAIN (field);
6147 field;
6148 field = DECL_CHAIN (field))
6149 {
6150 if (TREE_CODE (field) != FIELD_DECL)
6151 continue;
6152
6153 if (!DECL_BIT_FIELD_TYPE (field))
6154 return true;
6155 }
6156
6157 return false;
6158 }
6159
6160 if (TREE_CODE (type) == UNION_TYPE)
6161 {
6162 tree field;
6163
6164 /* Unions can be returned in registers if every element is
6165 integral, or can be returned in an integer register. */
6166 for (field = TYPE_FIELDS (type);
6167 field;
6168 field = DECL_CHAIN (field))
6169 {
6170 if (TREE_CODE (field) != FIELD_DECL)
6171 continue;
6172
6173 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6174 return true;
6175
6176 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6177 return true;
6178 }
6179
6180 return false;
6181 }
6182 #endif /* not ARM_WINCE */
6183
6184 /* Return all other types in memory. */
6185 return true;
6186 }
6187
6188 const struct pcs_attribute_arg
6189 {
6190 const char *arg;
6191 enum arm_pcs value;
6192 } pcs_attribute_args[] =
6193 {
6194 {"aapcs", ARM_PCS_AAPCS},
6195 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6196 #if 0
6197 /* We could recognize these, but changes would be needed elsewhere
6198 * to implement them. */
6199 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6200 {"atpcs", ARM_PCS_ATPCS},
6201 {"apcs", ARM_PCS_APCS},
6202 #endif
6203 {NULL, ARM_PCS_UNKNOWN}
6204 };
6205
6206 static enum arm_pcs
6207 arm_pcs_from_attribute (tree attr)
6208 {
6209 const struct pcs_attribute_arg *ptr;
6210 const char *arg;
6211
6212 /* Get the value of the argument. */
6213 if (TREE_VALUE (attr) == NULL_TREE
6214 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6215 return ARM_PCS_UNKNOWN;
6216
6217 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6218
6219 /* Check it against the list of known arguments. */
6220 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6221 if (streq (arg, ptr->arg))
6222 return ptr->value;
6223
6224 /* An unrecognized PCS variant. */
6225 return ARM_PCS_UNKNOWN;
6226 }
6227
6228 /* Get the PCS variant to use for this call. TYPE is the function's type
6229 specification, DECL is the specific declaration. DECL may be null if
6230 the call could be indirect or if this is a library call. */
6231 static enum arm_pcs
6232 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6233 {
6234 bool user_convention = false;
6235 enum arm_pcs user_pcs = arm_pcs_default;
6236 tree attr;
6237
6238 gcc_assert (type);
6239
6240 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6241 if (attr)
6242 {
6243 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6244 user_convention = true;
6245 }
6246
6247 if (TARGET_AAPCS_BASED)
6248 {
6249 /* Detect varargs functions. These always use the base rules
6250 (no argument is ever a candidate for a co-processor
6251 register). */
6252 bool base_rules = stdarg_p (type);
6253
6254 if (user_convention)
6255 {
6256 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6257 sorry ("non-AAPCS derived PCS variant");
6258 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6259 error ("variadic functions must use the base AAPCS variant");
6260 }
6261
6262 if (base_rules)
6263 return ARM_PCS_AAPCS;
6264 else if (user_convention)
6265 return user_pcs;
6266 #if 0
6267 /* Unfortunately, this is not safe and can lead to wrong code
6268 being generated (PR96882). Not all calls into the back-end
6269 pass the DECL, so it is unsafe to make any PCS-changing
6270 decisions based on it. In particular the RETURN_IN_MEMORY
6271 hook is only ever passed a TYPE. This needs revisiting to
6272 see if there are any partial improvements that can be
6273 re-enabled. */
6274 else if (decl && flag_unit_at_a_time)
6275 {
6276 /* Local functions never leak outside this compilation unit,
6277 so we are free to use whatever conventions are
6278 appropriate. */
6279 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6280 cgraph_node *local_info_node
6281 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6282 if (local_info_node && local_info_node->local)
6283 return ARM_PCS_AAPCS_LOCAL;
6284 }
6285 #endif
6286 }
6287 else if (user_convention && user_pcs != arm_pcs_default)
6288 sorry ("PCS variant");
6289
6290 /* For everything else we use the target's default. */
6291 return arm_pcs_default;
6292 }
6293
6294
6295 static void
6296 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6297 const_tree fntype ATTRIBUTE_UNUSED,
6298 rtx libcall ATTRIBUTE_UNUSED,
6299 const_tree fndecl ATTRIBUTE_UNUSED)
6300 {
6301 /* Record the unallocated VFP registers. */
6302 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6303 pcum->aapcs_vfp_reg_alloc = 0;
6304 }
6305
6306 /* Bitmasks that indicate whether earlier versions of GCC would have
6307 taken a different path through the ABI logic. This should result in
6308 a -Wpsabi warning if the earlier path led to a different ABI decision.
6309
6310 WARN_PSABI_EMPTY_CXX17_BASE
6311 Indicates that the type includes an artificial empty C++17 base field
6312 that, prior to GCC 10.1, would prevent the type from being treated as
6313 a HFA or HVA. See PR94711 for details.
6314
6315 WARN_PSABI_NO_UNIQUE_ADDRESS
6316 Indicates that the type includes an empty [[no_unique_address]] field
6317 that, prior to GCC 10.1, would prevent the type from being treated as
6318 a HFA or HVA. */
6319 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6320 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6321 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6322
6323 /* Walk down the type tree of TYPE counting consecutive base elements.
6324 If *MODEP is VOIDmode, then set it to the first valid floating point
6325 type. If a non-floating point type is found, or if a floating point
6326 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6327 otherwise return the count in the sub-tree.
6328
6329 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6330 function has changed its behavior relative to earlier versions of GCC.
6331 Normally the argument should be nonnull and point to a zero-initialized
6332 variable. The function then records whether the ABI decision might
6333 be affected by a known fix to the ABI logic, setting the associated
6334 WARN_PSABI_* bits if so.
6335
6336 When the argument is instead a null pointer, the function tries to
6337 simulate the behavior of GCC before all such ABI fixes were made.
6338 This is useful to check whether the function returns something
6339 different after the ABI fixes. */
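/* Illustrative examples: struct { float x, y, z; } yields *MODEP == SFmode
and a count of 3 (a homogeneous aggregate), whereas
struct { float f; double d; } mixes base modes and returns -1.
A _Complex double field counts as two DFmode elements. */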
6340 static int
6341 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6342 unsigned int *warn_psabi_flags)
6343 {
6344 machine_mode mode;
6345 HOST_WIDE_INT size;
6346
6347 switch (TREE_CODE (type))
6348 {
6349 case REAL_TYPE:
6350 mode = TYPE_MODE (type);
6351 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6352 return -1;
6353
6354 if (*modep == VOIDmode)
6355 *modep = mode;
6356
6357 if (*modep == mode)
6358 return 1;
6359
6360 break;
6361
6362 case COMPLEX_TYPE:
6363 mode = TYPE_MODE (TREE_TYPE (type));
6364 if (mode != DFmode && mode != SFmode)
6365 return -1;
6366
6367 if (*modep == VOIDmode)
6368 *modep = mode;
6369
6370 if (*modep == mode)
6371 return 2;
6372
6373 break;
6374
6375 case VECTOR_TYPE:
6376 /* Use V2SImode and V4SImode as representatives of all 64-bit
6377 and 128-bit vector types, whether or not those modes are
6378 supported with the present options. */
6379 size = int_size_in_bytes (type);
6380 switch (size)
6381 {
6382 case 8:
6383 mode = V2SImode;
6384 break;
6385 case 16:
6386 mode = V4SImode;
6387 break;
6388 default:
6389 return -1;
6390 }
6391
6392 if (*modep == VOIDmode)
6393 *modep = mode;
6394
6395 /* Vector modes are considered to be opaque: two vectors are
6396 equivalent for the purposes of being homogeneous aggregates
6397 if they are the same size. */
6398 if (*modep == mode)
6399 return 1;
6400
6401 break;
6402
6403 case ARRAY_TYPE:
6404 {
6405 int count;
6406 tree index = TYPE_DOMAIN (type);
6407
6408 /* Can't handle incomplete types nor sizes that are not
6409 fixed. */
6410 if (!COMPLETE_TYPE_P (type)
6411 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6412 return -1;
6413
6414 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6415 warn_psabi_flags);
6416 if (count == -1
6417 || !index
6418 || !TYPE_MAX_VALUE (index)
6419 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6420 || !TYPE_MIN_VALUE (index)
6421 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6422 || count < 0)
6423 return -1;
6424
6425 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6426 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6427
6428 /* There must be no padding. */
6429 if (wi::to_wide (TYPE_SIZE (type))
6430 != count * GET_MODE_BITSIZE (*modep))
6431 return -1;
6432
6433 return count;
6434 }
6435
6436 case RECORD_TYPE:
6437 {
6438 int count = 0;
6439 int sub_count;
6440 tree field;
6441
6442 /* Can't handle incomplete types nor sizes that are not
6443 fixed. */
6444 if (!COMPLETE_TYPE_P (type)
6445 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6446 return -1;
6447
6448 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6449 {
6450 if (TREE_CODE (field) != FIELD_DECL)
6451 continue;
6452
6453 if (DECL_FIELD_ABI_IGNORED (field))
6454 {
6455 /* See whether this is something that earlier versions of
6456 GCC failed to ignore. */
6457 unsigned int flag;
6458 if (lookup_attribute ("no_unique_address",
6459 DECL_ATTRIBUTES (field)))
6460 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6461 else if (cxx17_empty_base_field_p (field))
6462 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6463 else
6464 /* No compatibility problem. */
6465 continue;
6466
6467 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6468 if (warn_psabi_flags)
6469 {
6470 *warn_psabi_flags |= flag;
6471 continue;
6472 }
6473 }
6474 /* A zero-width bitfield may affect layout in some
6475 circumstances, but adds no members. The determination
6476 of whether or not a type is an HFA is performed after
6477 layout is complete, so if the type still looks like an
6478 HFA afterwards, it is still classed as one. This is
6479 potentially an ABI break for the hard-float ABI. */
6480 else if (DECL_BIT_FIELD (field)
6481 && integer_zerop (DECL_SIZE (field)))
6482 {
6483 /* Prior to GCC-12 these fields were stripped early,
6484 hiding them from the back-end entirely and
6485 resulting in the correct behaviour for argument
6486 passing. Simulate that old behaviour without
6487 generating a warning. */
6488 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6489 continue;
6490 if (warn_psabi_flags)
6491 {
6492 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6493 continue;
6494 }
6495 }
6496
6497 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6498 warn_psabi_flags);
6499 if (sub_count < 0)
6500 return -1;
6501 count += sub_count;
6502 }
6503
6504 /* There must be no padding. */
6505 if (wi::to_wide (TYPE_SIZE (type))
6506 != count * GET_MODE_BITSIZE (*modep))
6507 return -1;
6508
6509 return count;
6510 }
6511
6512 case UNION_TYPE:
6513 case QUAL_UNION_TYPE:
6514 {
6515 /* These aren't very interesting except in a degenerate case. */
6516 int count = 0;
6517 int sub_count;
6518 tree field;
6519
6520 /* Can't handle incomplete types nor sizes that are not
6521 fixed. */
6522 if (!COMPLETE_TYPE_P (type)
6523 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6524 return -1;
6525
6526 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 {
6528 if (TREE_CODE (field) != FIELD_DECL)
6529 continue;
6530
6531 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6532 warn_psabi_flags);
6533 if (sub_count < 0)
6534 return -1;
6535 count = count > sub_count ? count : sub_count;
6536 }
6537
6538 /* There must be no padding. */
6539 if (wi::to_wide (TYPE_SIZE (type))
6540 != count * GET_MODE_BITSIZE (*modep))
6541 return -1;
6542
6543 return count;
6544 }
6545
6546 default:
6547 break;
6548 }
6549
6550 return -1;
6551 }
6552
6553 /* Return true if PCS_VARIANT should use VFP registers. */
6554 static bool
6555 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6556 {
6557 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6558 {
6559 static bool seen_thumb1_vfp = false;
6560
6561 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6562 {
6563 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6564 /* sorry() is not immediately fatal, so only display this once. */
6565 seen_thumb1_vfp = true;
6566 }
6567
6568 return true;
6569 }
6570
6571 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6572 return false;
6573
6574 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6575 (TARGET_VFP_DOUBLE || !is_double));
6576 }
6577
6578 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6579 suitable for passing or returning in VFP registers for the PCS
6580 variant selected. If it is, then *BASE_MODE is updated to contain
6581 a machine mode describing each element of the argument's type and
6582 *COUNT to hold the number of such elements. */
6583 static bool
6584 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6585 machine_mode mode, const_tree type,
6586 machine_mode *base_mode, int *count)
6587 {
6588 machine_mode new_mode = VOIDmode;
6589
6590 /* If we have the type information, prefer that to working things
6591 out from the mode. */
6592 if (type)
6593 {
6594 unsigned int warn_psabi_flags = 0;
6595 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6596 &warn_psabi_flags);
6597 if (ag_count > 0 && ag_count <= 4)
6598 {
6599 static unsigned last_reported_type_uid;
6600 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6601 int alt;
6602 if (warn_psabi
6603 && warn_psabi_flags
6604 && uid != last_reported_type_uid
6605 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6606 != ag_count))
6607 {
6608 const char *url10
6609 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6610 const char *url12
6611 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6612 gcc_assert (alt == -1);
6613 last_reported_type_uid = uid;
6614 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6615 qualification. */
6616 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6617 inform (input_location, "parameter passing for argument of "
6618 "type %qT with %<[[no_unique_address]]%> members "
6619 "changed %{in GCC 10.1%}",
6620 TYPE_MAIN_VARIANT (type), url10);
6621 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6622 inform (input_location, "parameter passing for argument of "
6623 "type %qT when C++17 is enabled changed to match "
6624 "C++14 %{in GCC 10.1%}",
6625 TYPE_MAIN_VARIANT (type), url10);
6626 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6627 inform (input_location, "parameter passing for argument of "
6628 "type %qT changed %{in GCC 12.1%}",
6629 TYPE_MAIN_VARIANT (type), url12);
6630 }
6631 *count = ag_count;
6632 }
6633 else
6634 return false;
6635 }
6636 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6637 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6638 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6639 {
6640 *count = 1;
6641 new_mode = mode;
6642 }
6643 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6644 {
6645 *count = 2;
6646 new_mode = (mode == DCmode ? DFmode : SFmode);
6647 }
6648 else
6649 return false;
6650
6651
6652 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6653 return false;
6654
6655 *base_mode = new_mode;
6656
6657 if (TARGET_GENERAL_REGS_ONLY)
6658 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6659 type);
6660
6661 return true;
6662 }
6663
6664 static bool
6665 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6666 machine_mode mode, const_tree type)
6667 {
6668 int count ATTRIBUTE_UNUSED;
6669 machine_mode ag_mode ATTRIBUTE_UNUSED;
6670
6671 if (!use_vfp_abi (pcs_variant, false))
6672 return false;
6673 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6674 &ag_mode, &count);
6675 }
6676
6677 static bool
6678 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6679 const_tree type)
6680 {
6681 if (!use_vfp_abi (pcum->pcs_variant, false))
6682 return false;
6683
6684 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6685 &pcum->aapcs_vfp_rmode,
6686 &pcum->aapcs_vfp_rcount);
6687 }
6688
6689 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6690 for the behaviour of this function. */
6691
6692 static bool
6693 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6694 const_tree type ATTRIBUTE_UNUSED)
6695 {
6696 int rmode_size
6697 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6698 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6699 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6700 int regno;
6701
6702 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6703 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6704 {
6705 pcum->aapcs_vfp_reg_alloc = mask << regno;
6706 if (mode == BLKmode
6707 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6708 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6709 {
6710 int i;
6711 int rcount = pcum->aapcs_vfp_rcount;
6712 int rshift = shift;
6713 machine_mode rmode = pcum->aapcs_vfp_rmode;
6714 rtx par;
6715 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6716 {
6717 /* Avoid using unsupported vector modes. */
6718 if (rmode == V2SImode)
6719 rmode = DImode;
6720 else if (rmode == V4SImode)
6721 {
6722 rmode = DImode;
6723 rcount *= 2;
6724 rshift /= 2;
6725 }
6726 }
6727 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6728 for (i = 0; i < rcount; i++)
6729 {
6730 rtx tmp = gen_rtx_REG (rmode,
6731 FIRST_VFP_REGNUM + regno + i * rshift);
6732 tmp = gen_rtx_EXPR_LIST
6733 (VOIDmode, tmp,
6734 GEN_INT (i * GET_MODE_SIZE (rmode)));
6735 XVECEXP (par, 0, i) = tmp;
6736 }
6737
6738 pcum->aapcs_reg = par;
6739 }
6740 else
6741 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6742 return true;
6743 }
6744 return false;
6745 }
6746
6747 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6748 comment there for the behaviour of this function. */
6749
6750 static rtx
6751 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6752 machine_mode mode,
6753 const_tree type ATTRIBUTE_UNUSED)
6754 {
6755 if (!use_vfp_abi (pcs_variant, false))
6756 return NULL;
6757
6758 if (mode == BLKmode
6759 || (GET_MODE_CLASS (mode) == MODE_INT
6760 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6761 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6762 {
6763 int count;
6764 machine_mode ag_mode;
6765 int i;
6766 rtx par;
6767 int shift;
6768
6769 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6770 &ag_mode, &count);
6771
6772 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6773 {
6774 if (ag_mode == V2SImode)
6775 ag_mode = DImode;
6776 else if (ag_mode == V4SImode)
6777 {
6778 ag_mode = DImode;
6779 count *= 2;
6780 }
6781 }
6782 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6783 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6784 for (i = 0; i < count; i++)
6785 {
6786 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6787 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6788 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6789 XVECEXP (par, 0, i) = tmp;
6790 }
6791
6792 return par;
6793 }
6794
6795 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6796 }
6797
6798 static void
6799 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6800 machine_mode mode ATTRIBUTE_UNUSED,
6801 const_tree type ATTRIBUTE_UNUSED)
6802 {
6803 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6804 pcum->aapcs_vfp_reg_alloc = 0;
6805 return;
6806 }
6807
6808 #define AAPCS_CP(X) \
6809 { \
6810 aapcs_ ## X ## _cum_init, \
6811 aapcs_ ## X ## _is_call_candidate, \
6812 aapcs_ ## X ## _allocate, \
6813 aapcs_ ## X ## _is_return_candidate, \
6814 aapcs_ ## X ## _allocate_return_reg, \
6815 aapcs_ ## X ## _advance \
6816 }
6817
6818 /* Table of co-processors that can be used to pass arguments in
6819 registers. Ideally no argument should be a candidate for more than
6820 one co-processor table entry, but the table is processed in order
6821 and stops after the first match. If that entry then fails to put
6822 the argument into a co-processor register, the argument will go on
6823 the stack. */
6824 static struct
6825 {
6826 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6827 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6828
6829 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6830 BLKmode) is a candidate for this co-processor's registers; this
6831 function should ignore any position-dependent state in
6832 CUMULATIVE_ARGS and only use call-type dependent information. */
6833 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6834
6835 /* Return true if the argument does get a co-processor register; it
6836 should set aapcs_reg to an RTX of the register allocated as is
6837 required for a return from FUNCTION_ARG. */
6838 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6839
6840 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6841 be returned in this co-processor's registers. */
6842 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6843
6844 /* Allocate and return an RTX element to hold the return type of a call. This
6845 routine must not fail and will only be called if is_return_candidate
6846 returned true with the same parameters. */
6847 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6848
6849 /* Finish processing this argument and prepare to start processing
6850 the next one. */
6851 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6852 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6853 {
6854 AAPCS_CP(vfp)
6855 };
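/* For reference, AAPCS_CP (vfp) above expands, via the macro, to the
   initializer

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate, aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate, aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance }

   so the VFP register set is currently the only co-processor slot.  */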
6856
6857 #undef AAPCS_CP
6858
6859 static int
6860 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6861 const_tree type)
6862 {
6863 int i;
6864
6865 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6866 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6867 return i;
6868
6869 return -1;
6870 }
6871
6872 static int
6873 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6874 {
6875 /* We aren't passed a decl, so we can't check that a call is local.
6876 However, it isn't clear that that would be a win anyway, since it
6877 might limit some tail-calling opportunities. */
6878 enum arm_pcs pcs_variant;
6879
6880 if (fntype)
6881 {
6882 const_tree fndecl = NULL_TREE;
6883
6884 if (TREE_CODE (fntype) == FUNCTION_DECL)
6885 {
6886 fndecl = fntype;
6887 fntype = TREE_TYPE (fntype);
6888 }
6889
6890 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6891 }
6892 else
6893 pcs_variant = arm_pcs_default;
6894
6895 if (pcs_variant != ARM_PCS_AAPCS)
6896 {
6897 int i;
6898
6899 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6900 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6901 TYPE_MODE (type),
6902 type))
6903 return i;
6904 }
6905 return -1;
6906 }
6907
6908 static rtx
6909 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6910 const_tree fntype)
6911 {
6912 /* We aren't passed a decl, so we can't check that a call is local.
6913 However, it isn't clear that that would be a win anyway, since it
6914 might limit some tail-calling opportunities. */
6915 enum arm_pcs pcs_variant;
6916 int unsignedp ATTRIBUTE_UNUSED;
6917
6918 if (fntype)
6919 {
6920 const_tree fndecl = NULL_TREE;
6921
6922 if (TREE_CODE (fntype) == FUNCTION_DECL)
6923 {
6924 fndecl = fntype;
6925 fntype = TREE_TYPE (fntype);
6926 }
6927
6928 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6929 }
6930 else
6931 pcs_variant = arm_pcs_default;
6932
6933 /* Promote integer types. */
6934 if (type && INTEGRAL_TYPE_P (type))
6935 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6936
6937 if (pcs_variant != ARM_PCS_AAPCS)
6938 {
6939 int i;
6940
6941 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6942 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6943 type))
6944 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6945 mode, type);
6946 }
6947
6948 /* Promote small structs returned in a register to full-word size
6949 for big-endian AAPCS. */
6950 if (type && arm_return_in_msb (type))
6951 {
6952 HOST_WIDE_INT size = int_size_in_bytes (type);
6953 if (size % UNITS_PER_WORD != 0)
6954 {
6955 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6956 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6957 }
6958 }
6959
6960 return gen_rtx_REG (mode, R0_REGNUM);
6961 }
6962
6963 static rtx
6964 aapcs_libcall_value (machine_mode mode)
6965 {
6966 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6967 && GET_MODE_SIZE (mode) <= 4)
6968 mode = SImode;
6969
6970 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6971 }
6972
6973 /* Lay out a function argument using the AAPCS rules. The rule
6974 numbers referred to here are those in the AAPCS. */
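/* A minimal worked example (illustrative only, assuming the base AAPCS
   with software floating-point):

     void f (int a, double b, int c);

   'a' goes in r0; 'b' needs doubleword alignment, so the NCRN is rounded
   up (rule C3) and it occupies r2/r3 (rule C4); 'c' then finds no core
   registers left and is passed on the stack (rules C6-C8).  */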
6975 static void
6976 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6977 const_tree type, bool named)
6978 {
6979 int nregs, nregs2;
6980 int ncrn;
6981
6982 /* We only need to do this once per argument. */
6983 if (pcum->aapcs_arg_processed)
6984 return;
6985
6986 pcum->aapcs_arg_processed = true;
6987
6988 /* Special case: if named is false then we are handling an incoming
6989 anonymous argument which is on the stack. */
6990 if (!named)
6991 return;
6992
6993 /* Is this a potential co-processor register candidate? */
6994 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6995 {
6996 int slot = aapcs_select_call_coproc (pcum, mode, type);
6997 pcum->aapcs_cprc_slot = slot;
6998
6999 /* We don't have to apply any of the rules from part B of the
7000 preparation phase; these are handled elsewhere in the
7001 compiler. */
7002
7003 if (slot >= 0)
7004 {
7005 /* A Co-processor register candidate goes either in its own
7006 class of registers or on the stack. */
7007 if (!pcum->aapcs_cprc_failed[slot])
7008 {
7009 /* C1.cp - Try to allocate the argument to co-processor
7010 registers. */
7011 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7012 return;
7013
7014 /* C2.cp - Put the argument on the stack and note that we
7015 can't assign any more candidates in this slot. We also
7016 need to note that we have allocated stack space, so that
7017 we won't later try to split a non-cprc candidate between
7018 core registers and the stack. */
7019 pcum->aapcs_cprc_failed[slot] = true;
7020 pcum->can_split = false;
7021 }
7022
7023 /* We didn't get a register, so this argument goes on the
7024 stack. */
7025 gcc_assert (pcum->can_split == false);
7026 return;
7027 }
7028 }
7029
7030 /* C3 - For double-word aligned arguments, round the NCRN up to the
7031 next even number. */
7032 ncrn = pcum->aapcs_ncrn;
7033 if (ncrn & 1)
7034 {
7035 int res = arm_needs_doubleword_align (mode, type);
7036 /* Only warn during RTL expansion of call stmts, otherwise we would
7037 warn e.g. during gimplification even on functions that will be
7038 always inlined, and we'd warn multiple times. Don't warn when
7039 called in expand_function_start either, as we warn instead in
7040 arm_function_arg_boundary in that case. */
7041 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7042 inform (input_location, "parameter passing for argument of type "
7043 "%qT changed in GCC 7.1", type);
7044 else if (res > 0)
7045 ncrn++;
7046 }
7047
7048 nregs = ARM_NUM_REGS2 (mode, type);
7049
7050 /* Sigh, this test should really assert that nregs > 0, but a GCC
7051 extension allows empty structs and then gives them empty size; it
7052 then allows such a structure to be passed by value. For some of
7053 the code below we have to pretend that such an argument has
7054 non-zero size so that we 'locate' it correctly either in
7055 registers or on the stack. */
7056 gcc_assert (nregs >= 0);
7057
7058 nregs2 = nregs ? nregs : 1;
7059
7060 /* C4 - Argument fits entirely in core registers. */
7061 if (ncrn + nregs2 <= NUM_ARG_REGS)
7062 {
7063 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7064 pcum->aapcs_next_ncrn = ncrn + nregs;
7065 return;
7066 }
7067
7068 /* C5 - Some core registers left and there are no arguments already
7069 on the stack: split this argument between the remaining core
7070 registers and the stack. */
7071 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7072 {
7073 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7074 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7075 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7076 return;
7077 }
7078
7079 /* C6 - NCRN is set to 4. */
7080 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7081
7082 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7083 return;
7084 }
7085
7086 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7087 for a call to a function whose data type is FNTYPE.
7088 For a library call, FNTYPE is NULL. */
7089 void
7090 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7091 rtx libname,
7092 tree fndecl ATTRIBUTE_UNUSED)
7093 {
7094 /* Long call handling. */
7095 if (fntype)
7096 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7097 else
7098 pcum->pcs_variant = arm_pcs_default;
7099
7100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7101 {
7102 if (arm_libcall_uses_aapcs_base (libname))
7103 pcum->pcs_variant = ARM_PCS_AAPCS;
7104
7105 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7106 pcum->aapcs_reg = NULL_RTX;
7107 pcum->aapcs_partial = 0;
7108 pcum->aapcs_arg_processed = false;
7109 pcum->aapcs_cprc_slot = -1;
7110 pcum->can_split = true;
7111
7112 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7113 {
7114 int i;
7115
7116 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7117 {
7118 pcum->aapcs_cprc_failed[i] = false;
7119 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7120 }
7121 }
7122 return;
7123 }
7124
7125 /* Legacy ABIs */
7126
7127 /* On the ARM, the offset starts at 0. */
7128 pcum->nregs = 0;
7129 pcum->iwmmxt_nregs = 0;
7130 pcum->can_split = true;
7131
7132 /* Varargs vectors are treated the same as long long.
7133 named_count avoids having to change the way arm handles 'named'. */
7134 pcum->named_count = 0;
7135 pcum->nargs = 0;
7136
7137 if (TARGET_REALLY_IWMMXT && fntype)
7138 {
7139 tree fn_arg;
7140
7141 for (fn_arg = TYPE_ARG_TYPES (fntype);
7142 fn_arg;
7143 fn_arg = TREE_CHAIN (fn_arg))
7144 pcum->named_count += 1;
7145
7146 if (! pcum->named_count)
7147 pcum->named_count = INT_MAX;
7148 }
7149 }
7150
7151 /* Return 2 if double word alignment is required for argument passing,
7152 but wasn't required before the fix for PR88469.
7153 Return 1 if double word alignment is required for argument passing.
7154 Return -1 if double word alignment used to be required for argument
7155 passing before PR77728 ABI fix, but is not required anymore.
7156 Return 0 if double word alignment is not required and wasn't required
7157 before either. */
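/* As an illustration (not an exhaustive statement of the ABI): an argument
   of type 'long long', or of a structure type such as

     struct pair { double d; int i; };

   has 8-byte alignment, so this function returns 1 and the argument is
   passed in an even-numbered core register pair or at an 8-byte aligned
   stack offset.  */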
7158 static int
7159 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7160 {
7161 if (!type)
7162 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7163
7164 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7165 if (!AGGREGATE_TYPE_P (type))
7166 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7167
7168 /* Array types: Use member alignment of element type. */
7169 if (TREE_CODE (type) == ARRAY_TYPE)
7170 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7171
7172 int ret = 0;
7173 int ret2 = 0;
7174 /* Record/aggregate types: Use greatest member alignment of any member.
7175
7176 Note that we explicitly consider zero-sized fields here, even though
7177 they don't map to AAPCS machine types. For example, in:
7178
7179 struct __attribute__((aligned(8))) empty {};
7180
7181 struct s {
7182 [[no_unique_address]] empty e;
7183 int x;
7184 };
7185
7186 "s" contains only one Fundamental Data Type (the int field)
7187 but gains 8-byte alignment and size thanks to "e". */
7188 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7189 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7190 {
7191 if (TREE_CODE (field) == FIELD_DECL)
7192 return 1;
7193 else
7194 /* Before PR77728 fix, we were incorrectly considering also
7195 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7196 Make sure we can warn about that with -Wpsabi. */
7197 ret = -1;
7198 }
7199 else if (TREE_CODE (field) == FIELD_DECL
7200 && DECL_BIT_FIELD_TYPE (field)
7201 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7202 ret2 = 1;
7203
7204 if (ret2)
7205 return 2;
7206
7207 return ret;
7208 }
7209
7210
7211 /* Determine where to put an argument to a function.
7212 Value is zero to push the argument on the stack,
7213 or a hard register in which to store the argument.
7214
7215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7216 the preceding args and about the function being called.
7217 ARG is a description of the argument.
7218
7219 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7220 other arguments are passed on the stack. If (NAMED == 0) (which happens
7221 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7222 defined), say it is passed on the stack (function_prologue will
7223 indeed make it be passed on the stack if necessary). */
7224
7225 static rtx
7226 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7227 {
7228 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7229 int nregs;
7230
7231 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7232 a call insn (op3 of a call_value insn). */
7233 if (arg.end_marker_p ())
7234 return const0_rtx;
7235
7236 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7237 {
7238 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7239 return pcum->aapcs_reg;
7240 }
7241
7242 /* Varargs vectors are treated the same as long long.
7243 named_count avoids having to change the way arm handles 'named'. */
7244 if (TARGET_IWMMXT_ABI
7245 && arm_vector_mode_supported_p (arg.mode)
7246 && pcum->named_count > pcum->nargs + 1)
7247 {
7248 if (pcum->iwmmxt_nregs <= 9)
7249 return gen_rtx_REG (arg.mode,
7250 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7251 else
7252 {
7253 pcum->can_split = false;
7254 return NULL_RTX;
7255 }
7256 }
7257
7258 /* Put doubleword aligned quantities in even register pairs. */
7259 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7260 {
7261 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7262 if (res < 0 && warn_psabi)
7263 inform (input_location, "parameter passing for argument of type "
7264 "%qT changed in GCC 7.1", arg.type);
7265 else if (res > 0)
7266 {
7267 pcum->nregs++;
7268 if (res > 1 && warn_psabi)
7269 inform (input_location, "parameter passing for argument of type "
7270 "%qT changed in GCC 9.1", arg.type);
7271 }
7272 }
7273
7274 /* Only allow splitting an arg between regs and memory if all preceding
7275 args were allocated to regs. For args passed by reference we only count
7276 the reference pointer. */
7277 if (pcum->can_split)
7278 nregs = 1;
7279 else
7280 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7281
7282 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7283 return NULL_RTX;
7284
7285 return gen_rtx_REG (arg.mode, pcum->nregs);
7286 }
7287
7288 static unsigned int
7289 arm_function_arg_boundary (machine_mode mode, const_tree type)
7290 {
7291 if (!ARM_DOUBLEWORD_ALIGN)
7292 return PARM_BOUNDARY;
7293
7294 int res = arm_needs_doubleword_align (mode, type);
7295 if (res < 0 && warn_psabi)
7296 inform (input_location, "parameter passing for argument of type %qT "
7297 "changed in GCC 7.1", type);
7298 if (res > 1 && warn_psabi)
7299 inform (input_location, "parameter passing for argument of type "
7300 "%qT changed in GCC 9.1", type);
7301
7302 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7303 }
7304
7305 static int
7306 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7307 {
7308 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7309 int nregs = pcum->nregs;
7310
7311 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7312 {
7313 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7314 return pcum->aapcs_partial;
7315 }
7316
7317 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7318 return 0;
7319
7320 if (NUM_ARG_REGS > nregs
7321 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7322 && pcum->can_split)
7323 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7324
7325 return 0;
7326 }
7327
7328 /* Update the data in PCUM to advance over argument ARG. */
7329
7330 static void
7331 arm_function_arg_advance (cumulative_args_t pcum_v,
7332 const function_arg_info &arg)
7333 {
7334 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7335
7336 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7337 {
7338 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7339
7340 if (pcum->aapcs_cprc_slot >= 0)
7341 {
7342 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7343 arg.type);
7344 pcum->aapcs_cprc_slot = -1;
7345 }
7346
7347 /* Generic stuff. */
7348 pcum->aapcs_arg_processed = false;
7349 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7350 pcum->aapcs_reg = NULL_RTX;
7351 pcum->aapcs_partial = 0;
7352 }
7353 else
7354 {
7355 pcum->nargs += 1;
7356 if (arm_vector_mode_supported_p (arg.mode)
7357 && pcum->named_count > pcum->nargs
7358 && TARGET_IWMMXT_ABI)
7359 pcum->iwmmxt_nregs += 1;
7360 else
7361 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7362 }
7363 }
7364
7365 /* Variable sized types are passed by reference. This is a GCC
7366 extension to the ARM ABI. */
7367
7368 static bool
7369 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7370 {
7371 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7372 }
7373 \f
7374 /* Encode the current state of the #pragma [no_]long_calls. */
7375 typedef enum
7376 {
7377 OFF, /* No #pragma [no_]long_calls is in effect. */
7378 LONG, /* #pragma long_calls is in effect. */
7379 SHORT /* #pragma no_long_calls is in effect. */
7380 } arm_pragma_enum;
7381
7382 static arm_pragma_enum arm_pragma_long_calls = OFF;
7383
7384 void
7385 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7386 {
7387 arm_pragma_long_calls = LONG;
7388 }
7389
7390 void
7391 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7392 {
7393 arm_pragma_long_calls = SHORT;
7394 }
7395
7396 void
7397 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7398 {
7399 arm_pragma_long_calls = OFF;
7400 }
7401 \f
7402 /* Handle an attribute requiring a FUNCTION_DECL;
7403 arguments as in struct attribute_spec.handler. */
7404 static tree
7405 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7406 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7407 {
7408 if (TREE_CODE (*node) != FUNCTION_DECL)
7409 {
7410 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7411 name);
7412 *no_add_attrs = true;
7413 }
7414
7415 return NULL_TREE;
7416 }
7417
7418 /* Handle an "interrupt" or "isr" attribute;
7419 arguments as in struct attribute_spec.handler. */
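/* Typical uses this handler must accept look like (illustrative
   declarations only):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
     void __attribute__ ((isr)) default_handler (void);

   An unrecognised argument string makes arm_isr_value return
   ARM_FT_UNKNOWN, and the attribute is then ignored with a warning.  */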
7420 static tree
7421 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7422 bool *no_add_attrs)
7423 {
7424 if (DECL_P (*node))
7425 {
7426 if (TREE_CODE (*node) != FUNCTION_DECL)
7427 {
7428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7429 name);
7430 *no_add_attrs = true;
7431 }
7432 else if (TARGET_VFP_BASE)
7433 {
7434 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7435 name);
7436 }
7437 /* FIXME: the argument if any is checked for type attributes;
7438 should it be checked for decl ones? */
7439 }
7440 else
7441 {
7442 if (TREE_CODE (*node) == FUNCTION_TYPE
7443 || TREE_CODE (*node) == METHOD_TYPE)
7444 {
7445 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7446 {
7447 warning (OPT_Wattributes, "%qE attribute ignored",
7448 name);
7449 *no_add_attrs = true;
7450 }
7451 }
7452 else if (TREE_CODE (*node) == POINTER_TYPE
7453 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7454 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7455 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7456 {
7457 *node = build_variant_type_copy (*node);
7458 TREE_TYPE (*node) = build_type_attribute_variant
7459 (TREE_TYPE (*node),
7460 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7461 *no_add_attrs = true;
7462 }
7463 else
7464 {
7465 /* Possibly pass this attribute on from the type to a decl. */
7466 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7467 | (int) ATTR_FLAG_FUNCTION_NEXT
7468 | (int) ATTR_FLAG_ARRAY_NEXT))
7469 {
7470 *no_add_attrs = true;
7471 return tree_cons (name, args, NULL_TREE);
7472 }
7473 else
7474 {
7475 warning (OPT_Wattributes, "%qE attribute ignored",
7476 name);
7477 }
7478 }
7479 }
7480
7481 return NULL_TREE;
7482 }
7483
7484 /* Handle a "pcs" attribute; arguments as in struct
7485 attribute_spec.handler. */
7486 static tree
7487 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7488 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7489 {
7490 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7491 {
7492 warning (OPT_Wattributes, "%qE attribute ignored", name);
7493 *no_add_attrs = true;
7494 }
7495 return NULL_TREE;
7496 }
7497
7498 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7499 /* Handle the "notshared" attribute. This attribute is another way of
7500 requesting hidden visibility. ARM's compiler supports
7501 "__declspec(notshared)"; we support the same thing via an
7502 attribute. */
7503
7504 static tree
7505 arm_handle_notshared_attribute (tree *node,
7506 tree name ATTRIBUTE_UNUSED,
7507 tree args ATTRIBUTE_UNUSED,
7508 int flags ATTRIBUTE_UNUSED,
7509 bool *no_add_attrs)
7510 {
7511 tree decl = TYPE_NAME (*node);
7512
7513 if (decl)
7514 {
7515 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7516 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7517 *no_add_attrs = false;
7518 }
7519 return NULL_TREE;
7520 }
7521 #endif
7522
7523 /* This function returns true if a function with declaration FNDECL and type
7524 FNTYPE uses the stack to pass arguments or return variables and false
7525 otherwise. This is used for functions with the attributes
7526 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7527 diagnostic messages if the stack is used. NAME is the name of the attribute
7528 used. */
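/* For example (illustrative prototype, compiled with -mcmse), an entry
   function such as

     void __attribute__ ((cmse_nonsecure_entry)) f (long long a,
                                                    long long b, int c);

   is rejected here: 'a' and 'b' fill r0-r3, so 'c' would have to be
   passed on the stack.  */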
7529
7530 static bool
7531 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7532 {
7533 function_args_iterator args_iter;
7534 CUMULATIVE_ARGS args_so_far_v;
7535 cumulative_args_t args_so_far;
7536 bool first_param = true;
7537 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7538
7539 /* Error out if any argument is passed on the stack. */
7540 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7541 args_so_far = pack_cumulative_args (&args_so_far_v);
7542 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7543 {
7544 rtx arg_rtx;
7545
7546 prev_arg_type = arg_type;
7547 if (VOID_TYPE_P (arg_type))
7548 continue;
7549
7550 function_arg_info arg (arg_type, /*named=*/true);
7551 if (!first_param)
7552 /* ??? We should advance after processing the argument and pass
7553 the argument we're advancing past. */
7554 arm_function_arg_advance (args_so_far, arg);
7555 arg_rtx = arm_function_arg (args_so_far, arg);
7556 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7557 {
7558 error ("%qE attribute not available to functions with arguments "
7559 "passed on the stack", name);
7560 return true;
7561 }
7562 first_param = false;
7563 }
7564
7565 /* Error out for variadic functions since we cannot control how many
7566 arguments will be passed and thus stack could be used. stdarg_p () is not
7567 used for the checking to avoid browsing arguments twice. */
7568 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7569 {
7570 error ("%qE attribute not available to functions with variable number "
7571 "of arguments", name);
7572 return true;
7573 }
7574
7575 /* Error out if return value is passed on the stack. */
7576 ret_type = TREE_TYPE (fntype);
7577 if (arm_return_in_memory (ret_type, fntype))
7578 {
7579 error ("%qE attribute not available to functions that return value on "
7580 "the stack", name);
7581 return true;
7582 }
7583 return false;
7584 }
7585
7586 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7587 function will check whether the attribute is allowed here and will add the
7588 attribute to the function declaration tree or otherwise issue a warning. */
7589
7590 static tree
7591 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7592 tree /* args */,
7593 int /* flags */,
7594 bool *no_add_attrs)
7595 {
7596 tree fndecl;
7597
7598 if (!use_cmse)
7599 {
7600 *no_add_attrs = true;
7601 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7602 "option", name);
7603 return NULL_TREE;
7604 }
7605
7606 /* Ignore attribute for function types. */
7607 if (TREE_CODE (*node) != FUNCTION_DECL)
7608 {
7609 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7610 name);
7611 *no_add_attrs = true;
7612 return NULL_TREE;
7613 }
7614
7615 fndecl = *node;
7616
7617 /* Warn for static linkage functions. */
7618 if (!TREE_PUBLIC (fndecl))
7619 {
7620 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7621 "with static linkage", name);
7622 *no_add_attrs = true;
7623 return NULL_TREE;
7624 }
7625
7626 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7627 TREE_TYPE (fndecl));
7628 return NULL_TREE;
7629 }
7630
7631
7632 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7633 function will check whether the attribute is allowed here and will add the
7634 attribute to the function type tree or otherwise issue a diagnostic. The
7635 reason we check this at declaration time is to only allow the use of the
7636 attribute with declarations of function pointers and not function
7637 declarations. This function checks NODE is of the expected type and issues
7638 diagnostics otherwise using NAME. If it is not of the expected type
7639 *NO_ADD_ATTRS will be set to true. */
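/* Sketch of the accepted and rejected forms (illustrative declarations):

     void (*ns_fp) (void) __attribute__ ((cmse_nonsecure_call));  accepted
     void foo (void) __attribute__ ((cmse_nonsecure_call));       warned

   i.e. the attribute must end up on the function type reached through a
   pointer, never on a function declaration itself.  */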
7640
7641 static tree
7642 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7643 tree /* args */,
7644 int /* flags */,
7645 bool *no_add_attrs)
7646 {
7647 tree decl = NULL_TREE;
7648 tree fntype, type;
7649
7650 if (!use_cmse)
7651 {
7652 *no_add_attrs = true;
7653 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7654 "option", name);
7655 return NULL_TREE;
7656 }
7657
7658 if (DECL_P (*node))
7659 {
7660 fntype = TREE_TYPE (*node);
7661
7662 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7663 decl = *node;
7664 }
7665 else
7666 fntype = *node;
7667
7668 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7669 fntype = TREE_TYPE (fntype);
7670
7671 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7672 {
7673 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7674 "function pointer", name);
7675 *no_add_attrs = true;
7676 return NULL_TREE;
7677 }
7678
7679 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7680
7681 if (*no_add_attrs)
7682 return NULL_TREE;
7683
7684 /* Prevent trees being shared among function types with and without
7685 cmse_nonsecure_call attribute. */
7686 if (decl)
7687 {
7688 type = build_distinct_type_copy (TREE_TYPE (decl));
7689 TREE_TYPE (decl) = type;
7690 }
7691 else
7692 {
7693 type = build_distinct_type_copy (*node);
7694 *node = type;
7695 }
7696
7697 fntype = type;
7698
7699 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7700 {
7701 type = fntype;
7702 fntype = TREE_TYPE (fntype);
7703 fntype = build_distinct_type_copy (fntype);
7704 TREE_TYPE (type) = fntype;
7705 }
7706
7707 /* Construct a type attribute and add it to the function type. */
7708 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7709 TYPE_ATTRIBUTES (fntype));
7710 TYPE_ATTRIBUTES (fntype) = attrs;
7711 return NULL_TREE;
7712 }
7713
7714 /* Return 0 if the attributes for two types are incompatible, 1 if they
7715 are compatible, and 2 if they are nearly compatible (which causes a
7716 warning to be generated). */
7717 static int
7718 arm_comp_type_attributes (const_tree type1, const_tree type2)
7719 {
7720 int l1, l2, s1, s2;
7721
7722 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7723 TYPE_ATTRIBUTES (type1));
7724 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7725 TYPE_ATTRIBUTES (type2));
7726 if (bool (attrs1) != bool (attrs2))
7727 return 0;
7728 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7729 return 0;
7730
7731 /* Check for mismatch of non-default calling convention. */
7732 if (TREE_CODE (type1) != FUNCTION_TYPE)
7733 return 1;
7734
7735 /* Check for mismatched call attributes. */
7736 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7737 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7738 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7739 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7740
7741 /* Only bother to check if an attribute is defined. */
7742 if (l1 | l2 | s1 | s2)
7743 {
7744 /* If one type has an attribute, the other must have the same attribute. */
7745 if ((l1 != l2) || (s1 != s2))
7746 return 0;
7747
7748 /* Disallow mixed attributes. */
7749 if ((l1 & s2) || (l2 & s1))
7750 return 0;
7751 }
7752
7753 /* Check for mismatched ISR attribute. */
7754 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7755 if (! l1)
7756 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7757 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7758 if (! l2)
7759 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7760 if (l1 != l2)
7761 return 0;
7762
7763 l1 = lookup_attribute ("cmse_nonsecure_call",
7764 TYPE_ATTRIBUTES (type1)) != NULL;
7765 l2 = lookup_attribute ("cmse_nonsecure_call",
7766 TYPE_ATTRIBUTES (type2)) != NULL;
7767
7768 if (l1 != l2)
7769 return 0;
7770
7771 return 1;
7772 }
7773
7774 /* Assign default attributes to a newly defined type. This is used to
7775 set short_call/long_call attributes for function types of
7776 functions defined inside corresponding #pragma scopes. */
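/* For instance (illustrative only):

     #pragma long_calls
     void remote_helper (void);      gets an implicit long_call attribute
     #pragma long_calls_off

   declarations between the two pragmas behave as if they carried
   __attribute__ ((long_call)) explicitly.  */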
7777 static void
7778 arm_set_default_type_attributes (tree type)
7779 {
7780 /* Add __attribute__ ((long_call)) to all functions, when
7781 inside #pragma long_calls or __attribute__ ((short_call)),
7782 when inside #pragma no_long_calls. */
7783 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7784 {
7785 tree type_attr_list, attr_name;
7786 type_attr_list = TYPE_ATTRIBUTES (type);
7787
7788 if (arm_pragma_long_calls == LONG)
7789 attr_name = get_identifier ("long_call");
7790 else if (arm_pragma_long_calls == SHORT)
7791 attr_name = get_identifier ("short_call");
7792 else
7793 return;
7794
7795 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7796 TYPE_ATTRIBUTES (type) = type_attr_list;
7797 }
7798 }
7799 \f
7800 /* Return true if DECL is known to be linked into section SECTION. */
7801
7802 static bool
7803 arm_function_in_section_p (tree decl, section *section)
7804 {
7805 /* We can only be certain about the prevailing symbol definition. */
7806 if (!decl_binds_to_current_def_p (decl))
7807 return false;
7808
7809 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7810 if (!DECL_SECTION_NAME (decl))
7811 {
7812 /* Make sure that we will not create a unique section for DECL. */
7813 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7814 return false;
7815 }
7816
7817 return function_section (decl) == section;
7818 }
7819
7820 /* Return nonzero if a 32-bit "long_call" should be generated for
7821 a call from the current function to DECL. We generate a long_call
7822 if the function:
7823
7824 a. has an __attribute__((long_call))
7825 or b. is within the scope of a #pragma long_calls
7826 or c. the -mlong-calls command line switch has been specified
7827
7828 However we do not generate a long call if the function:
7829
7830 d. has an __attribute__ ((short_call))
7831 or e. is inside the scope of a #pragma no_long_calls
7832 or f. is defined in the same section as the current function. */
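/* So, for example (illustrative declarations):

     void far_func (void) __attribute__ ((long_call));    always long call
     void near_func (void) __attribute__ ((short_call));  never long call

   and compiling with -mlong-calls makes the long form the default for
   anything not covered by b, d, e or f above.  */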
7833
7834 bool
7835 arm_is_long_call_p (tree decl)
7836 {
7837 tree attrs;
7838
7839 if (!decl)
7840 return TARGET_LONG_CALLS;
7841
7842 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7843 if (lookup_attribute ("short_call", attrs))
7844 return false;
7845
7846 /* For "f", be conservative, and only cater for cases in which the
7847 whole of the current function is placed in the same section. */
7848 if (!flag_reorder_blocks_and_partition
7849 && TREE_CODE (decl) == FUNCTION_DECL
7850 && arm_function_in_section_p (decl, current_function_section ()))
7851 return false;
7852
7853 if (lookup_attribute ("long_call", attrs))
7854 return true;
7855
7856 return TARGET_LONG_CALLS;
7857 }
7858
7859 /* Return nonzero if it is ok to make a tail-call to DECL. */
7860 static bool
7861 arm_function_ok_for_sibcall (tree decl, tree exp)
7862 {
7863 unsigned long func_type;
7864
7865 if (cfun->machine->sibcall_blocked)
7866 return false;
7867
7868 if (TARGET_FDPIC)
7869 {
7870 /* In FDPIC, never tailcall something for which we have no decl:
7871 the target function could be in a different module, requiring
7872 a different FDPIC register value. */
7873 if (decl == NULL)
7874 return false;
7875 }
7876
7877 /* Never tailcall something if we are generating code for Thumb-1. */
7878 if (TARGET_THUMB1)
7879 return false;
7880
7881 /* The PIC register is live on entry to VxWorks PLT entries, so we
7882 must make the call before restoring the PIC register. */
7883 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7884 return false;
7885
7886 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7887 may be used both as the target of the call and as the base register for
7888 restoring the VFP registers. */
7889 if (TARGET_APCS_FRAME && TARGET_ARM
7890 && TARGET_HARD_FLOAT
7891 && decl && arm_is_long_call_p (decl))
7892 return false;
7893
7894 /* If we are interworking and the function is not declared static
7895 then we can't tail-call it unless we know that it exists in this
7896 compilation unit (since it might be a Thumb routine). */
7897 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7898 && !TREE_ASM_WRITTEN (decl))
7899 return false;
7900
7901 func_type = arm_current_func_type ();
7902 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7903 if (IS_INTERRUPT (func_type))
7904 return false;
7905
7906 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7907 generated for entry functions themselves. */
7908 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7909 return false;
7910
7911 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7912 this would complicate matters for later code generation. */
7913 if (TREE_CODE (exp) == CALL_EXPR)
7914 {
7915 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7916 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7917 return false;
7918 }
7919
7920 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7921 {
7922 /* Check that the return value locations are the same. For
7923 example that we aren't returning a value from the sibling in
7924 a VFP register but then need to transfer it to a core
7925 register. */
7926 rtx a, b;
7927 tree decl_or_type = decl;
7928
7929 /* If it is an indirect function pointer, get the function type. */
7930 if (!decl)
7931 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7932
7933 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7934 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7935 cfun->decl, false);
7936 if (!rtx_equal_p (a, b))
7937 return false;
7938 }
7939
7940 /* Never tailcall if function may be called with a misaligned SP. */
7941 if (IS_STACKALIGN (func_type))
7942 return false;
7943
7944 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7945 references should become a NOP. Don't convert such calls into
7946 sibling calls. */
7947 if (TARGET_AAPCS_BASED
7948 && arm_abi == ARM_ABI_AAPCS
7949 && decl
7950 && DECL_WEAK (decl))
7951 return false;
7952
7953 /* We cannot do a tailcall for an indirect call by descriptor if all the
7954 argument registers are used because the only register left to load the
7955 address is IP and it will already contain the static chain. */
7956 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7957 {
7958 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7959 CUMULATIVE_ARGS cum;
7960 cumulative_args_t cum_v;
7961
7962 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7963 cum_v = pack_cumulative_args (&cum);
7964
7965 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7966 {
7967 tree type = TREE_VALUE (t);
7968 if (!VOID_TYPE_P (type))
7969 {
7970 function_arg_info arg (type, /*named=*/true);
7971 arm_function_arg_advance (cum_v, arg);
7972 }
7973 }
7974
7975 function_arg_info arg (integer_type_node, /*named=*/true);
7976 if (!arm_function_arg (cum_v, arg))
7977 return false;
7978 }
7979
7980 /* Everything else is ok. */
7981 return true;
7982 }
7983
7984 \f
7985 /* Addressing mode support functions. */
7986
7987 /* Return nonzero if X is a legitimate immediate operand when compiling
7988 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7989 int
7990 legitimate_pic_operand_p (rtx x)
7991 {
7992 if (SYMBOL_REF_P (x)
7993 || (GET_CODE (x) == CONST
7994 && GET_CODE (XEXP (x, 0)) == PLUS
7995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7996 return 0;
7997
7998 return 1;
7999 }
8000
8001 /* Record that the current function needs a PIC register. If PIC_REG is null,
8002 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8003 both cases cfun->machine->pic_reg is initialized if we have not already done
8004 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
8005 the PIC register is reloaded in the current position of the instruction stream
8006 regardless of whether it was loaded before. Otherwise, it is only loaded
8007 if not already done so (crtl->uses_pic_offset_table is null). Note that
8008 a nonnull PIC_REG is supported only if COMPUTE_NOW is true, and a null
8009 PIC_REG only if COMPUTE_NOW is false. */
8010
8011 static void
8012 require_pic_register (rtx pic_reg, bool compute_now)
8013 {
8014 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8015
8016 /* A lot of the logic here is made obscure by the fact that this
8017 routine gets called as part of the rtx cost estimation process.
8018 We don't want those calls to affect any assumptions about the real
8019 function; and further, we can't call entry_of_function() until we
8020 start the real expansion process. */
8021 if (!crtl->uses_pic_offset_table || compute_now)
8022 {
8023 gcc_assert (can_create_pseudo_p ()
8024 || (pic_reg != NULL_RTX
8025 && REG_P (pic_reg)
8026 && GET_MODE (pic_reg) == Pmode));
8027 if (arm_pic_register != INVALID_REGNUM
8028 && !compute_now
8029 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8030 {
8031 if (!cfun->machine->pic_reg)
8032 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8033
8034 /* Play games to avoid marking the function as needing pic
8035 if we are being called as part of the cost-estimation
8036 process. */
8037 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8038 crtl->uses_pic_offset_table = 1;
8039 }
8040 else
8041 {
8042 rtx_insn *seq, *insn;
8043
8044 if (pic_reg == NULL_RTX)
8045 pic_reg = gen_reg_rtx (Pmode);
8046 if (!cfun->machine->pic_reg)
8047 cfun->machine->pic_reg = pic_reg;
8048
8049 /* Play games to avoid marking the function as needing pic
8050 if we are being called as part of the cost-estimation
8051 process. */
8052 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8053 {
8054 crtl->uses_pic_offset_table = 1;
8055 start_sequence ();
8056
8057 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8058 && arm_pic_register > LAST_LO_REGNUM
8059 && !compute_now)
8060 emit_move_insn (cfun->machine->pic_reg,
8061 gen_rtx_REG (Pmode, arm_pic_register));
8062 else
8063 arm_load_pic_register (0UL, pic_reg);
8064
8065 seq = get_insns ();
8066 end_sequence ();
8067
8068 for (insn = seq; insn; insn = NEXT_INSN (insn))
8069 if (INSN_P (insn))
8070 INSN_LOCATION (insn) = prologue_location;
8071
8072 /* We can be called during expansion of PHI nodes, where
8073 we can't yet emit instructions directly in the final
8074 insn stream. Queue the insns on the entry edge, they will
8075 be committed after everything else is expanded. */
8076 if (currently_expanding_to_rtl)
8077 insert_insn_on_edge (seq,
8078 single_succ_edge
8079 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8080 else
8081 emit_insn (seq);
8082 }
8083 }
8084 }
8085 }
8086
8087 /* Generate insns to calculate the address of ORIG in pic mode. */
8088 static rtx_insn *
8089 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8090 {
8091 rtx pat;
8092 rtx mem;
8093
8094 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8095
8096 /* Make the MEM as close to a constant as possible. */
8097 mem = SET_SRC (pat);
8098 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8099 MEM_READONLY_P (mem) = 1;
8100 MEM_NOTRAP_P (mem) = 1;
8101
8102 return emit_insn (pat);
8103 }
8104
8105 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8106 created to hold the result of the load. If not NULL, PIC_REG indicates
8107 which register to use as PIC register, otherwise it is decided by register
8108 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8109 location in the instruction stream, regardless of whether it was loaded
8110 previously. Note that a nonnull PIC_REG is supported only if COMPUTE_NOW is
8111 true, and a null PIC_REG only if COMPUTE_NOW is false.
8112
8113 Returns the register REG into which the PIC load is performed. */
8114
8115 rtx
8116 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8117 bool compute_now)
8118 {
8119 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8120
8121 if (SYMBOL_REF_P (orig)
8122 || LABEL_REF_P (orig))
8123 {
8124 if (reg == 0)
8125 {
8126 gcc_assert (can_create_pseudo_p ());
8127 reg = gen_reg_rtx (Pmode);
8128 }
8129
8130 /* VxWorks does not impose a fixed gap between segments; the run-time
8131 gap can be different from the object-file gap. We therefore can't
8132 use GOTOFF unless we are absolutely sure that the symbol is in the
8133 same segment as the GOT. Unfortunately, the flexibility of linker
8134 scripts means that we can't be sure of that in general, so assume
8135 that GOTOFF is never valid on VxWorks. */
8136 /* References to weak symbols cannot be resolved locally: they
8137 may be overridden by a non-weak definition at link time. */
8138 rtx_insn *insn;
8139 if ((LABEL_REF_P (orig)
8140 || (SYMBOL_REF_P (orig)
8141 && SYMBOL_REF_LOCAL_P (orig)
8142 && (SYMBOL_REF_DECL (orig)
8143 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8144 && (!SYMBOL_REF_FUNCTION_P (orig)
8145 || arm_fdpic_local_funcdesc_p (orig))))
8146 && NEED_GOT_RELOC
8147 && arm_pic_data_is_text_relative)
8148 insn = arm_pic_static_addr (orig, reg);
8149 else
8150 {
8151 /* If this function doesn't have a pic register, create one now. */
8152 require_pic_register (pic_reg, compute_now);
8153
8154 if (pic_reg == NULL_RTX)
8155 pic_reg = cfun->machine->pic_reg;
8156
8157 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8158 }
8159
8160 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8161 by loop. */
8162 set_unique_reg_note (insn, REG_EQUAL, orig);
8163
8164 return reg;
8165 }
8166 else if (GET_CODE (orig) == CONST)
8167 {
8168 rtx base, offset;
8169
8170 if (GET_CODE (XEXP (orig, 0)) == PLUS
8171 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8172 return orig;
8173
8174 /* Handle the case where we have: const (UNSPEC_TLS). */
8175 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8176 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8177 return orig;
8178
8179 /* Handle the case where we have:
8180 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8181 CONST_INT. */
8182 if (GET_CODE (XEXP (orig, 0)) == PLUS
8183 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8184 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8185 {
8186 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8187 return orig;
8188 }
8189
8190 if (reg == 0)
8191 {
8192 gcc_assert (can_create_pseudo_p ());
8193 reg = gen_reg_rtx (Pmode);
8194 }
8195
8196 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8197
8198 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8199 pic_reg, compute_now);
8200 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8201 base == reg ? 0 : reg, pic_reg,
8202 compute_now);
8203
8204 if (CONST_INT_P (offset))
8205 {
8206 /* The base register doesn't really matter, we only want to
8207 test the index for the appropriate mode. */
8208 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8209 {
8210 gcc_assert (can_create_pseudo_p ());
8211 offset = force_reg (Pmode, offset);
8212 }
8213
8214 if (CONST_INT_P (offset))
8215 return plus_constant (Pmode, base, INTVAL (offset));
8216 }
8217
8218 if (GET_MODE_SIZE (mode) > 4
8219 && (GET_MODE_CLASS (mode) == MODE_INT
8220 || TARGET_SOFT_FLOAT))
8221 {
8222 emit_insn (gen_addsi3 (reg, base, offset));
8223 return reg;
8224 }
8225
8226 return gen_rtx_PLUS (Pmode, base, offset);
8227 }
8228
8229 return orig;
8230 }
8231
8232
8233 /* Generate insns that produce the address of the stack canary. */
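/* This path is used when the canary is read relative to the thread pointer,
   e.g. with options along the lines of (shown for illustration only; the
   offset value is arbitrary):

     -fstack-protector-strong -mstack-protector-guard=tls
       -mstack-protector-guard-offset=1296

   in which case the address formed below is TP + 1296.  */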
8234 rtx
8235 arm_stack_protect_tls_canary_mem (bool reload)
8236 {
8237 rtx tp = gen_reg_rtx (SImode);
8238 if (reload)
8239 emit_insn (gen_reload_tp_hard (tp));
8240 else
8241 emit_insn (gen_load_tp_hard (tp));
8242
8243 rtx reg = gen_reg_rtx (SImode);
8244 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8245 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8246 return gen_rtx_MEM (SImode, reg);
8247 }
8248
8249
8250 /* Whether a register is callee saved or not. This is necessary because high
8251 registers are marked as caller saved when optimizing for size on Thumb-1
8252 targets despite being callee saved in order to avoid using them. */
8253 #define callee_saved_reg_p(reg) \
8254 (!call_used_or_fixed_reg_p (reg) \
8255 || (TARGET_THUMB1 && optimize_size \
8256 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8257
8258 /* Return a mask for the call-clobbered low registers that are unused
8259 at the end of the prologue. */
8260 static unsigned long
8261 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8262 {
8263 unsigned long mask = 0;
8264 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8265
8266 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8267 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8268 mask |= 1 << (reg - FIRST_LO_REGNUM);
8269 return mask;
8270 }
8271
8272 /* Similarly for the start of the epilogue. */
8273 static unsigned long
8274 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8275 {
8276 unsigned long mask = 0;
8277 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8278
8279 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8280 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8281 mask |= 1 << (reg - FIRST_LO_REGNUM);
8282 return mask;
8283 }
8284
8285 /* Find a spare register to use during the prolog of a function. */
8286
8287 static int
8288 thumb_find_work_register (unsigned long pushed_regs_mask)
8289 {
8290 int reg;
8291
8292 unsigned long unused_regs
8293 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8294
8295 /* Check the argument registers first as these are call-used. The
8296 register allocation order means that sometimes r3 might be used
8297 but earlier argument registers might not, so check them all. */
8298 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8299 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8300 return reg;
8301
8302 /* Otherwise look for a call-saved register that is going to be pushed. */
8303 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8304 if (pushed_regs_mask & (1 << reg))
8305 return reg;
8306
8307 if (TARGET_THUMB2)
8308 {
8309 /* Thumb-2 can use high regs. */
8310 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8311 if (pushed_regs_mask & (1 << reg))
8312 return reg;
8313 }
8314 /* Something went wrong - thumb_compute_save_reg_mask()
8315 should have arranged for a suitable register to be pushed. */
8316 gcc_unreachable ();
8317 }
8318
8319 static GTY(()) int pic_labelno;
8320
8321 /* Generate code to load the PIC register into PIC_REG. On Thumb-1 a
8322 spare low register taken from SAVED_REGS may be used as a scratch. */
8323
8324 void
8325 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8326 {
8327 rtx l1, labelno, pic_tmp, pic_rtx;
8328
8329 if (crtl->uses_pic_offset_table == 0
8330 || TARGET_SINGLE_PIC_BASE
8331 || TARGET_FDPIC)
8332 return;
8333
8334 gcc_assert (flag_pic);
8335
8336 if (pic_reg == NULL_RTX)
8337 pic_reg = cfun->machine->pic_reg;
8338 if (TARGET_VXWORKS_RTP)
8339 {
8340 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8341 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8342 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8343
8344 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8345
8346 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8347 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8348 }
8349 else
8350 {
8351 /* We use an UNSPEC rather than a LABEL_REF because this label
8352 never appears in the code stream. */
8353
8354 labelno = GEN_INT (pic_labelno++);
8355 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8356 l1 = gen_rtx_CONST (VOIDmode, l1);
8357
8358 /* On the ARM the PC register contains 'dot + 8' at the time of the
8359 addition; on the Thumb it is 'dot + 4'. */
8360 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8361 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8362 UNSPEC_GOTSYM_OFF);
8363 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8364
8365 if (TARGET_32BIT)
8366 {
8367 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8368 }
8369 else /* TARGET_THUMB1 */
8370 {
8371 if (arm_pic_register != INVALID_REGNUM
8372 && REGNO (pic_reg) > LAST_LO_REGNUM)
8373 {
8374 /* We will have pushed the pic register, so we should always be
8375 able to find a work register. */
8376 pic_tmp = gen_rtx_REG (SImode,
8377 thumb_find_work_register (saved_regs));
8378 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8379 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8380 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8381 }
8382 else if (arm_pic_register != INVALID_REGNUM
8383 && arm_pic_register > LAST_LO_REGNUM
8384 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8385 {
8386 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8387 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8388 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8389 }
8390 else
8391 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8392 }
8393 }
8394
8395 /* Need to emit this whether or not we obey regdecls,
8396 since setjmp/longjmp can cause life info to screw up. */
8397 emit_use (pic_reg);
8398 }
8399
8400 /* Try to determine whether an object, referenced via ORIG, will be
8401 placed in the text or data segment. This is used in FDPIC mode, to
8402 decide which relocations to use when accessing ORIG. *IS_READONLY
8403 is set to true if ORIG is a read-only location, false otherwise.
8404 Return true if we could determine the location of ORIG, false
8405 otherwise. *IS_READONLY is valid only when we return true. */
8406 static bool
8407 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8408 {
8409 *is_readonly = false;
8410
8411 if (LABEL_REF_P (orig))
8412 {
8413 *is_readonly = true;
8414 return true;
8415 }
8416
8417 if (SYMBOL_REF_P (orig))
8418 {
8419 if (CONSTANT_POOL_ADDRESS_P (orig))
8420 {
8421 *is_readonly = true;
8422 return true;
8423 }
8424 if (SYMBOL_REF_LOCAL_P (orig)
8425 && !SYMBOL_REF_EXTERNAL_P (orig)
8426 && SYMBOL_REF_DECL (orig)
8427 && (!DECL_P (SYMBOL_REF_DECL (orig))
8428 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8429 {
8430 tree decl = SYMBOL_REF_DECL (orig);
8431 tree init = (TREE_CODE (decl) == VAR_DECL)
8432 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8433 ? decl : 0;
8434 int reloc = 0;
8435 bool named_section, readonly;
8436
8437 if (init && init != error_mark_node)
8438 reloc = compute_reloc_for_constant (init);
8439
8440 named_section = TREE_CODE (decl) == VAR_DECL
8441 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8442 readonly = decl_readonly_section (decl, reloc);
8443
8444 /* We don't know where the link script will put a named
8445 section, so return false in such a case. */
8446 if (named_section)
8447 return false;
8448
8449 *is_readonly = readonly;
8450 return true;
8451 }
8452
8453 /* We don't know. */
8454 return false;
8455 }
8456
8457 gcc_unreachable ();
8458 }
8459
8460 /* Generate code to load the address of a static var when flag_pic is set. */
8461 static rtx_insn *
8462 arm_pic_static_addr (rtx orig, rtx reg)
8463 {
8464 rtx l1, labelno, offset_rtx;
8465 rtx_insn *insn;
8466
8467 gcc_assert (flag_pic);
8468
8469 bool is_readonly = false;
8470 bool info_known = false;
8471
8472 if (TARGET_FDPIC
8473 && SYMBOL_REF_P (orig)
8474 && !SYMBOL_REF_FUNCTION_P (orig))
8475 info_known = arm_is_segment_info_known (orig, &is_readonly);
8476
8477 if (TARGET_FDPIC
8478 && SYMBOL_REF_P (orig)
8479 && !SYMBOL_REF_FUNCTION_P (orig)
8480 && !info_known)
8481 {
8482 /* We don't know where orig is stored, so we have to be
8483 pessimistic and use a GOT relocation. */
8484 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8485
8486 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8487 }
8488 else if (TARGET_FDPIC
8489 && SYMBOL_REF_P (orig)
8490 && (SYMBOL_REF_FUNCTION_P (orig)
8491 || !is_readonly))
8492 {
8493 /* We use the GOTOFF relocation. */
8494 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8495
8496 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8497 emit_insn (gen_movsi (reg, l1));
8498 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8499 }
8500 else
8501 {
8502 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8503 PC-relative access. */
8504 /* We use an UNSPEC rather than a LABEL_REF because this label
8505 never appears in the code stream. */
8506 labelno = GEN_INT (pic_labelno++);
8507 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8508 l1 = gen_rtx_CONST (VOIDmode, l1);
8509
8510 /* On the ARM the PC register contains 'dot + 8' at the time of the
8511 addition; on the Thumb it is 'dot + 4'. */
8512 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8513 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8514 UNSPEC_SYMBOL_OFFSET);
8515 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8516
8517 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8518 labelno));
8519 }
8520
8521 return insn;
8522 }
8523
8524 /* Return nonzero if X is valid as an ARM state addressing register. */
8525 static int
8526 arm_address_register_rtx_p (rtx x, int strict_p)
8527 {
8528 int regno;
8529
8530 if (!REG_P (x))
8531 return 0;
8532
8533 regno = REGNO (x);
8534
8535 if (strict_p)
8536 return ARM_REGNO_OK_FOR_BASE_P (regno);
8537
8538 return (regno <= LAST_ARM_REGNUM
8539 || regno >= FIRST_PSEUDO_REGISTER
8540 || regno == FRAME_POINTER_REGNUM
8541 || regno == ARG_POINTER_REGNUM);
8542 }
8543
8544 /* Return TRUE if this rtx is the difference of a symbol and a label,
8545 and will reduce to a PC-relative relocation in the object file.
8546 Expressions like this can be left alone when generating PIC, rather
8547 than forced through the GOT. */
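/* For example (an illustrative sketch only), an expression such as
     (minus (symbol_ref ("foo")) (label_ref 23))
   reduces to a PC-relative relocation in the object file and so does not
   need a GOT entry.  */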
8548 static int
8549 pcrel_constant_p (rtx x)
8550 {
8551 if (GET_CODE (x) == MINUS)
8552 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8553
8554 return FALSE;
8555 }
8556
8557 /* Return true if X will surely end up in an index register after the next
8558 splitting pass. */
8559 static bool
8560 will_be_in_index_register (const_rtx x)
8561 {
8562 /* arm.md: calculate_pic_address will split this into a register. */
8563 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8564 }
8565
8566 /* Return nonzero if X is a valid ARM state address operand. */
8567 int
8568 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8569 int strict_p)
8570 {
8571 bool use_ldrd;
8572 enum rtx_code code = GET_CODE (x);
8573
8574 if (arm_address_register_rtx_p (x, strict_p))
8575 return 1;
8576
8577 use_ldrd = (TARGET_LDRD
8578 && (mode == DImode || mode == DFmode));
8579
8580 if (code == POST_INC || code == PRE_DEC
8581 || ((code == PRE_INC || code == POST_DEC)
8582 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8583 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8584
8585 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8586 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8587 && GET_CODE (XEXP (x, 1)) == PLUS
8588 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8589 {
8590 rtx addend = XEXP (XEXP (x, 1), 1);
8591
8592 /* Don't allow ldrd post-increment by register because it's hard
8593 to fix up invalid register choices. */
8594 if (use_ldrd
8595 && GET_CODE (x) == POST_MODIFY
8596 && REG_P (addend))
8597 return 0;
8598
8599 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8600 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8601 }
8602
8603 /* After reload constants split into minipools will have addresses
8604 from a LABEL_REF. */
8605 else if (reload_completed
8606 && (code == LABEL_REF
8607 || (code == CONST
8608 && GET_CODE (XEXP (x, 0)) == PLUS
8609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8610 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8611 return 1;
8612
8613 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8614 return 0;
8615
8616 else if (code == PLUS)
8617 {
8618 rtx xop0 = XEXP (x, 0);
8619 rtx xop1 = XEXP (x, 1);
8620
8621 return ((arm_address_register_rtx_p (xop0, strict_p)
8622 && ((CONST_INT_P (xop1)
8623 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8624 || (!strict_p && will_be_in_index_register (xop1))))
8625 || (arm_address_register_rtx_p (xop1, strict_p)
8626 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8627 }
8628
8629 #if 0
8630 /* Reload currently can't handle MINUS, so disable this for now */
8631 else if (GET_CODE (x) == MINUS)
8632 {
8633 rtx xop0 = XEXP (x, 0);
8634 rtx xop1 = XEXP (x, 1);
8635
8636 return (arm_address_register_rtx_p (xop0, strict_p)
8637 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8638 }
8639 #endif
8640
8641 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8642 && code == SYMBOL_REF
8643 && CONSTANT_POOL_ADDRESS_P (x)
8644 && ! (flag_pic
8645 && symbol_mentioned_p (get_pool_constant (x))
8646 && ! pcrel_constant_p (get_pool_constant (x))))
8647 return 1;
8648
8649 return 0;
8650 }
8651
8652 /* Return true if we can avoid creating a constant pool entry for x. */
8653 static bool
8654 can_avoid_literal_pool_for_label_p (rtx x)
8655 {
8656 /* Normally we can assign constant values to target registers without
8657 the help of the constant pool. But there are cases where we have to use
8658 the constant pool, such as:
8659 1) assigning a label to a register;
8660 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8661
8662 A constant pool access of the form:
8663 (set (reg r0) (mem (symbol_ref (".LC0"))))
8664 will cause the use of the literal pool (later, in function arm_reorg).
8665 So here we mark such a form as invalid; the compiler
8666 will then adjust it into:
8667 (set (reg r0) (symbol_ref (".LC0")))
8668 (set (reg r0) (mem (reg r0))).
8669 No extra register is required, and (mem (reg r0)) won't cause the use
8670 of literal pools. */
8671 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8672 && CONSTANT_POOL_ADDRESS_P (x))
8673 return 1;
8674 return 0;
8675 }
8676
8677
8678 /* Return nonzero if X is a valid Thumb-2 address operand. */
8679 static int
8680 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8681 {
8682 bool use_ldrd;
8683 enum rtx_code code = GET_CODE (x);
8684
8685 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8686 return mve_vector_mem_operand (mode, x, strict_p);
8687
8688 if (arm_address_register_rtx_p (x, strict_p))
8689 return 1;
8690
8691 use_ldrd = (TARGET_LDRD
8692 && (mode == DImode || mode == DFmode));
8693
8694 if (code == POST_INC || code == PRE_DEC
8695 || ((code == PRE_INC || code == POST_DEC)
8696 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8697 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8698
8699 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8700 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8701 && GET_CODE (XEXP (x, 1)) == PLUS
8702 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8703 {
8704 /* Thumb-2 only has autoincrement by constant. */
8705 rtx addend = XEXP (XEXP (x, 1), 1);
8706 HOST_WIDE_INT offset;
8707
8708 if (!CONST_INT_P (addend))
8709 return 0;
8710
8711 offset = INTVAL (addend);
8712 if (GET_MODE_SIZE (mode) <= 4)
8713 return (offset > -256 && offset < 256);
8714
8715 return (use_ldrd && offset > -1024 && offset < 1024
8716 && (offset & 3) == 0);
8717 }
8718
8719 /* After reload constants split into minipools will have addresses
8720 from a LABEL_REF. */
8721 else if (reload_completed
8722 && (code == LABEL_REF
8723 || (code == CONST
8724 && GET_CODE (XEXP (x, 0)) == PLUS
8725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8726 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8727 return 1;
8728
8729 else if (mode == TImode
8730 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8731 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8732 return 0;
8733
8734 else if (code == PLUS)
8735 {
8736 rtx xop0 = XEXP (x, 0);
8737 rtx xop1 = XEXP (x, 1);
8738
8739 return ((arm_address_register_rtx_p (xop0, strict_p)
8740 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8741 || (!strict_p && will_be_in_index_register (xop1))))
8742 || (arm_address_register_rtx_p (xop1, strict_p)
8743 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8744 }
8745
8746 else if (can_avoid_literal_pool_for_label_p (x))
8747 return 0;
8748
8749 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8750 && code == SYMBOL_REF
8751 && CONSTANT_POOL_ADDRESS_P (x)
8752 && ! (flag_pic
8753 && symbol_mentioned_p (get_pool_constant (x))
8754 && ! pcrel_constant_p (get_pool_constant (x))))
8755 return 1;
8756
8757 return 0;
8758 }
8759
8760 /* Return nonzero if INDEX is valid for an address index operand in
8761 ARM state. */
8762 static int
8763 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8764 int strict_p)
8765 {
8766 HOST_WIDE_INT range;
8767 enum rtx_code code = GET_CODE (index);
8768
8769 /* Standard coprocessor addressing modes. */
8770 if (TARGET_HARD_FLOAT
8771 && (mode == SFmode || mode == DFmode))
8772 return (code == CONST_INT && INTVAL (index) < 1024
8773 && INTVAL (index) > -1024
8774 && (INTVAL (index) & 3) == 0);
8775
8776 /* For quad modes, we restrict the constant offset to be slightly less
8777 than what the instruction format permits. We do this because for
8778 quad mode moves, we will actually decompose them into two separate
8779 double-mode reads or writes. INDEX must therefore be a valid
8780 (double-mode) offset and so should INDEX+8. */
8781 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8782 return (code == CONST_INT
8783 && INTVAL (index) < 1016
8784 && INTVAL (index) > -1024
8785 && (INTVAL (index) & 3) == 0);
8786
8787 /* We have no such constraint on double mode offsets, so we permit the
8788 full range of the instruction format. */
8789 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8790 return (code == CONST_INT
8791 && INTVAL (index) < 1024
8792 && INTVAL (index) > -1024
8793 && (INTVAL (index) & 3) == 0);
8794
8795 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8796 return (code == CONST_INT
8797 && INTVAL (index) < 1024
8798 && INTVAL (index) > -1024
8799 && (INTVAL (index) & 3) == 0);
8800
8801 if (arm_address_register_rtx_p (index, strict_p)
8802 && (GET_MODE_SIZE (mode) <= 4))
8803 return 1;
8804
8805 if (mode == DImode || mode == DFmode)
8806 {
8807 if (code == CONST_INT)
8808 {
8809 HOST_WIDE_INT val = INTVAL (index);
8810
8811 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8812 If vldr is selected it uses arm_coproc_mem_operand. */
8813 if (TARGET_LDRD)
8814 return val > -256 && val < 256;
8815 else
8816 return val > -4096 && val < 4092;
8817 }
8818
8819 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8820 }
8821
8822 if (GET_MODE_SIZE (mode) <= 4
8823 && ! (arm_arch4
8824 && (mode == HImode
8825 || mode == HFmode
8826 || (mode == QImode && outer == SIGN_EXTEND))))
8827 {
8828 if (code == MULT)
8829 {
8830 rtx xiop0 = XEXP (index, 0);
8831 rtx xiop1 = XEXP (index, 1);
8832
8833 return ((arm_address_register_rtx_p (xiop0, strict_p)
8834 && power_of_two_operand (xiop1, SImode))
8835 || (arm_address_register_rtx_p (xiop1, strict_p)
8836 && power_of_two_operand (xiop0, SImode)));
8837 }
8838 else if (code == LSHIFTRT || code == ASHIFTRT
8839 || code == ASHIFT || code == ROTATERT)
8840 {
8841 rtx op = XEXP (index, 1);
8842
8843 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8844 && CONST_INT_P (op)
8845 && INTVAL (op) > 0
8846 && INTVAL (op) <= 31);
8847 }
8848 }
8849
8850 /* For ARM v4 we may be doing a sign-extend operation during the
8851 load. */
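/* Illustrative ranges derived from the checks below: an SImode access can
   use a 12-bit immediate (so RANGE is 4096, i.e. offsets -4095..4095),
   while HImode/HFmode and sign-extended QImode accesses on ARMv4+ use the
   LDRH/LDRSH/LDRSB forms with an 8-bit immediate (RANGE 256).  */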
8852 if (arm_arch4)
8853 {
8854 if (mode == HImode
8855 || mode == HFmode
8856 || (outer == SIGN_EXTEND && mode == QImode))
8857 range = 256;
8858 else
8859 range = 4096;
8860 }
8861 else
8862 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8863
8864 return (code == CONST_INT
8865 && INTVAL (index) < range
8866 && INTVAL (index) > -range);
8867 }
8868
8869 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8870 index operand, i.e. 1, 2, 4 or 8. */
8871 static bool
8872 thumb2_index_mul_operand (rtx op)
8873 {
8874 HOST_WIDE_INT val;
8875
8876 if (!CONST_INT_P (op))
8877 return false;
8878
8879 val = INTVAL (op);
8880 return (val == 1 || val == 2 || val == 4 || val == 8);
8881 }
8882
8883 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8884 static int
8885 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8886 {
8887 enum rtx_code code = GET_CODE (index);
8888
8889 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8890 /* Standard coprocessor addressing modes. */
8891 if (TARGET_VFP_BASE
8892 && (mode == SFmode || mode == DFmode))
8893 return (code == CONST_INT && INTVAL (index) < 1024
8894 /* Thumb-2 allows only a > -256 index range for its core register
8895 loads/stores. Since we allow SF/DF in core registers, we have
8896 to use the intersection between -256~4096 (core) and -1024~1024
8897 (coprocessor). */
8898 && INTVAL (index) > -256
8899 && (INTVAL (index) & 3) == 0);
8900
8901 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8902 {
8903 /* For DImode assume values will usually live in core regs
8904 and only allow LDRD addressing modes. */
8905 if (!TARGET_LDRD || mode != DImode)
8906 return (code == CONST_INT
8907 && INTVAL (index) < 1024
8908 && INTVAL (index) > -1024
8909 && (INTVAL (index) & 3) == 0);
8910 }
8911
8912 /* For quad modes, we restrict the constant offset to be slightly less
8913 than what the instruction format permits. We do this because for
8914 quad mode moves, we will actually decompose them into two separate
8915 double-mode reads or writes. INDEX must therefore be a valid
8916 (double-mode) offset and so should INDEX+8. */
8917 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8918 return (code == CONST_INT
8919 && INTVAL (index) < 1016
8920 && INTVAL (index) > -1024
8921 && (INTVAL (index) & 3) == 0);
8922
8923 /* We have no such constraint on double mode offsets, so we permit the
8924 full range of the instruction format. */
8925 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8926 return (code == CONST_INT
8927 && INTVAL (index) < 1024
8928 && INTVAL (index) > -1024
8929 && (INTVAL (index) & 3) == 0);
8930
8931 if (arm_address_register_rtx_p (index, strict_p)
8932 && (GET_MODE_SIZE (mode) <= 4))
8933 return 1;
8934
8935 if (mode == DImode || mode == DFmode)
8936 {
8937 if (code == CONST_INT)
8938 {
8939 HOST_WIDE_INT val = INTVAL (index);
8940 /* Thumb-2 ldrd only has reg+const addressing modes.
8941 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8942 If vldr is selected it uses arm_coproc_mem_operand. */
8943 if (TARGET_LDRD)
8944 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8945 else
8946 return IN_RANGE (val, -255, 4095 - 4);
8947 }
8948 else
8949 return 0;
8950 }
8951
8952 if (code == MULT)
8953 {
8954 rtx xiop0 = XEXP (index, 0);
8955 rtx xiop1 = XEXP (index, 1);
8956
8957 return ((arm_address_register_rtx_p (xiop0, strict_p)
8958 && thumb2_index_mul_operand (xiop1))
8959 || (arm_address_register_rtx_p (xiop1, strict_p)
8960 && thumb2_index_mul_operand (xiop0)));
8961 }
8962 else if (code == ASHIFT)
8963 {
8964 rtx op = XEXP (index, 1);
8965
8966 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8967 && CONST_INT_P (op)
8968 && INTVAL (op) > 0
8969 && INTVAL (op) <= 3);
8970 }
8971
8972 return (code == CONST_INT
8973 && INTVAL (index) < 4096
8974 && INTVAL (index) > -256);
8975 }
8976
8977 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8978 static int
8979 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8980 {
8981 int regno;
8982
8983 if (!REG_P (x))
8984 return 0;
8985
8986 regno = REGNO (x);
8987
8988 if (strict_p)
8989 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8990
8991 return (regno <= LAST_LO_REGNUM
8992 || regno > LAST_VIRTUAL_REGISTER
8993 || regno == FRAME_POINTER_REGNUM
8994 || (GET_MODE_SIZE (mode) >= 4
8995 && (regno == STACK_POINTER_REGNUM
8996 || regno >= FIRST_PSEUDO_REGISTER
8997 || x == hard_frame_pointer_rtx
8998 || x == arg_pointer_rtx)));
8999 }
9000
9001 /* Return nonzero if x is a legitimate index register. This is the case
9002 for any base register that can access a QImode object. */
9003 inline static int
9004 thumb1_index_register_rtx_p (rtx x, int strict_p)
9005 {
9006 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9007 }
9008
9009 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9010
9011 The AP may be eliminated to either the SP or the FP, so we use the
9012 least common denominator, e.g. SImode, and offsets from 0 to 64.
9013
9014 ??? Verify whether the above is the right approach.
9015
9016 ??? Also, the FP may be eliminated to the SP, so perhaps that
9017 needs special handling also.
9018
9019 ??? Look at how the mips16 port solves this problem. It probably uses
9020 better ways to solve some of these problems.
9021
9022 Although it is not incorrect, we don't accept QImode and HImode
9023 addresses based on the frame pointer or arg pointer until the
9024 reload pass starts. This is so that eliminating such addresses
9025 into stack based ones won't produce impossible code. */
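/* Examples (illustrative only, derived from the checks below): for SImode,
   (plus (reg r3) (const_int 28)) is accepted (offset 0..124, word aligned),
   an SP-relative SImode offset may be as large as 1020, and REG+REG is
   accepted for two index registers as long as neither is the frame
   pointer.  */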
9026 int
9027 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9028 {
9029 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9030 return 0;
9031
9032 /* ??? Not clear if this is right. Experiment. */
9033 if (GET_MODE_SIZE (mode) < 4
9034 && !(reload_in_progress || reload_completed)
9035 && (reg_mentioned_p (frame_pointer_rtx, x)
9036 || reg_mentioned_p (arg_pointer_rtx, x)
9037 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9038 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9039 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9040 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9041 return 0;
9042
9043 /* Accept any base register. SP only in SImode or larger. */
9044 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9045 return 1;
9046
9047 /* This is PC relative data before arm_reorg runs. */
9048 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9049 && SYMBOL_REF_P (x)
9050 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9051 && !arm_disable_literal_pool)
9052 return 1;
9053
9054 /* This is PC relative data after arm_reorg runs. */
9055 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9056 && reload_completed
9057 && (LABEL_REF_P (x)
9058 || (GET_CODE (x) == CONST
9059 && GET_CODE (XEXP (x, 0)) == PLUS
9060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9061 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9062 return 1;
9063
9064 /* Post-inc indexing only supported for SImode and larger. */
9065 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9066 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9067 return 1;
9068
9069 else if (GET_CODE (x) == PLUS)
9070 {
9071 /* REG+REG address can be any two index registers. */
9072 /* We disallow FRAME+REG addressing since we know that FRAME
9073 will be replaced with STACK, and SP relative addressing only
9074 permits SP+OFFSET. */
9075 if (GET_MODE_SIZE (mode) <= 4
9076 && XEXP (x, 0) != frame_pointer_rtx
9077 && XEXP (x, 1) != frame_pointer_rtx
9078 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9079 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9080 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9081 return 1;
9082
9083 /* REG+const has 5-7 bit offset for non-SP registers. */
9084 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9085 || XEXP (x, 0) == arg_pointer_rtx)
9086 && CONST_INT_P (XEXP (x, 1))
9087 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9088 return 1;
9089
9090 /* REG+const has 10-bit offset for SP, but only SImode and
9091 larger is supported. */
9092 /* ??? Should probably check for DI/DFmode overflow here
9093 just like GO_IF_LEGITIMATE_OFFSET does. */
9094 else if (REG_P (XEXP (x, 0))
9095 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9096 && GET_MODE_SIZE (mode) >= 4
9097 && CONST_INT_P (XEXP (x, 1))
9098 && INTVAL (XEXP (x, 1)) >= 0
9099 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9100 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9101 return 1;
9102
9103 else if (REG_P (XEXP (x, 0))
9104 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9105 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9106 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9107 && REGNO (XEXP (x, 0))
9108 <= LAST_VIRTUAL_POINTER_REGISTER))
9109 && GET_MODE_SIZE (mode) >= 4
9110 && CONST_INT_P (XEXP (x, 1))
9111 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9112 return 1;
9113 }
9114
9115 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9116 && GET_MODE_SIZE (mode) == 4
9117 && SYMBOL_REF_P (x)
9118 && CONSTANT_POOL_ADDRESS_P (x)
9119 && !arm_disable_literal_pool
9120 && ! (flag_pic
9121 && symbol_mentioned_p (get_pool_constant (x))
9122 && ! pcrel_constant_p (get_pool_constant (x))))
9123 return 1;
9124
9125 return 0;
9126 }
9127
9128 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9129 instruction of mode MODE. */
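/* Worked limits (illustrative, matching the cases below): byte accesses
   allow offsets 0..31, halfword accesses 0..62 (even), and SImode accesses
   0..124 (offset + mode size must not exceed 128 and the offset must be a
   multiple of 4).  */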
9130 int
9131 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9132 {
9133 switch (GET_MODE_SIZE (mode))
9134 {
9135 case 1:
9136 return val >= 0 && val < 32;
9137
9138 case 2:
9139 return val >= 0 && val < 64 && (val & 1) == 0;
9140
9141 default:
9142 return (val >= 0
9143 && (val + GET_MODE_SIZE (mode)) <= 128
9144 && (val & 3) == 0);
9145 }
9146 }
9147
9148 bool
9149 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9150 {
9151 if (TARGET_ARM)
9152 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9153 else if (TARGET_THUMB2)
9154 return thumb2_legitimate_address_p (mode, x, strict_p);
9155 else /* if (TARGET_THUMB1) */
9156 return thumb1_legitimate_address_p (mode, x, strict_p);
9157 }
9158
9159 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9160
9161 Given an rtx X being reloaded into a reg required to be
9162 in class CLASS, return the class of reg to actually use.
9163 In general this is just CLASS, but for the Thumb core registers and
9164 immediate constants we prefer a LO_REGS class or a subset. */
9165
9166 static reg_class_t
9167 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9168 {
9169 if (TARGET_32BIT)
9170 return rclass;
9171 else
9172 {
9173 if (rclass == GENERAL_REGS)
9174 return LO_REGS;
9175 else
9176 return rclass;
9177 }
9178 }
9179
9180 /* Build the SYMBOL_REF for __tls_get_addr. */
9181
9182 static GTY(()) rtx tls_get_addr_libfunc;
9183
9184 static rtx
9185 get_tls_get_addr (void)
9186 {
9187 if (!tls_get_addr_libfunc)
9188 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9189 return tls_get_addr_libfunc;
9190 }
9191
9192 rtx
9193 arm_load_tp (rtx target)
9194 {
9195 if (!target)
9196 target = gen_reg_rtx (SImode);
9197
9198 if (TARGET_HARD_TP)
9199 {
9200 /* Can return in any reg. */
9201 emit_insn (gen_load_tp_hard (target));
9202 }
9203 else
9204 {
9205 /* Always returned in r0. Immediately copy the result into a pseudo,
9206 otherwise other uses of r0 (e.g. setting up function arguments) may
9207 clobber the value. */
9208
9209 rtx tmp;
9210
9211 if (TARGET_FDPIC)
9212 {
9213 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9214 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9215
9216 emit_insn (gen_load_tp_soft_fdpic ());
9217
9218 /* Restore r9. */
9219 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9220 }
9221 else
9222 emit_insn (gen_load_tp_soft ());
9223
9224 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9225 emit_move_insn (target, tmp);
9226 }
9227 return target;
9228 }
9229
9230 static rtx
9231 load_tls_operand (rtx x, rtx reg)
9232 {
9233 rtx tmp;
9234
9235 if (reg == NULL_RTX)
9236 reg = gen_reg_rtx (SImode);
9237
9238 tmp = gen_rtx_CONST (SImode, x);
9239
9240 emit_move_insn (reg, tmp);
9241
9242 return reg;
9243 }
9244
9245 static rtx_insn *
9246 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9247 {
9248 rtx label, labelno = NULL_RTX, sum;
9249
9250 gcc_assert (reloc != TLS_DESCSEQ);
9251 start_sequence ();
9252
9253 if (TARGET_FDPIC)
9254 {
9255 sum = gen_rtx_UNSPEC (Pmode,
9256 gen_rtvec (2, x, GEN_INT (reloc)),
9257 UNSPEC_TLS);
9258 }
9259 else
9260 {
9261 labelno = GEN_INT (pic_labelno++);
9262 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9263 label = gen_rtx_CONST (VOIDmode, label);
9264
9265 sum = gen_rtx_UNSPEC (Pmode,
9266 gen_rtvec (4, x, GEN_INT (reloc), label,
9267 GEN_INT (TARGET_ARM ? 8 : 4)),
9268 UNSPEC_TLS);
9269 }
9270 reg = load_tls_operand (sum, reg);
9271
9272 if (TARGET_FDPIC)
9273 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9274 else if (TARGET_ARM)
9275 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9276 else
9277 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9278
9279 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9280 LCT_PURE, /* LCT_CONST? */
9281 Pmode, reg, Pmode);
9282
9283 rtx_insn *insns = get_insns ();
9284 end_sequence ();
9285
9286 return insns;
9287 }
9288
9289 static rtx
9290 arm_tls_descseq_addr (rtx x, rtx reg)
9291 {
9292 rtx labelno = GEN_INT (pic_labelno++);
9293 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9294 rtx sum = gen_rtx_UNSPEC (Pmode,
9295 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9296 gen_rtx_CONST (VOIDmode, label),
9297 GEN_INT (!TARGET_ARM)),
9298 UNSPEC_TLS);
9299 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9300
9301 emit_insn (gen_tlscall (x, labelno));
9302 if (!reg)
9303 reg = gen_reg_rtx (SImode);
9304 else
9305 gcc_assert (REGNO (reg) != R0_REGNUM);
9306
9307 emit_move_insn (reg, reg0);
9308
9309 return reg;
9310 }
9311
9312
9313 rtx
9314 legitimize_tls_address (rtx x, rtx reg)
9315 {
9316 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9317 rtx_insn *insns;
9318 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9319
9320 switch (model)
9321 {
9322 case TLS_MODEL_GLOBAL_DYNAMIC:
9323 if (TARGET_GNU2_TLS)
9324 {
9325 gcc_assert (!TARGET_FDPIC);
9326
9327 reg = arm_tls_descseq_addr (x, reg);
9328
9329 tp = arm_load_tp (NULL_RTX);
9330
9331 dest = gen_rtx_PLUS (Pmode, tp, reg);
9332 }
9333 else
9334 {
9335 /* Original scheme */
9336 if (TARGET_FDPIC)
9337 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9338 else
9339 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9340 dest = gen_reg_rtx (Pmode);
9341 emit_libcall_block (insns, dest, ret, x);
9342 }
9343 return dest;
9344
9345 case TLS_MODEL_LOCAL_DYNAMIC:
9346 if (TARGET_GNU2_TLS)
9347 {
9348 gcc_assert (!TARGET_FDPIC);
9349
9350 reg = arm_tls_descseq_addr (x, reg);
9351
9352 tp = arm_load_tp (NULL_RTX);
9353
9354 dest = gen_rtx_PLUS (Pmode, tp, reg);
9355 }
9356 else
9357 {
9358 if (TARGET_FDPIC)
9359 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9360 else
9361 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9362
9363 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9364 share the LDM result with other LD model accesses. */
9365 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9366 UNSPEC_TLS);
9367 dest = gen_reg_rtx (Pmode);
9368 emit_libcall_block (insns, dest, ret, eqv);
9369
9370 /* Load the addend. */
9371 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9372 GEN_INT (TLS_LDO32)),
9373 UNSPEC_TLS);
9374 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9375 dest = gen_rtx_PLUS (Pmode, dest, addend);
9376 }
9377 return dest;
9378
9379 case TLS_MODEL_INITIAL_EXEC:
9380 if (TARGET_FDPIC)
9381 {
9382 sum = gen_rtx_UNSPEC (Pmode,
9383 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9384 UNSPEC_TLS);
9385 reg = load_tls_operand (sum, reg);
9386 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9387 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9388 }
9389 else
9390 {
9391 labelno = GEN_INT (pic_labelno++);
9392 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9393 label = gen_rtx_CONST (VOIDmode, label);
9394 sum = gen_rtx_UNSPEC (Pmode,
9395 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9396 GEN_INT (TARGET_ARM ? 8 : 4)),
9397 UNSPEC_TLS);
9398 reg = load_tls_operand (sum, reg);
9399
9400 if (TARGET_ARM)
9401 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9402 else if (TARGET_THUMB2)
9403 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9404 else
9405 {
9406 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9407 emit_move_insn (reg, gen_const_mem (SImode, reg));
9408 }
9409 }
9410
9411 tp = arm_load_tp (NULL_RTX);
9412
9413 return gen_rtx_PLUS (Pmode, tp, reg);
9414
9415 case TLS_MODEL_LOCAL_EXEC:
9416 tp = arm_load_tp (NULL_RTX);
9417
9418 reg = gen_rtx_UNSPEC (Pmode,
9419 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9420 UNSPEC_TLS);
9421 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9422
9423 return gen_rtx_PLUS (Pmode, tp, reg);
9424
9425 default:
9426 abort ();
9427 }
9428 }
9429
9430 /* Try machine-dependent ways of modifying an illegitimate address
9431 to be legitimate. If we find one, return the new, valid address. */
9432 rtx
9433 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9434 {
9435 if (arm_tls_referenced_p (x))
9436 {
9437 rtx addend = NULL;
9438
9439 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9440 {
9441 addend = XEXP (XEXP (x, 0), 1);
9442 x = XEXP (XEXP (x, 0), 0);
9443 }
9444
9445 if (!SYMBOL_REF_P (x))
9446 return x;
9447
9448 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9449
9450 x = legitimize_tls_address (x, NULL_RTX);
9451
9452 if (addend)
9453 {
9454 x = gen_rtx_PLUS (SImode, x, addend);
9455 orig_x = x;
9456 }
9457 else
9458 return x;
9459 }
9460
9461 if (TARGET_THUMB1)
9462 return thumb_legitimize_address (x, orig_x, mode);
9463
9464 if (GET_CODE (x) == PLUS)
9465 {
9466 rtx xop0 = XEXP (x, 0);
9467 rtx xop1 = XEXP (x, 1);
9468
9469 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9470 xop0 = force_reg (SImode, xop0);
9471
9472 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9473 && !symbol_mentioned_p (xop1))
9474 xop1 = force_reg (SImode, xop1);
9475
9476 if (ARM_BASE_REGISTER_RTX_P (xop0)
9477 && CONST_INT_P (xop1))
9478 {
9479 HOST_WIDE_INT n, low_n;
9480 rtx base_reg, val;
9481 n = INTVAL (xop1);
9482
9483 /* VFP addressing modes actually allow greater offsets, but for
9484 now we just stick with the lowest common denominator. */
9485 if (mode == DImode || mode == DFmode)
9486 {
9487 low_n = n & 0x0f;
9488 n &= ~0x0f;
9489 if (low_n > 4)
9490 {
9491 n += 16;
9492 low_n -= 16;
9493 }
9494 }
9495 else
9496 {
9497 low_n = ((mode) == TImode ? 0
9498 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9499 n -= low_n;
9500 }
9501
9502 base_reg = gen_reg_rtx (SImode);
9503 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9504 emit_move_insn (base_reg, val);
9505 x = plus_constant (Pmode, base_reg, low_n);
9506 }
9507 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9508 x = gen_rtx_PLUS (SImode, xop0, xop1);
9509 }
9510
9511 /* XXX We don't allow MINUS any more -- see comment in
9512 arm_legitimate_address_outer_p (). */
9513 else if (GET_CODE (x) == MINUS)
9514 {
9515 rtx xop0 = XEXP (x, 0);
9516 rtx xop1 = XEXP (x, 1);
9517
9518 if (CONSTANT_P (xop0))
9519 xop0 = force_reg (SImode, xop0);
9520
9521 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9522 xop1 = force_reg (SImode, xop1);
9523
9524 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9525 x = gen_rtx_MINUS (SImode, xop0, xop1);
9526 }
9527
9528 /* Make sure to take full advantage of the pre-indexed addressing mode
9529 with absolute addresses, which often allows the base register to
9530 be factorized for multiple adjacent memory references, and might
9531 even allow the minipool to be avoided entirely. */
9532 else if (CONST_INT_P (x) && optimize > 0)
9533 {
9534 unsigned int bits;
9535 HOST_WIDE_INT mask, base, index;
9536 rtx base_reg;
9537
9538 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9539 only use an 8-bit index. So let's use a 12-bit index for
9540 SImode only and hope that arm_gen_constant will enable LDRB
9541 to use more bits. */
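/* For instance (an illustrative sketch): an SImode load from absolute
   address 0x12345678 uses bits == 12, so base = 0x12345000 is forced into
   a register and the access becomes [base_reg, #0x678].  */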
9542 bits = (mode == SImode) ? 12 : 8;
9543 mask = (1 << bits) - 1;
9544 base = INTVAL (x) & ~mask;
9545 index = INTVAL (x) & mask;
9546 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9547 {
9548 /* It'll most probably be more efficient to generate the
9549 base with more bits set and use a negative index instead.
9550 Don't do this for Thumb as negative offsets are much more
9551 limited. */
9552 base |= mask;
9553 index -= mask;
9554 }
9555 base_reg = force_reg (SImode, GEN_INT (base));
9556 x = plus_constant (Pmode, base_reg, index);
9557 }
9558
9559 if (flag_pic)
9560 {
9561 /* We need to find and carefully transform any SYMBOL and LABEL
9562 references; so go back to the original address expression. */
9563 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9564 false /*compute_now*/);
9565
9566 if (new_x != orig_x)
9567 x = new_x;
9568 }
9569
9570 return x;
9571 }
9572
9573
9574 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9575 to be legitimate. If we find one, return the new, valid address. */
9576 rtx
9577 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9578 {
9579 if (GET_CODE (x) == PLUS
9580 && CONST_INT_P (XEXP (x, 1))
9581 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9582 || INTVAL (XEXP (x, 1)) < 0))
9583 {
9584 rtx xop0 = XEXP (x, 0);
9585 rtx xop1 = XEXP (x, 1);
9586 HOST_WIDE_INT offset = INTVAL (xop1);
9587
9588 /* Try and fold the offset into a biasing of the base register and
9589 then offsetting that. Don't do this when optimizing for space
9590 since it can cause too many CSEs. */
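/* As an illustration (not from the original source): an SImode access at
   base + 260 can be rebased as base' = base + 252 and then accessed as
   [base', #8], keeping the residual offset within the 5-bit scaled range.  */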
9591 if (optimize_size && offset >= 0
9592 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9593 {
9594 HOST_WIDE_INT delta;
9595
9596 if (offset >= 256)
9597 delta = offset - (256 - GET_MODE_SIZE (mode));
9598 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9599 delta = 31 * GET_MODE_SIZE (mode);
9600 else
9601 delta = offset & (~31 * GET_MODE_SIZE (mode));
9602
9603 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9604 NULL_RTX);
9605 x = plus_constant (Pmode, xop0, delta);
9606 }
9607 else if (offset < 0 && offset > -256)
9608 /* Small negative offsets are best done with a subtract before the
9609 dereference, since forcing these into a register normally takes two
9610 instructions. */
9611 x = force_operand (x, NULL_RTX);
9612 else
9613 {
9614 /* For the remaining cases, force the constant into a register. */
9615 xop1 = force_reg (SImode, xop1);
9616 x = gen_rtx_PLUS (SImode, xop0, xop1);
9617 }
9618 }
9619 else if (GET_CODE (x) == PLUS
9620 && s_register_operand (XEXP (x, 1), SImode)
9621 && !s_register_operand (XEXP (x, 0), SImode))
9622 {
9623 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9624
9625 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9626 }
9627
9628 if (flag_pic)
9629 {
9630 /* We need to find and carefully transform any SYMBOL and LABEL
9631 references; so go back to the original address expression. */
9632 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9633 false /*compute_now*/);
9634
9635 if (new_x != orig_x)
9636 x = new_x;
9637 }
9638
9639 return x;
9640 }
9641
9642 /* Return TRUE if X contains any TLS symbol references. */
9643
9644 bool
9645 arm_tls_referenced_p (rtx x)
9646 {
9647 if (! TARGET_HAVE_TLS)
9648 return false;
9649
9650 subrtx_iterator::array_type array;
9651 FOR_EACH_SUBRTX (iter, array, x, ALL)
9652 {
9653 const_rtx x = *iter;
9654 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9655 {
9656 /* ARM currently does not provide relocations to encode TLS variables
9657 into AArch32 instructions, only data, so there is no way to
9658 currently implement these if a literal pool is disabled. */
9659 if (arm_disable_literal_pool)
9660 sorry ("accessing thread-local storage is not currently supported "
9661 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9662
9663 return true;
9664 }
9665
9666 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9667 TLS offsets, not real symbol references. */
9668 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9669 iter.skip_subrtxes ();
9670 }
9671 return false;
9672 }
9673
9674 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9675
9676 On the ARM, allow any integer (invalid ones are removed later by insn
9677 patterns), nice doubles and symbol_refs which refer to the function's
9678 constant pool XXX.
9679
9680 When generating pic allow anything. */
9681
9682 static bool
9683 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9684 {
9685 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9686 return false;
9687
9688 return flag_pic || !label_mentioned_p (x);
9689 }
9690
9691 static bool
9692 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9693 {
9694 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9695 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
9696 for ARMv8-M Baseline or later, the result is valid. */
9697 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9698 x = XEXP (x, 0);
9699
9700 return (CONST_INT_P (x)
9701 || CONST_DOUBLE_P (x)
9702 || CONSTANT_ADDRESS_P (x)
9703 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9704 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9705 we build the symbol address with upper/lower
9706 relocations. */
9707 || (TARGET_THUMB1
9708 && !label_mentioned_p (x)
9709 && arm_valid_symbolic_address_p (x)
9710 && arm_disable_literal_pool)
9711 || flag_pic);
9712 }
9713
9714 static bool
9715 arm_legitimate_constant_p (machine_mode mode, rtx x)
9716 {
9717 return (!arm_cannot_force_const_mem (mode, x)
9718 && (TARGET_32BIT
9719 ? arm_legitimate_constant_p_1 (mode, x)
9720 : thumb_legitimate_constant_p (mode, x)));
9721 }
9722
9723 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9724
9725 static bool
9726 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9727 {
9728 rtx base, offset;
9729 split_const (x, &base, &offset);
9730
9731 if (SYMBOL_REF_P (base))
9732 {
9733 /* Function symbols cannot have an offset due to the Thumb bit. */
9734 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9735 && INTVAL (offset) != 0)
9736 return true;
9737
9738 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9739 && !offset_within_block_p (base, INTVAL (offset)))
9740 return true;
9741 }
9742 return arm_tls_referenced_p (x);
9743 }
9744 \f
9745 #define REG_OR_SUBREG_REG(X) \
9746 (REG_P (X) \
9747 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9748
9749 #define REG_OR_SUBREG_RTX(X) \
9750 (REG_P (X) ? (X) : SUBREG_REG (X))
9751
9752 static inline int
9753 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9754 {
9755 machine_mode mode = GET_MODE (x);
9756 int total, words;
9757
9758 switch (code)
9759 {
9760 case ASHIFT:
9761 case ASHIFTRT:
9762 case LSHIFTRT:
9763 case ROTATERT:
9764 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9765
9766 case PLUS:
9767 case MINUS:
9768 case COMPARE:
9769 case NEG:
9770 case NOT:
9771 return COSTS_N_INSNS (1);
9772
9773 case MULT:
9774 if (arm_arch6m && arm_m_profile_small_mul)
9775 return COSTS_N_INSNS (32);
9776
9777 if (CONST_INT_P (XEXP (x, 1)))
9778 {
9779 int cycles = 0;
9780 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9781
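/* The loop below charges one cycle per 2-bit chunk of the constant; for
   example (illustrative), a multiplier of 100 (7 significant bits) needs
   four iterations, giving COSTS_N_INSNS (2) + 4.  */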
9782 while (i)
9783 {
9784 i >>= 2;
9785 cycles++;
9786 }
9787 return COSTS_N_INSNS (2) + cycles;
9788 }
9789 return COSTS_N_INSNS (1) + 16;
9790
9791 case SET:
9792 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9793 the mode. */
9794 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9795 return (COSTS_N_INSNS (words)
9796 + 4 * ((MEM_P (SET_SRC (x)))
9797 + MEM_P (SET_DEST (x))));
9798
9799 case CONST_INT:
9800 if (outer == SET)
9801 {
9802 if (UINTVAL (x) < 256
9803 /* 16-bit constant. */
9804 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9805 return 0;
9806 if (thumb_shiftable_const (INTVAL (x)))
9807 return COSTS_N_INSNS (2);
9808 return arm_disable_literal_pool
9809 ? COSTS_N_INSNS (8)
9810 : COSTS_N_INSNS (3);
9811 }
9812 else if ((outer == PLUS || outer == COMPARE)
9813 && INTVAL (x) < 256 && INTVAL (x) > -256)
9814 return 0;
9815 else if ((outer == IOR || outer == XOR || outer == AND)
9816 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9817 return COSTS_N_INSNS (1);
9818 else if (outer == AND)
9819 {
9820 int i;
9821 /* This duplicates the tests in the andsi3 expander. */
9822 for (i = 9; i <= 31; i++)
9823 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9824 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9825 return COSTS_N_INSNS (2);
9826 }
9827 else if (outer == ASHIFT || outer == ASHIFTRT
9828 || outer == LSHIFTRT)
9829 return 0;
9830 return COSTS_N_INSNS (2);
9831
9832 case CONST:
9833 case CONST_DOUBLE:
9834 case LABEL_REF:
9835 case SYMBOL_REF:
9836 return COSTS_N_INSNS (3);
9837
9838 case UDIV:
9839 case UMOD:
9840 case DIV:
9841 case MOD:
9842 return 100;
9843
9844 case TRUNCATE:
9845 return 99;
9846
9847 case AND:
9848 case XOR:
9849 case IOR:
9850 /* XXX guess. */
9851 return 8;
9852
9853 case MEM:
9854 /* XXX another guess. */
9855 /* Memory costs quite a lot for the first word, but subsequent words
9856 load at the equivalent of a single insn each. */
9857 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9858 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9859 ? 4 : 0));
9860
9861 case IF_THEN_ELSE:
9862 /* XXX a guess. */
9863 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9864 return 14;
9865 return 2;
9866
9867 case SIGN_EXTEND:
9868 case ZERO_EXTEND:
9869 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9870 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9871
9872 if (mode == SImode)
9873 return total;
9874
9875 if (arm_arch6)
9876 return total + COSTS_N_INSNS (1);
9877
9878 /* Assume a two-shift sequence. Increase the cost slightly so
9879 we prefer actual shifts over an extend operation. */
9880 return total + 1 + COSTS_N_INSNS (2);
9881
9882 default:
9883 return 99;
9884 }
9885 }
9886
9887 /* Estimate the size cost of Thumb-1 instructions.
9888 For now most of the code is copied from thumb1_rtx_costs; we need more
9889 fine-grained tuning when we have more related test cases. */
9890 static inline int
9891 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9892 {
9893 machine_mode mode = GET_MODE (x);
9894 int words, cost;
9895
9896 switch (code)
9897 {
9898 case ASHIFT:
9899 case ASHIFTRT:
9900 case LSHIFTRT:
9901 case ROTATERT:
9902 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9903
9904 case PLUS:
9905 case MINUS:
9906 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9907 patterns generated by RTL expansion, especially for the expansion of
9908 multiplication. */
9909 if ((GET_CODE (XEXP (x, 0)) == MULT
9910 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9911 || (GET_CODE (XEXP (x, 1)) == MULT
9912 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9913 return COSTS_N_INSNS (2);
9914 /* Fall through. */
9915 case COMPARE:
9916 case NEG:
9917 case NOT:
9918 return COSTS_N_INSNS (1);
9919
9920 case MULT:
9921 if (CONST_INT_P (XEXP (x, 1)))
9922 {
9923 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9924 into a register first. */
9925 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9926 /* For targets that have a very small, high-latency multiply
9927 unit, we prefer to synthesize the mult with up to 5 instructions,
9928 giving a good balance between size and performance. */
9929 if (arm_arch6m && arm_m_profile_small_mul)
9930 return COSTS_N_INSNS (5);
9931 else
9932 return COSTS_N_INSNS (1) + const_size;
9933 }
9934 return COSTS_N_INSNS (1);
9935
9936 case SET:
9937 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9938 the mode. */
9939 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9940 cost = COSTS_N_INSNS (words);
9941 if (satisfies_constraint_J (SET_SRC (x))
9942 || satisfies_constraint_K (SET_SRC (x))
9943 /* An immediate too big for a 2-byte mov, so MOVT is used. */
9944 || (CONST_INT_P (SET_SRC (x))
9945 && UINTVAL (SET_SRC (x)) >= 256
9946 && TARGET_HAVE_MOVT
9947 && satisfies_constraint_j (SET_SRC (x)))
9948 /* thumb1_movdi_insn. */
9949 || ((words > 1) && MEM_P (SET_SRC (x))))
9950 cost += COSTS_N_INSNS (1);
9951 return cost;
9952
9953 case CONST_INT:
9954 if (outer == SET)
9955 {
9956 if (UINTVAL (x) < 256)
9957 return COSTS_N_INSNS (1);
9958 /* movw is 4byte long. */
9959 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9960 return COSTS_N_INSNS (2);
9961 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9962 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9963 return COSTS_N_INSNS (2);
9964 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9965 if (thumb_shiftable_const (INTVAL (x)))
9966 return COSTS_N_INSNS (2);
9967 return arm_disable_literal_pool
9968 ? COSTS_N_INSNS (8)
9969 : COSTS_N_INSNS (3);
9970 }
9971 else if ((outer == PLUS || outer == COMPARE)
9972 && INTVAL (x) < 256 && INTVAL (x) > -256)
9973 return 0;
9974 else if ((outer == IOR || outer == XOR || outer == AND)
9975 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9976 return COSTS_N_INSNS (1);
9977 else if (outer == AND)
9978 {
9979 int i;
9980 /* This duplicates the tests in the andsi3 expander. */
9981 for (i = 9; i <= 31; i++)
9982 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9983 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9984 return COSTS_N_INSNS (2);
9985 }
9986 else if (outer == ASHIFT || outer == ASHIFTRT
9987 || outer == LSHIFTRT)
9988 return 0;
9989 return COSTS_N_INSNS (2);
9990
9991 case CONST:
9992 case CONST_DOUBLE:
9993 case LABEL_REF:
9994 case SYMBOL_REF:
9995 return COSTS_N_INSNS (3);
9996
9997 case UDIV:
9998 case UMOD:
9999 case DIV:
10000 case MOD:
10001 return 100;
10002
10003 case TRUNCATE:
10004 return 99;
10005
10006 case AND:
10007 case XOR:
10008 case IOR:
10009 return COSTS_N_INSNS (1);
10010
10011 case MEM:
10012 return (COSTS_N_INSNS (1)
10013 + COSTS_N_INSNS (1)
10014 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10015 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10016 ? COSTS_N_INSNS (1) : 0));
10017
10018 case IF_THEN_ELSE:
10019 /* XXX a guess. */
10020 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10021 return 14;
10022 return 2;
10023
10024 case ZERO_EXTEND:
10025 /* XXX still guessing. */
10026 switch (GET_MODE (XEXP (x, 0)))
10027 {
10028 case E_QImode:
10029 return (1 + (mode == DImode ? 4 : 0)
10030 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10031
10032 case E_HImode:
10033 return (4 + (mode == DImode ? 4 : 0)
10034 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10035
10036 case E_SImode:
10037 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10038
10039 default:
10040 return 99;
10041 }
10042
10043 default:
10044 return 99;
10045 }
10046 }
10047
10048 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10049 PLUS, adds the carry flag, then return the other operand. If
10050 neither is a carry, return OP unchanged. */
10051 static rtx
10052 strip_carry_operation (rtx op)
10053 {
10054 gcc_assert (GET_CODE (op) == PLUS);
10055 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10056 return XEXP (op, 1);
10057 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10058 return XEXP (op, 0);
10059 return op;
10060 }
10061
10062 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10063 operand, then return the operand that is being shifted. If the shift
10064 is not by a constant, then set SHIFT_REG to point to the operand.
10065 Return NULL if OP is not a shifter operand. */
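/* For example (illustrative): given (mult (reg r1) (const_int 4)) this
   returns r1, since a multiply by a power of two is really a left shift
   by a constant; for (ashift (reg r1) (reg r2)) it returns r1 and sets
   *SHIFT_REG to r2.  */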
10066 static rtx
10067 shifter_op_p (rtx op, rtx *shift_reg)
10068 {
10069 enum rtx_code code = GET_CODE (op);
10070
10071 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10072 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10073 return XEXP (op, 0);
10074 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10075 return XEXP (op, 0);
10076 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10077 || code == ASHIFTRT)
10078 {
10079 if (!CONST_INT_P (XEXP (op, 1)))
10080 *shift_reg = XEXP (op, 1);
10081 return XEXP (op, 0);
10082 }
10083
10084 return NULL;
10085 }
10086
10087 static bool
10088 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10089 {
10090 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10091 rtx_code code = GET_CODE (x);
10092 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10093
10094 switch (XINT (x, 1))
10095 {
10096 case UNSPEC_UNALIGNED_LOAD:
10097 /* We can only do unaligned loads into the integer unit, and we can't
10098 use LDM or LDRD. */
10099 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10100 if (speed_p)
10101 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10102 + extra_cost->ldst.load_unaligned);
10103
10104 #ifdef NOT_YET
10105 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10106 ADDR_SPACE_GENERIC, speed_p);
10107 #endif
10108 return true;
10109
10110 case UNSPEC_UNALIGNED_STORE:
10111 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10112 if (speed_p)
10113 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10114 + extra_cost->ldst.store_unaligned);
10115
10116 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10117 #ifdef NOT_YET
10118 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10119 ADDR_SPACE_GENERIC, speed_p);
10120 #endif
10121 return true;
10122
10123 case UNSPEC_VRINTZ:
10124 case UNSPEC_VRINTP:
10125 case UNSPEC_VRINTM:
10126 case UNSPEC_VRINTR:
10127 case UNSPEC_VRINTX:
10128 case UNSPEC_VRINTA:
10129 if (speed_p)
10130 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10131
10132 return true;
10133 default:
10134 *cost = COSTS_N_INSNS (2);
10135 break;
10136 }
10137 return true;
10138 }
10139
10140 /* Cost of a libcall. We assume one insn per argument, an amount for the
10141 call (one insn for -Os) and then one for processing the result. */
10142 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
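/* For example (illustrative): LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */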
10143
10144 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10145 do \
10146 { \
10147 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10148 if (shift_op != NULL \
10149 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10150 { \
10151 if (shift_reg) \
10152 { \
10153 if (speed_p) \
10154 *cost += extra_cost->alu.arith_shift_reg; \
10155 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10156 ASHIFT, 1, speed_p); \
10157 } \
10158 else if (speed_p) \
10159 *cost += extra_cost->alu.arith_shift; \
10160 \
10161 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10162 ASHIFT, 0, speed_p) \
10163 + rtx_cost (XEXP (x, 1 - IDX), \
10164 GET_MODE (shift_op), \
10165 OP, 1, speed_p)); \
10166 return true; \
10167 } \
10168 } \
10169 while (0)
10170
10171 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10172 considering the costs of the addressing mode and memory access
10173 separately. */
10174 static bool
10175 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10176 int *cost, bool speed_p)
10177 {
10178 machine_mode mode = GET_MODE (x);
10179
10180 *cost = COSTS_N_INSNS (1);
10181
10182 if (flag_pic
10183 && GET_CODE (XEXP (x, 0)) == PLUS
10184 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10185 /* This will be split into two instructions. Add the cost of the
10186 additional instruction here. The cost of the memory access is computed
10187 below. See arm.md:calculate_pic_address. */
10188 *cost += COSTS_N_INSNS (1);
10189
10190 /* Calculate cost of the addressing mode. */
10191 if (speed_p)
10192 {
10193 arm_addr_mode_op op_type;
10194 switch (GET_CODE (XEXP (x, 0)))
10195 {
10196 default:
10197 case REG:
10198 op_type = AMO_DEFAULT;
10199 break;
10200 case MINUS:
10201 /* MINUS does not appear in RTL, but the architecture supports it,
10202 so handle this case defensively. */
10203 /* fall through */
10204 case PLUS:
10205 op_type = AMO_NO_WB;
10206 break;
10207 case PRE_INC:
10208 case PRE_DEC:
10209 case POST_INC:
10210 case POST_DEC:
10211 case PRE_MODIFY:
10212 case POST_MODIFY:
10213 op_type = AMO_WB;
10214 break;
10215 }
10216
10217 if (VECTOR_MODE_P (mode))
10218 *cost += current_tune->addr_mode_costs->vector[op_type];
10219 else if (FLOAT_MODE_P (mode))
10220 *cost += current_tune->addr_mode_costs->fp[op_type];
10221 else
10222 *cost += current_tune->addr_mode_costs->integer[op_type];
10223 }
10224
10225 /* Calculate cost of memory access. */
10226 if (speed_p)
10227 {
10228 if (FLOAT_MODE_P (mode))
10229 {
10230 if (GET_MODE_SIZE (mode) == 8)
10231 *cost += extra_cost->ldst.loadd;
10232 else
10233 *cost += extra_cost->ldst.loadf;
10234 }
10235 else if (VECTOR_MODE_P (mode))
10236 *cost += extra_cost->ldst.loadv;
10237 else
10238 {
10239 /* Integer modes */
10240 if (GET_MODE_SIZE (mode) == 8)
10241 *cost += extra_cost->ldst.ldrd;
10242 else
10243 *cost += extra_cost->ldst.load;
10244 }
10245 }
10246
10247 return true;
10248 }
10249
10250 /* Helper for arm_bfi_p. */
10251 static bool
10252 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10253 {
10254 unsigned HOST_WIDE_INT const1;
10255 unsigned HOST_WIDE_INT const2 = 0;
10256
10257 if (!CONST_INT_P (XEXP (op0, 1)))
10258 return false;
10259
10260 const1 = UINTVAL (XEXP (op0, 1));
10261 if (!CONST_INT_P (XEXP (op1, 1))
10262 || ~UINTVAL (XEXP (op1, 1)) != const1)
10263 return false;
10264
10265 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10266 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10267 {
10268 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10269 *sub0 = XEXP (XEXP (op0, 0), 0);
10270 }
10271 else
10272 *sub0 = XEXP (op0, 0);
10273
10274 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10275 return false;
10276
10277 *sub1 = XEXP (op1, 0);
10278 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10279 }
10280
10281 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10282 format looks something like:
10283
10284 (IOR (AND (reg1) (~const1))
10285 (AND (ASHIFT (reg2) (const2))
10286 (const1)))
10287
10288 where const1 is a consecutive sequence of 1-bits with the
10289 least-significant non-zero bit starting at bit position const2. If
10290 const2 is zero, then the shift will not appear at all, due to
10291 canonicalization. The two arms of the IOR expression may be
10292 flipped. */
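/* A concrete instance (illustrative only): with const1 == 0xff00 and
   const2 == 8, const1 + (1 << const2) == 0x10000, a power of two, so the
   mask is a contiguous run of bits starting at bit 8 and the IOR matches
   the BFI idiom recognized here.  */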
10293 static bool
10294 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10295 {
10296 if (GET_CODE (x) != IOR)
10297 return false;
10298 if (GET_CODE (XEXP (x, 0)) != AND
10299 || GET_CODE (XEXP (x, 1)) != AND)
10300 return false;
10301 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10302 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10303 }
10304
10305 /* RTX costs. Make an estimate of the cost of executing the operation
10306 X, which is contained within an operation with code OUTER_CODE.
10307 SPEED_P indicates whether the cost desired is the performance cost,
10308 or the size cost. The estimate is stored in COST and the return
10309 value is TRUE if the cost calculation is final, or FALSE if the
10310 caller should recurse through the operands of X to add additional
10311 costs.
10312
10313 We currently make no attempt to model the size savings of Thumb-2
10314 16-bit instructions. At the normal points in compilation where
10315 this code is called we have no measure of whether the condition
10316 flags are live or not, and thus no realistic way to determine what
10317 the size will eventually be. */
10318 static bool
10319 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10320 const struct cpu_cost_table *extra_cost,
10321 int *cost, bool speed_p)
10322 {
10323 machine_mode mode = GET_MODE (x);
10324
10325 *cost = COSTS_N_INSNS (1);
10326
10327 if (TARGET_THUMB1)
10328 {
10329 if (speed_p)
10330 *cost = thumb1_rtx_costs (x, code, outer_code);
10331 else
10332 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10333 return true;
10334 }
10335
10336 switch (code)
10337 {
10338 case SET:
10339 *cost = 0;
10340 /* SET RTXs don't have a mode so we get it from the destination. */
10341 mode = GET_MODE (SET_DEST (x));
10342
10343 if (REG_P (SET_SRC (x))
10344 && REG_P (SET_DEST (x)))
10345 {
10346 /* Assume that most copies can be done with a single insn,
10347 unless we don't have HW FP, in which case everything
10348 larger than word mode will require two insns. */
10349 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10350 && GET_MODE_SIZE (mode) > 4)
10351 || mode == DImode)
10352 ? 2 : 1);
10353 /* Conditional register moves can be encoded
10354 in 16 bits in Thumb mode. */
10355 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10356 *cost >>= 1;
10357
10358 return true;
10359 }
10360
10361 if (CONST_INT_P (SET_SRC (x)))
10362 {
10363 /* Handle CONST_INT here, since the value doesn't have a mode
10364 and we would otherwise be unable to work out the true cost. */
10365 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10366 0, speed_p);
10367 outer_code = SET;
10368 /* Slightly lower the cost of setting a core reg to a constant.
10369 This helps break up chains and allows for better scheduling. */
10370 if (REG_P (SET_DEST (x))
10371 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10372 *cost -= 1;
10373 x = SET_SRC (x);
10374 /* Immediate moves with an immediate in the range [0, 255] can be
10375 encoded in 16 bits in Thumb mode. */
10376 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10377 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10378 *cost >>= 1;
10379 goto const_int_cost;
10380 }
10381
10382 return false;
10383
10384 case MEM:
10385 return arm_mem_costs (x, extra_cost, cost, speed_p);
10386
10387 case PARALLEL:
10388 {
10389 /* Calculations of LDM costs are complex. We assume an initial cost
10390 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
10391 registers; each additional group of
10392 ldm_regs_per_insn_subsequent registers then costs one more insn. The
10393 formula for N regs is thus:
10394
10395 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10396 + ldm_regs_per_insn_subsequent - 1)
10397 / ldm_regs_per_insn_subsequent).
10398
10399 Additional costs may also be added for addressing. A similar
10400 formula is used for STM. */
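/* Hypothetical worked example of the formula above (the per-CPU
   values are illustrative only): for N = 5 registers with
   ldm_regs_per_insn_1st = 3 and ldm_regs_per_insn_subsequent = 2,

     ldm_1st + COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2)
       = ldm_1st + COSTS_N_INSNS (1),

   i.e. one insn on top of the initial LDM cost.  */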
10401
10402 bool is_ldm = load_multiple_operation (x, SImode);
10403 bool is_stm = store_multiple_operation (x, SImode);
10404
10405 if (is_ldm || is_stm)
10406 {
10407 if (speed_p)
10408 {
10409 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10410 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10411 ? extra_cost->ldst.ldm_regs_per_insn_1st
10412 : extra_cost->ldst.stm_regs_per_insn_1st;
10413 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10414 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10415 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10416
10417 *cost += regs_per_insn_1st
10418 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10419 + regs_per_insn_sub - 1)
10420 / regs_per_insn_sub);
10421 return true;
10422 }
10423
10424 }
10425 return false;
10426 }
10427 case DIV:
10428 case UDIV:
10429 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10430 && (mode == SFmode || !TARGET_VFP_SINGLE))
10431 *cost += COSTS_N_INSNS (speed_p
10432 ? extra_cost->fp[mode != SFmode].div : 0);
10433 else if (mode == SImode && TARGET_IDIV)
10434 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10435 else
10436 *cost = LIBCALL_COST (2);
10437
10438 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10439 possible, udiv is preferred. */
10440 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10441 return false; /* All arguments must be in registers. */
10442
10443 case MOD:
10444 /* MOD by a power of 2 can be expanded as:
10445 rsbs r1, r0, #0
10446 and r0, r0, #(n - 1)
10447 and r1, r1, #(n - 1)
10448 rsbpl r0, r1, #0. */
10449 if (CONST_INT_P (XEXP (x, 1))
10450 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10451 && mode == SImode)
10452 {
10453 *cost += COSTS_N_INSNS (3);
10454
10455 if (speed_p)
10456 *cost += 2 * extra_cost->alu.logical
10457 + extra_cost->alu.arith;
10458 return true;
10459 }
10460
10461 /* Fall-through. */
10462 case UMOD:
10463 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10464 possible, udiv is preferred. */
10465 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10466 return false; /* All arguments must be in registers. */
10467
10468 case ROTATE:
10469 if (mode == SImode && REG_P (XEXP (x, 1)))
10470 {
10471 *cost += (COSTS_N_INSNS (1)
10472 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10473 if (speed_p)
10474 *cost += extra_cost->alu.shift_reg;
10475 return true;
10476 }
10477 /* Fall through */
10478 case ROTATERT:
10479 case ASHIFT:
10480 case LSHIFTRT:
10481 case ASHIFTRT:
10482 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10483 {
10484 *cost += (COSTS_N_INSNS (2)
10485 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10486 if (speed_p)
10487 *cost += 2 * extra_cost->alu.shift;
10488 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10489 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10490 *cost += 1;
10491 return true;
10492 }
10493 else if (mode == SImode)
10494 {
10495 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10496 /* Slightly disparage register shifts at -Os, but not by much. */
10497 if (!CONST_INT_P (XEXP (x, 1)))
10498 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10499 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10500 return true;
10501 }
10502 else if (GET_MODE_CLASS (mode) == MODE_INT
10503 && GET_MODE_SIZE (mode) < 4)
10504 {
10505 if (code == ASHIFT)
10506 {
10507 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10508 /* Slightly disparage register shifts at -Os, but not by
10509 much. */
10510 if (!CONST_INT_P (XEXP (x, 1)))
10511 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10512 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10513 }
10514 else if (code == LSHIFTRT || code == ASHIFTRT)
10515 {
10516 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10517 {
10518 /* Can use SBFX/UBFX. */
10519 if (speed_p)
10520 *cost += extra_cost->alu.bfx;
10521 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10522 }
10523 else
10524 {
10525 *cost += COSTS_N_INSNS (1);
10526 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10527 if (speed_p)
10528 {
10529 if (CONST_INT_P (XEXP (x, 1)))
10530 *cost += 2 * extra_cost->alu.shift;
10531 else
10532 *cost += (extra_cost->alu.shift
10533 + extra_cost->alu.shift_reg);
10534 }
10535 else
10536 /* Slightly disparage register shifts. */
10537 *cost += !CONST_INT_P (XEXP (x, 1));
10538 }
10539 }
10540 else /* Rotates. */
10541 {
10542 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10543 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10544 if (speed_p)
10545 {
10546 if (CONST_INT_P (XEXP (x, 1)))
10547 *cost += (2 * extra_cost->alu.shift
10548 + extra_cost->alu.log_shift);
10549 else
10550 *cost += (extra_cost->alu.shift
10551 + extra_cost->alu.shift_reg
10552 + extra_cost->alu.log_shift_reg);
10553 }
10554 }
10555 return true;
10556 }
10557
10558 *cost = LIBCALL_COST (2);
10559 return false;
10560
10561 case BSWAP:
10562 if (arm_arch6)
10563 {
10564 if (mode == SImode)
10565 {
10566 if (speed_p)
10567 *cost += extra_cost->alu.rev;
10568
10569 return false;
10570 }
10571 }
10572 else
10573 {
10574 /* No rev instruction available. Look at arm_legacy_rev
10575 and thumb_legacy_rev for the form of RTL used then. */
10576 if (TARGET_THUMB)
10577 {
10578 *cost += COSTS_N_INSNS (9);
10579
10580 if (speed_p)
10581 {
10582 *cost += 6 * extra_cost->alu.shift;
10583 *cost += 3 * extra_cost->alu.logical;
10584 }
10585 }
10586 else
10587 {
10588 *cost += COSTS_N_INSNS (4);
10589
10590 if (speed_p)
10591 {
10592 *cost += 2 * extra_cost->alu.shift;
10593 *cost += extra_cost->alu.arith_shift;
10594 *cost += 2 * extra_cost->alu.logical;
10595 }
10596 }
10597 return true;
10598 }
10599 return false;
10600
10601 case MINUS:
10602 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10603 && (mode == SFmode || !TARGET_VFP_SINGLE))
10604 {
10605 if (GET_CODE (XEXP (x, 0)) == MULT
10606 || GET_CODE (XEXP (x, 1)) == MULT)
10607 {
10608 rtx mul_op0, mul_op1, sub_op;
10609
10610 if (speed_p)
10611 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10612
10613 if (GET_CODE (XEXP (x, 0)) == MULT)
10614 {
10615 mul_op0 = XEXP (XEXP (x, 0), 0);
10616 mul_op1 = XEXP (XEXP (x, 0), 1);
10617 sub_op = XEXP (x, 1);
10618 }
10619 else
10620 {
10621 mul_op0 = XEXP (XEXP (x, 1), 0);
10622 mul_op1 = XEXP (XEXP (x, 1), 1);
10623 sub_op = XEXP (x, 0);
10624 }
10625
10626 /* The first operand of the multiply may be optionally
10627 negated. */
10628 if (GET_CODE (mul_op0) == NEG)
10629 mul_op0 = XEXP (mul_op0, 0);
10630
10631 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10632 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10633 + rtx_cost (sub_op, mode, code, 0, speed_p));
10634
10635 return true;
10636 }
10637
10638 if (speed_p)
10639 *cost += extra_cost->fp[mode != SFmode].addsub;
10640 return false;
10641 }
10642
10643 if (mode == SImode)
10644 {
10645 rtx shift_by_reg = NULL;
10646 rtx shift_op;
10647 rtx non_shift_op;
10648 rtx op0 = XEXP (x, 0);
10649 rtx op1 = XEXP (x, 1);
10650
10651 /* Factor out any borrow operation. There's more than one way
10652 of expressing this; try to recognize them all. */
10653 if (GET_CODE (op0) == MINUS)
10654 {
10655 if (arm_borrow_operation (op1, SImode))
10656 {
10657 op1 = XEXP (op0, 1);
10658 op0 = XEXP (op0, 0);
10659 }
10660 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10661 op0 = XEXP (op0, 0);
10662 }
10663 else if (GET_CODE (op1) == PLUS
10664 && arm_borrow_operation (XEXP (op1, 0), SImode))
10665 op1 = XEXP (op1, 0);
10666 else if (GET_CODE (op0) == NEG
10667 && arm_borrow_operation (op1, SImode))
10668 {
10669 /* Negate with carry-in. For Thumb2 this is done with
10670 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10671 RSC instruction that exists in Arm mode. */
10672 if (speed_p)
10673 *cost += (TARGET_THUMB2
10674 ? extra_cost->alu.arith_shift
10675 : extra_cost->alu.arith);
10676 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10677 return true;
10678 }
10679 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10680 Note we do mean ~borrow here. */
10681 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10682 {
10683 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10684 return true;
10685 }
10686
10687 shift_op = shifter_op_p (op0, &shift_by_reg);
10688 if (shift_op == NULL)
10689 {
10690 shift_op = shifter_op_p (op1, &shift_by_reg);
10691 non_shift_op = op0;
10692 }
10693 else
10694 non_shift_op = op1;
10695
10696 if (shift_op != NULL)
10697 {
10698 if (shift_by_reg != NULL)
10699 {
10700 if (speed_p)
10701 *cost += extra_cost->alu.arith_shift_reg;
10702 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10703 }
10704 else if (speed_p)
10705 *cost += extra_cost->alu.arith_shift;
10706
10707 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10708 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10709 return true;
10710 }
10711
10712 if (arm_arch_thumb2
10713 && GET_CODE (XEXP (x, 1)) == MULT)
10714 {
10715 /* MLS. */
10716 if (speed_p)
10717 *cost += extra_cost->mult[0].add;
10718 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10719 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10720 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10721 return true;
10722 }
10723
10724 if (CONST_INT_P (op0))
10725 {
10726 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10727 INTVAL (op0), NULL_RTX,
10728 NULL_RTX, 1, 0);
10729 *cost = COSTS_N_INSNS (insns);
10730 if (speed_p)
10731 *cost += insns * extra_cost->alu.arith;
10732 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10733 return true;
10734 }
10735 else if (speed_p)
10736 *cost += extra_cost->alu.arith;
10737
10738 /* Don't recurse as we don't want to cost any borrow that
10739 we've stripped. */
10740 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10741 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10742 return true;
10743 }
10744
10745 if (GET_MODE_CLASS (mode) == MODE_INT
10746 && GET_MODE_SIZE (mode) < 4)
10747 {
10748 rtx shift_op, shift_reg;
10749 shift_reg = NULL;
10750
10751 /* We check both sides of the MINUS for shifter operands since,
10752 unlike PLUS, it's not commutative. */
10753
10754 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10755 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10756
10757 /* Slightly disparage, as we might need to widen the result. */
10758 *cost += 1;
10759 if (speed_p)
10760 *cost += extra_cost->alu.arith;
10761
10762 if (CONST_INT_P (XEXP (x, 0)))
10763 {
10764 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10765 return true;
10766 }
10767
10768 return false;
10769 }
10770
10771 if (mode == DImode)
10772 {
10773 *cost += COSTS_N_INSNS (1);
10774
10775 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10776 {
10777 rtx op1 = XEXP (x, 1);
10778
10779 if (speed_p)
10780 *cost += 2 * extra_cost->alu.arith;
10781
10782 if (GET_CODE (op1) == ZERO_EXTEND)
10783 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10784 0, speed_p);
10785 else
10786 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10787 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10788 0, speed_p);
10789 return true;
10790 }
10791 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10792 {
10793 if (speed_p)
10794 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10795 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10796 0, speed_p)
10797 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10798 return true;
10799 }
10800 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10801 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10802 {
10803 if (speed_p)
10804 *cost += (extra_cost->alu.arith
10805 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10806 ? extra_cost->alu.arith
10807 : extra_cost->alu.arith_shift));
10808 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10809 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10810 GET_CODE (XEXP (x, 1)), 0, speed_p));
10811 return true;
10812 }
10813
10814 if (speed_p)
10815 *cost += 2 * extra_cost->alu.arith;
10816 return false;
10817 }
10818
10819 /* Vector mode? */
10820
10821 *cost = LIBCALL_COST (2);
10822 return false;
10823
10824 case PLUS:
10825 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10826 && (mode == SFmode || !TARGET_VFP_SINGLE))
10827 {
10828 if (GET_CODE (XEXP (x, 0)) == MULT)
10829 {
10830 rtx mul_op0, mul_op1, add_op;
10831
10832 if (speed_p)
10833 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10834
10835 mul_op0 = XEXP (XEXP (x, 0), 0);
10836 mul_op1 = XEXP (XEXP (x, 0), 1);
10837 add_op = XEXP (x, 1);
10838
10839 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10840 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10841 + rtx_cost (add_op, mode, code, 0, speed_p));
10842
10843 return true;
10844 }
10845
10846 if (speed_p)
10847 *cost += extra_cost->fp[mode != SFmode].addsub;
10848 return false;
10849 }
10850 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10851 {
10852 *cost = LIBCALL_COST (2);
10853 return false;
10854 }
10855
10856 /* Narrow modes can be synthesized in SImode, but the range
10857 of useful sub-operations is limited. Check for shift operations
10858 on one of the operands. Only left shifts can be used in the
10859 narrow modes. */
10860 if (GET_MODE_CLASS (mode) == MODE_INT
10861 && GET_MODE_SIZE (mode) < 4)
10862 {
10863 rtx shift_op, shift_reg;
10864 shift_reg = NULL;
10865
10866 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10867
10868 if (CONST_INT_P (XEXP (x, 1)))
10869 {
10870 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10871 INTVAL (XEXP (x, 1)), NULL_RTX,
10872 NULL_RTX, 1, 0);
10873 *cost = COSTS_N_INSNS (insns);
10874 if (speed_p)
10875 *cost += insns * extra_cost->alu.arith;
10876 /* Slightly penalize a narrow operation as the result may
10877 need widening. */
10878 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10879 return true;
10880 }
10881
10882 /* Slightly penalize a narrow operation as the result may
10883 need widening. */
10884 *cost += 1;
10885 if (speed_p)
10886 *cost += extra_cost->alu.arith;
10887
10888 return false;
10889 }
10890
10891 if (mode == SImode)
10892 {
10893 rtx shift_op, shift_reg;
10894
10895 if (TARGET_INT_SIMD
10896 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10897 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10898 {
10899 /* UXTA[BH] or SXTA[BH]. */
10900 if (speed_p)
10901 *cost += extra_cost->alu.extend_arith;
10902 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10903 0, speed_p)
10904 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10905 return true;
10906 }
10907
10908 rtx op0 = XEXP (x, 0);
10909 rtx op1 = XEXP (x, 1);
10910
10911 /* Handle a side effect of adding in the carry to an addition. */
10912 if (GET_CODE (op0) == PLUS
10913 && arm_carry_operation (op1, mode))
10914 {
10915 op1 = XEXP (op0, 1);
10916 op0 = XEXP (op0, 0);
10917 }
10918 else if (GET_CODE (op1) == PLUS
10919 && arm_carry_operation (op0, mode))
10920 {
10921 op0 = XEXP (op1, 0);
10922 op1 = XEXP (op1, 1);
10923 }
10924 else if (GET_CODE (op0) == PLUS)
10925 {
10926 op0 = strip_carry_operation (op0);
10927 if (swap_commutative_operands_p (op0, op1))
10928 std::swap (op0, op1);
10929 }
10930
10931 if (arm_carry_operation (op0, mode))
10932 {
10933 /* Adding the carry to a register is a canonicalization of
10934 adding 0 to the register plus the carry. */
10935 if (speed_p)
10936 *cost += extra_cost->alu.arith;
10937 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10938 return true;
10939 }
10940
10941 shift_reg = NULL;
10942 shift_op = shifter_op_p (op0, &shift_reg);
10943 if (shift_op != NULL)
10944 {
10945 if (shift_reg)
10946 {
10947 if (speed_p)
10948 *cost += extra_cost->alu.arith_shift_reg;
10949 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10950 }
10951 else if (speed_p)
10952 *cost += extra_cost->alu.arith_shift;
10953
10954 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10955 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10956 return true;
10957 }
10958
10959 if (GET_CODE (op0) == MULT)
10960 {
10961 rtx mul_op = op0;
10962
10963 if (TARGET_DSP_MULTIPLY
10964 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10965 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10966 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10967 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10968 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10969 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10970 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10971 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10972 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10973 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10974 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10975 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10976 == 16))))))
10977 {
10978 /* SMLA[BT][BT]. */
10979 if (speed_p)
10980 *cost += extra_cost->mult[0].extend_add;
10981 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10982 SIGN_EXTEND, 0, speed_p)
10983 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10984 SIGN_EXTEND, 0, speed_p)
10985 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10986 return true;
10987 }
10988
10989 if (speed_p)
10990 *cost += extra_cost->mult[0].add;
10991 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10992 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10993 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10994 return true;
10995 }
10996
10997 if (CONST_INT_P (op1))
10998 {
10999 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11000 INTVAL (op1), NULL_RTX,
11001 NULL_RTX, 1, 0);
11002 *cost = COSTS_N_INSNS (insns);
11003 if (speed_p)
11004 *cost += insns * extra_cost->alu.arith;
11005 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11006 return true;
11007 }
11008
11009 if (speed_p)
11010 *cost += extra_cost->alu.arith;
11011
11012 /* Don't recurse here because we want to test the operands
11013 without any carry operation. */
11014 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11015 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11016 return true;
11017 }
11018
11019 if (mode == DImode)
11020 {
11021 if (GET_CODE (XEXP (x, 0)) == MULT
11022 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11023 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11024 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11025 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11026 {
11027 if (speed_p)
11028 *cost += extra_cost->mult[1].extend_add;
11029 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11030 ZERO_EXTEND, 0, speed_p)
11031 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11032 ZERO_EXTEND, 0, speed_p)
11033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11034 return true;
11035 }
11036
11037 *cost += COSTS_N_INSNS (1);
11038
11039 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11040 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11041 {
11042 if (speed_p)
11043 *cost += (extra_cost->alu.arith
11044 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11045 ? extra_cost->alu.arith
11046 : extra_cost->alu.arith_shift));
11047
11048 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11049 0, speed_p)
11050 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11051 return true;
11052 }
11053
11054 if (speed_p)
11055 *cost += 2 * extra_cost->alu.arith;
11056 return false;
11057 }
11058
11059 /* Vector mode? */
11060 *cost = LIBCALL_COST (2);
11061 return false;
11062 case IOR:
11063 {
11064 rtx sub0, sub1;
11065 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11066 {
11067 if (speed_p)
11068 *cost += extra_cost->alu.rev;
11069
11070 return true;
11071 }
11072 else if (mode == SImode && arm_arch_thumb2
11073 && arm_bfi_p (x, &sub0, &sub1))
11074 {
11075 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11076 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11077 if (speed_p)
11078 *cost += extra_cost->alu.bfi;
11079
11080 return true;
11081 }
11082 }
11083
11084 /* Fall through. */
11085 case AND: case XOR:
11086 if (mode == SImode)
11087 {
11088 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11089 rtx op0 = XEXP (x, 0);
11090 rtx shift_op, shift_reg;
11091
11092 if (subcode == NOT
11093 && (code == AND
11094 || (code == IOR && TARGET_THUMB2)))
11095 op0 = XEXP (op0, 0);
11096
11097 shift_reg = NULL;
11098 shift_op = shifter_op_p (op0, &shift_reg);
11099 if (shift_op != NULL)
11100 {
11101 if (shift_reg)
11102 {
11103 if (speed_p)
11104 *cost += extra_cost->alu.log_shift_reg;
11105 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11106 }
11107 else if (speed_p)
11108 *cost += extra_cost->alu.log_shift;
11109
11110 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11111 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11112 return true;
11113 }
11114
11115 if (CONST_INT_P (XEXP (x, 1)))
11116 {
11117 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11118 INTVAL (XEXP (x, 1)), NULL_RTX,
11119 NULL_RTX, 1, 0);
11120
11121 *cost = COSTS_N_INSNS (insns);
11122 if (speed_p)
11123 *cost += insns * extra_cost->alu.logical;
11124 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11125 return true;
11126 }
11127
11128 if (speed_p)
11129 *cost += extra_cost->alu.logical;
11130 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11131 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11132 return true;
11133 }
11134
11135 if (mode == DImode)
11136 {
11137 rtx op0 = XEXP (x, 0);
11138 enum rtx_code subcode = GET_CODE (op0);
11139
11140 *cost += COSTS_N_INSNS (1);
11141
11142 if (subcode == NOT
11143 && (code == AND
11144 || (code == IOR && TARGET_THUMB2)))
11145 op0 = XEXP (op0, 0);
11146
11147 if (GET_CODE (op0) == ZERO_EXTEND)
11148 {
11149 if (speed_p)
11150 *cost += 2 * extra_cost->alu.logical;
11151
11152 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11153 0, speed_p)
11154 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11155 return true;
11156 }
11157 else if (GET_CODE (op0) == SIGN_EXTEND)
11158 {
11159 if (speed_p)
11160 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11161
11162 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11163 0, speed_p)
11164 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11165 return true;
11166 }
11167
11168 if (speed_p)
11169 *cost += 2 * extra_cost->alu.logical;
11170
11171 return true;
11172 }
11173 /* Vector mode? */
11174
11175 *cost = LIBCALL_COST (2);
11176 return false;
11177
11178 case MULT:
11179 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11180 && (mode == SFmode || !TARGET_VFP_SINGLE))
11181 {
11182 rtx op0 = XEXP (x, 0);
11183
11184 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11185 op0 = XEXP (op0, 0);
11186
11187 if (speed_p)
11188 *cost += extra_cost->fp[mode != SFmode].mult;
11189
11190 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11191 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11192 return true;
11193 }
11194 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11195 {
11196 *cost = LIBCALL_COST (2);
11197 return false;
11198 }
11199
11200 if (mode == SImode)
11201 {
11202 if (TARGET_DSP_MULTIPLY
11203 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11204 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11205 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11206 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11207 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11208 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11209 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11210 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11211 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11212 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11213 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11214 && (INTVAL (XEXP (XEXP (x, 1), 1))
11215 == 16))))))
11216 {
11217 /* SMUL[TB][TB]. */
11218 if (speed_p)
11219 *cost += extra_cost->mult[0].extend;
11220 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11221 SIGN_EXTEND, 0, speed_p);
11222 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11223 SIGN_EXTEND, 1, speed_p);
11224 return true;
11225 }
11226 if (speed_p)
11227 *cost += extra_cost->mult[0].simple;
11228 return false;
11229 }
11230
11231 if (mode == DImode)
11232 {
11233 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11234 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11235 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11236 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11237 {
11238 if (speed_p)
11239 *cost += extra_cost->mult[1].extend;
11240 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11241 ZERO_EXTEND, 0, speed_p)
11242 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11243 ZERO_EXTEND, 0, speed_p));
11244 return true;
11245 }
11246
11247 *cost = LIBCALL_COST (2);
11248 return false;
11249 }
11250
11251 /* Vector mode? */
11252 *cost = LIBCALL_COST (2);
11253 return false;
11254
11255 case NEG:
11256 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11257 && (mode == SFmode || !TARGET_VFP_SINGLE))
11258 {
11259 if (GET_CODE (XEXP (x, 0)) == MULT)
11260 {
11261 /* VNMUL. */
11262 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11263 return true;
11264 }
11265
11266 if (speed_p)
11267 *cost += extra_cost->fp[mode != SFmode].neg;
11268
11269 return false;
11270 }
11271 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11272 {
11273 *cost = LIBCALL_COST (1);
11274 return false;
11275 }
11276
11277 if (mode == SImode)
11278 {
11279 if (GET_CODE (XEXP (x, 0)) == ABS)
11280 {
11281 *cost += COSTS_N_INSNS (1);
11282 /* Assume the non-flag-changing variant. */
11283 if (speed_p)
11284 *cost += (extra_cost->alu.log_shift
11285 + extra_cost->alu.arith_shift);
11286 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11287 return true;
11288 }
11289
11290 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11291 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11292 {
11293 *cost += COSTS_N_INSNS (1);
11294 /* No extra cost for MOV imm and MVN imm. */
11295 /* If the comparison op is using the flags, there's no further
11296 cost, otherwise we need to add the cost of the comparison. */
11297 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11298 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11299 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11300 {
11301 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11302 *cost += (COSTS_N_INSNS (1)
11303 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11304 0, speed_p)
11305 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11306 1, speed_p));
11307 if (speed_p)
11308 *cost += extra_cost->alu.arith;
11309 }
11310 return true;
11311 }
11312
11313 if (speed_p)
11314 *cost += extra_cost->alu.arith;
11315 return false;
11316 }
11317
11318 if (GET_MODE_CLASS (mode) == MODE_INT
11319 && GET_MODE_SIZE (mode) < 4)
11320 {
11321 /* Slightly disparage, as we might need an extend operation. */
11322 *cost += 1;
11323 if (speed_p)
11324 *cost += extra_cost->alu.arith;
11325 return false;
11326 }
11327
11328 if (mode == DImode)
11329 {
11330 *cost += COSTS_N_INSNS (1);
11331 if (speed_p)
11332 *cost += 2 * extra_cost->alu.arith;
11333 return false;
11334 }
11335
11336 /* Vector mode? */
11337 *cost = LIBCALL_COST (1);
11338 return false;
11339
11340 case NOT:
11341 if (mode == SImode)
11342 {
11343 rtx shift_op;
11344 rtx shift_reg = NULL;
11345
11346 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11347
11348 if (shift_op)
11349 {
11350 if (shift_reg != NULL)
11351 {
11352 if (speed_p)
11353 *cost += extra_cost->alu.log_shift_reg;
11354 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11355 }
11356 else if (speed_p)
11357 *cost += extra_cost->alu.log_shift;
11358 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11359 return true;
11360 }
11361
11362 if (speed_p)
11363 *cost += extra_cost->alu.logical;
11364 return false;
11365 }
11366 if (mode == DImode)
11367 {
11368 *cost += COSTS_N_INSNS (1);
11369 return false;
11370 }
11371
11372 /* Vector mode? */
11373
11374 *cost += LIBCALL_COST (1);
11375 return false;
11376
11377 case IF_THEN_ELSE:
11378 {
11379 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11380 {
11381 *cost += COSTS_N_INSNS (3);
11382 return true;
11383 }
11384 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11385 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11386
11387 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11388 /* Assume that if one arm of the if_then_else is a register,
11389 it will be tied with the result, eliminating the
11390 conditional insn. */
11391 if (REG_P (XEXP (x, 1)))
11392 *cost += op2cost;
11393 else if (REG_P (XEXP (x, 2)))
11394 *cost += op1cost;
11395 else
11396 {
11397 if (speed_p)
11398 {
11399 if (extra_cost->alu.non_exec_costs_exec)
11400 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11401 else
11402 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11403 }
11404 else
11405 *cost += op1cost + op2cost;
11406 }
11407 }
11408 return true;
11409
11410 case COMPARE:
11411 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11412 *cost = 0;
11413 else
11414 {
11415 machine_mode op0mode;
11416 /* We'll mostly assume that the cost of a compare is the cost of the
11417 LHS. However, there are some notable exceptions. */
11418
11419 /* Floating point compares are never done as side-effects. */
11420 op0mode = GET_MODE (XEXP (x, 0));
11421 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11422 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11423 {
11424 if (speed_p)
11425 *cost += extra_cost->fp[op0mode != SFmode].compare;
11426
11427 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11428 {
11429 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11430 return true;
11431 }
11432
11433 return false;
11434 }
11435 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11436 {
11437 *cost = LIBCALL_COST (2);
11438 return false;
11439 }
11440
11441 /* DImode compares normally take two insns. */
11442 if (op0mode == DImode)
11443 {
11444 *cost += COSTS_N_INSNS (1);
11445 if (speed_p)
11446 *cost += 2 * extra_cost->alu.arith;
11447 return false;
11448 }
11449
11450 if (op0mode == SImode)
11451 {
11452 rtx shift_op;
11453 rtx shift_reg;
11454
11455 if (XEXP (x, 1) == const0_rtx
11456 && !(REG_P (XEXP (x, 0))
11457 || (GET_CODE (XEXP (x, 0)) == SUBREG
11458 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11459 {
11460 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11461
11462 /* Multiply operations that set the flags are often
11463 significantly more expensive. */
11464 if (speed_p
11465 && GET_CODE (XEXP (x, 0)) == MULT
11466 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11467 *cost += extra_cost->mult[0].flag_setting;
11468
11469 if (speed_p
11470 && GET_CODE (XEXP (x, 0)) == PLUS
11471 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11472 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11473 0), 1), mode))
11474 *cost += extra_cost->mult[0].flag_setting;
11475 return true;
11476 }
11477
11478 shift_reg = NULL;
11479 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11480 if (shift_op != NULL)
11481 {
11482 if (shift_reg != NULL)
11483 {
11484 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11485 1, speed_p);
11486 if (speed_p)
11487 *cost += extra_cost->alu.arith_shift_reg;
11488 }
11489 else if (speed_p)
11490 *cost += extra_cost->alu.arith_shift;
11491 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11492 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11493 return true;
11494 }
11495
11496 if (speed_p)
11497 *cost += extra_cost->alu.arith;
11498 if (CONST_INT_P (XEXP (x, 1))
11499 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11500 {
11501 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11502 return true;
11503 }
11504 return false;
11505 }
11506
11507 /* Vector mode? */
11508
11509 *cost = LIBCALL_COST (2);
11510 return false;
11511 }
11512 return true;
11513
11514 case EQ:
11515 case GE:
11516 case GT:
11517 case LE:
11518 case LT:
11519 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11520 vcle and vclt). */
11521 if (TARGET_NEON
11522 && TARGET_HARD_FLOAT
11523 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11524 && (XEXP (x, 1) == CONST0_RTX (mode)))
11525 {
11526 *cost = 0;
11527 return true;
11528 }
11529
11530 /* Fall through. */
11531 case NE:
11532 case LTU:
11533 case LEU:
11534 case GEU:
11535 case GTU:
11536 case ORDERED:
11537 case UNORDERED:
11538 case UNEQ:
11539 case UNLE:
11540 case UNLT:
11541 case UNGE:
11542 case UNGT:
11543 case LTGT:
11544 if (outer_code == SET)
11545 {
11546 /* Is it a store-flag operation? */
11547 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11548 && XEXP (x, 1) == const0_rtx)
11549 {
11550 /* Thumb also needs an IT insn. */
11551 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11552 return true;
11553 }
11554 if (XEXP (x, 1) == const0_rtx)
11555 {
11556 switch (code)
11557 {
11558 case LT:
11559 /* LSR Rd, Rn, #31. */
11560 if (speed_p)
11561 *cost += extra_cost->alu.shift;
11562 break;
11563
11564 case EQ:
11565 /* RSBS T1, Rn, #0
11566 ADC Rd, Rn, T1. */
11567
11568 case NE:
11569 /* SUBS T1, Rn, #1
11570 SBC Rd, Rn, T1. */
11571 *cost += COSTS_N_INSNS (1);
11572 break;
11573
11574 case LE:
11575 /* RSBS T1, Rn, Rn, LSR #31
11576 ADC Rd, Rn, T1. */
11577 *cost += COSTS_N_INSNS (1);
11578 if (speed_p)
11579 *cost += extra_cost->alu.arith_shift;
11580 break;
11581
11582 case GT:
11583 /* RSB Rd, Rn, Rn, ASR #1
11584 LSR Rd, Rd, #31. */
11585 *cost += COSTS_N_INSNS (1);
11586 if (speed_p)
11587 *cost += (extra_cost->alu.arith_shift
11588 + extra_cost->alu.shift);
11589 break;
11590
11591 case GE:
11592 /* ASR Rd, Rn, #31
11593 ADD Rd, Rn, #1. */
11594 *cost += COSTS_N_INSNS (1);
11595 if (speed_p)
11596 *cost += extra_cost->alu.shift;
11597 break;
11598
11599 default:
11600 /* Remaining cases are either meaningless or would take
11601 three insns anyway. */
11602 *cost = COSTS_N_INSNS (3);
11603 break;
11604 }
11605 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11606 return true;
11607 }
11608 else
11609 {
11610 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11611 if (CONST_INT_P (XEXP (x, 1))
11612 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11613 {
11614 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11615 return true;
11616 }
11617
11618 return false;
11619 }
11620 }
11621 /* Not directly inside a set. If it involves the condition code
11622 register it must be the condition for a branch, cond_exec or
11623 I_T_E operation. Since the comparison is performed elsewhere
11624 this is just the control part which has no additional
11625 cost. */
11626 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11627 && XEXP (x, 1) == const0_rtx)
11628 {
11629 *cost = 0;
11630 return true;
11631 }
11632 return false;
11633
11634 case ABS:
11635 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11636 && (mode == SFmode || !TARGET_VFP_SINGLE))
11637 {
11638 if (speed_p)
11639 *cost += extra_cost->fp[mode != SFmode].neg;
11640
11641 return false;
11642 }
11643 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11644 {
11645 *cost = LIBCALL_COST (1);
11646 return false;
11647 }
11648
11649 if (mode == SImode)
11650 {
11651 if (speed_p)
11652 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11653 return false;
11654 }
11655 /* Vector mode? */
11656 *cost = LIBCALL_COST (1);
11657 return false;
11658
11659 case SIGN_EXTEND:
11660 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11661 && MEM_P (XEXP (x, 0)))
11662 {
11663 if (mode == DImode)
11664 *cost += COSTS_N_INSNS (1);
11665
11666 if (!speed_p)
11667 return true;
11668
11669 if (GET_MODE (XEXP (x, 0)) == SImode)
11670 *cost += extra_cost->ldst.load;
11671 else
11672 *cost += extra_cost->ldst.load_sign_extend;
11673
11674 if (mode == DImode)
11675 *cost += extra_cost->alu.shift;
11676
11677 return true;
11678 }
11679
11680 /* Widening from less than 32 bits requires an extend operation. */
11681 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11682 {
11683 /* We have SXTB/SXTH. */
11684 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11685 if (speed_p)
11686 *cost += extra_cost->alu.extend;
11687 }
11688 else if (GET_MODE (XEXP (x, 0)) != SImode)
11689 {
11690 /* Needs two shifts. */
11691 *cost += COSTS_N_INSNS (1);
11692 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11693 if (speed_p)
11694 *cost += 2 * extra_cost->alu.shift;
11695 }
11696
11697 /* Widening beyond 32 bits requires one more insn. */
11698 if (mode == DImode)
11699 {
11700 *cost += COSTS_N_INSNS (1);
11701 if (speed_p)
11702 *cost += extra_cost->alu.shift;
11703 }
11704
11705 return true;
11706
11707 case ZERO_EXTEND:
11708 if ((arm_arch4
11709 || GET_MODE (XEXP (x, 0)) == SImode
11710 || GET_MODE (XEXP (x, 0)) == QImode)
11711 && MEM_P (XEXP (x, 0)))
11712 {
11713 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11714
11715 if (mode == DImode)
11716 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11717
11718 return true;
11719 }
11720
11721 /* Widening from less than 32 bits requires an extend operation. */
11722 if (GET_MODE (XEXP (x, 0)) == QImode)
11723 {
11724 /* UXTB can be a shorter instruction in Thumb2, but it might
11725 be slower than the AND Rd, Rn, #255 alternative. When
11726 optimizing for speed it should never be slower to use
11727 AND, and we don't really model 16-bit vs 32-bit insns
11728 here. */
11729 if (speed_p)
11730 *cost += extra_cost->alu.logical;
11731 }
11732 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11733 {
11734 /* We have UXTB/UXTH. */
11735 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11736 if (speed_p)
11737 *cost += extra_cost->alu.extend;
11738 }
11739 else if (GET_MODE (XEXP (x, 0)) != SImode)
11740 {
11741 /* Needs two shifts. It's marginally preferable to use
11742 shifts rather than two BIC instructions as the second
11743 shift may merge with a subsequent insn as a shifter
11744 op. */
11745 *cost = COSTS_N_INSNS (2);
11746 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11747 if (speed_p)
11748 *cost += 2 * extra_cost->alu.shift;
11749 }
11750
11751 /* Widening beyond 32 bits requires one more insn. */
11752 if (mode == DImode)
11753 {
11754 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11755 }
11756
11757 return true;
11758
11759 case CONST_INT:
11760 *cost = 0;
11761 /* CONST_INT has no mode, so we cannot tell for sure how many
11762 insns are really going to be needed. The best we can do is
11763 look at the value passed. If it fits in SImode, then assume
11764 that's the mode it will be used for. Otherwise assume it
11765 will be used in DImode. */
11766 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11767 mode = SImode;
11768 else
11769 mode = DImode;
11770
11771 /* Avoid blowing up in arm_gen_constant (). */
11772 if (!(outer_code == PLUS
11773 || outer_code == AND
11774 || outer_code == IOR
11775 || outer_code == XOR
11776 || outer_code == MINUS))
11777 outer_code = SET;
11778
11779 const_int_cost:
11780 if (mode == SImode)
11781 {
11782 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11783 INTVAL (x), NULL, NULL,
11784 0, 0));
11785 /* Extra costs? */
11786 }
11787 else
11788 {
11789 *cost += COSTS_N_INSNS (arm_gen_constant
11790 (outer_code, SImode, NULL,
11791 trunc_int_for_mode (INTVAL (x), SImode),
11792 NULL, NULL, 0, 0)
11793 + arm_gen_constant (outer_code, SImode, NULL,
11794 INTVAL (x) >> 32, NULL,
11795 NULL, 0, 0));
11796 /* Extra costs? */
11797 }
11798
11799 return true;
11800
11801 case CONST:
11802 case LABEL_REF:
11803 case SYMBOL_REF:
11804 if (speed_p)
11805 {
11806 if (arm_arch_thumb2 && !flag_pic)
11807 *cost += COSTS_N_INSNS (1);
11808 else
11809 *cost += extra_cost->ldst.load;
11810 }
11811 else
11812 *cost += COSTS_N_INSNS (1);
11813
11814 if (flag_pic)
11815 {
11816 *cost += COSTS_N_INSNS (1);
11817 if (speed_p)
11818 *cost += extra_cost->alu.arith;
11819 }
11820
11821 return true;
11822
11823 case CONST_FIXED:
11824 *cost = COSTS_N_INSNS (4);
11825 /* Fixme. */
11826 return true;
11827
11828 case CONST_DOUBLE:
11829 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11830 && (mode == SFmode || !TARGET_VFP_SINGLE))
11831 {
11832 if (vfp3_const_double_rtx (x))
11833 {
11834 if (speed_p)
11835 *cost += extra_cost->fp[mode == DFmode].fpconst;
11836 return true;
11837 }
11838
11839 if (speed_p)
11840 {
11841 if (mode == DFmode)
11842 *cost += extra_cost->ldst.loadd;
11843 else
11844 *cost += extra_cost->ldst.loadf;
11845 }
11846 else
11847 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11848
11849 return true;
11850 }
11851 *cost = COSTS_N_INSNS (4);
11852 return true;
11853
11854 case CONST_VECTOR:
11855 /* Fixme. */
11856 if (((TARGET_NEON && TARGET_HARD_FLOAT
11857 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11858 || TARGET_HAVE_MVE)
11859 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11860 *cost = COSTS_N_INSNS (1);
11861 else
11862 *cost = COSTS_N_INSNS (4);
11863 return true;
11864
11865 case HIGH:
11866 case LO_SUM:
11867 /* When optimizing for size, we prefer constant pool entries to
11868 MOVW/MOVT pairs, so bump the cost of these slightly. */
11869 if (!speed_p)
11870 *cost += 1;
11871 return true;
11872
11873 case CLZ:
11874 if (speed_p)
11875 *cost += extra_cost->alu.clz;
11876 return false;
11877
11878 case SMIN:
11879 if (XEXP (x, 1) == const0_rtx)
11880 {
11881 if (speed_p)
11882 *cost += extra_cost->alu.log_shift;
11883 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11884 return true;
11885 }
11886 /* Fall through. */
11887 case SMAX:
11888 case UMIN:
11889 case UMAX:
11890 *cost += COSTS_N_INSNS (1);
11891 return false;
11892
11893 case TRUNCATE:
11894 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11895 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11896 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11897 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11898 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11899 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11900 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11901 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11902 == ZERO_EXTEND))))
11903 {
11904 if (speed_p)
11905 *cost += extra_cost->mult[1].extend;
11906 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11907 ZERO_EXTEND, 0, speed_p)
11908 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11909 ZERO_EXTEND, 0, speed_p));
11910 return true;
11911 }
11912 *cost = LIBCALL_COST (1);
11913 return false;
11914
11915 case UNSPEC_VOLATILE:
11916 case UNSPEC:
11917 return arm_unspec_cost (x, outer_code, speed_p, cost);
11918
11919 case PC:
11920 /* Reading the PC is like reading any other register. Writing it
11921 is more expensive, but we take that into account elsewhere. */
11922 *cost = 0;
11923 return true;
11924
11925 case ZERO_EXTRACT:
11926 /* TODO: Simple zero_extract of bottom bits using AND. */
11927 /* Fall through. */
11928 case SIGN_EXTRACT:
11929 if (arm_arch6
11930 && mode == SImode
11931 && CONST_INT_P (XEXP (x, 1))
11932 && CONST_INT_P (XEXP (x, 2)))
11933 {
11934 if (speed_p)
11935 *cost += extra_cost->alu.bfx;
11936 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11937 return true;
11938 }
11939 /* Without UBFX/SBFX, need to resort to shift operations. */
11940 *cost += COSTS_N_INSNS (1);
11941 if (speed_p)
11942 *cost += 2 * extra_cost->alu.shift;
11943 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11944 return true;
11945
11946 case FLOAT_EXTEND:
11947 if (TARGET_HARD_FLOAT)
11948 {
11949 if (speed_p)
11950 *cost += extra_cost->fp[mode == DFmode].widen;
11951 if (!TARGET_VFP5
11952 && GET_MODE (XEXP (x, 0)) == HFmode)
11953 {
11954 /* Pre v8, widening HF->DF is a two-step process, first
11955 widening to SFmode. */
11956 *cost += COSTS_N_INSNS (1);
11957 if (speed_p)
11958 *cost += extra_cost->fp[0].widen;
11959 }
11960 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11961 return true;
11962 }
11963
11964 *cost = LIBCALL_COST (1);
11965 return false;
11966
11967 case FLOAT_TRUNCATE:
11968 if (TARGET_HARD_FLOAT)
11969 {
11970 if (speed_p)
11971 *cost += extra_cost->fp[mode == DFmode].narrow;
11972 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11973 return true;
11974 /* Vector modes? */
11975 }
11976 *cost = LIBCALL_COST (1);
11977 return false;
11978
11979 case FMA:
11980 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11981 {
11982 rtx op0 = XEXP (x, 0);
11983 rtx op1 = XEXP (x, 1);
11984 rtx op2 = XEXP (x, 2);
11985
11986
11987 /* vfms or vfnma. */
11988 if (GET_CODE (op0) == NEG)
11989 op0 = XEXP (op0, 0);
11990
11991 /* vfnms or vfnma. */
11992 if (GET_CODE (op2) == NEG)
11993 op2 = XEXP (op2, 0);
11994
11995 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11996 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11997 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11998
11999 if (speed_p)
12000 *cost += extra_cost->fp[mode == DFmode].fma;
12001
12002 return true;
12003 }
12004
12005 *cost = LIBCALL_COST (3);
12006 return false;
12007
12008 case FIX:
12009 case UNSIGNED_FIX:
12010 if (TARGET_HARD_FLOAT)
12011 {
12012 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12013 a vcvt fixed-point conversion. */
12014 if (code == FIX && mode == SImode
12015 && GET_CODE (XEXP (x, 0)) == FIX
12016 && GET_MODE (XEXP (x, 0)) == SFmode
12017 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12018 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12019 > 0)
12020 {
12021 if (speed_p)
12022 *cost += extra_cost->fp[0].toint;
12023
12024 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12025 code, 0, speed_p);
12026 return true;
12027 }
12028
12029 if (GET_MODE_CLASS (mode) == MODE_INT)
12030 {
12031 mode = GET_MODE (XEXP (x, 0));
12032 if (speed_p)
12033 *cost += extra_cost->fp[mode == DFmode].toint;
12034 /* Strip off the 'cost' of rounding towards zero. */
12035 if (GET_CODE (XEXP (x, 0)) == FIX)
12036 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12037 0, speed_p);
12038 else
12039 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12040 /* ??? Increase the cost to deal with transferring from
12041 FP -> CORE registers? */
12042 return true;
12043 }
12044 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12045 && TARGET_VFP5)
12046 {
12047 if (speed_p)
12048 *cost += extra_cost->fp[mode == DFmode].roundint;
12049 return false;
12050 }
12051 /* Vector costs? */
12052 }
12053 *cost = LIBCALL_COST (1);
12054 return false;
12055
12056 case FLOAT:
12057 case UNSIGNED_FLOAT:
12058 if (TARGET_HARD_FLOAT)
12059 {
12060 /* ??? Increase the cost to deal with transferring from CORE
12061 -> FP registers? */
12062 if (speed_p)
12063 *cost += extra_cost->fp[mode == DFmode].fromint;
12064 return false;
12065 }
12066 *cost = LIBCALL_COST (1);
12067 return false;
12068
12069 case CALL:
12070 return true;
12071
12072 case ASM_OPERANDS:
12073 {
12074 /* Just a guess: cost the number of instructions in the asm
12075 plus one insn per input, but always a minimum of COSTS_N_INSNS (1)
12076 (see PR60663). */
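/* E.g. an asm template containing two instructions with three
   input operands is costed as COSTS_N_INSNS (2 + 3).  */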
12077 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12078 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12079
12080 *cost = COSTS_N_INSNS (asm_length + num_operands);
12081 return true;
12082 }
12083 default:
12084 if (mode != VOIDmode)
12085 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12086 else
12087 *cost = COSTS_N_INSNS (4); /* Who knows? */
12088 return false;
12089 }
12090 }
12091
12092 #undef HANDLE_NARROW_SHIFT_ARITH
12093
12094 /* RTX costs entry point. */
12095
12096 static bool
12097 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12098 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12099 {
12100 bool result;
12101 int code = GET_CODE (x);
12102 gcc_assert (current_tune->insn_extra_cost);
12103
12104 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12105 (enum rtx_code) outer_code,
12106 current_tune->insn_extra_cost,
12107 total, speed);
12108
12109 if (dump_file && arm_verbose_cost)
12110 {
12111 print_rtl_single (dump_file, x);
12112 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12113 *total, result ? "final" : "partial");
12114 }
12115 return result;
12116 }
12117
12118 static int
12119 arm_insn_cost (rtx_insn *insn, bool speed)
12120 {
12121 int cost;
12122
12123 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12124 will likely disappear during register allocation. */
12125 if (!reload_completed
12126 && GET_CODE (PATTERN (insn)) == SET
12127 && REG_P (SET_DEST (PATTERN (insn)))
12128 && REG_P (SET_SRC (PATTERN (insn))))
12129 return 2;
12130 cost = pattern_cost (PATTERN (insn), speed);
12131 /* If the cost is zero, then it's likely a complex insn. We don't want the
12132 cost of these to be less than something we know about. */
12133 return cost ? cost : COSTS_N_INSNS (2);
12134 }
12135
12136 /* All address computations that can be done are free, but rtx cost returns
12137 the same for practically all of them. So we weight the different types
12138 of address here in the order (most pref first):
12139 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
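/* A sketch of the weights the code below actually returns:
   pre/post increment/decrement -> 0, reg + const_int -> 2,
   reg + arithmetic term (e.g. a shifted reg) -> 3, other PLUS
   forms -> 4, plain REG -> 6, MEM/LABEL_REF/SYMBOL_REF -> 10.  */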
12140 static inline int
12141 arm_arm_address_cost (rtx x)
12142 {
12143 enum rtx_code c = GET_CODE (x);
12144
12145 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12146 return 0;
12147 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12148 return 10;
12149
12150 if (c == PLUS)
12151 {
12152 if (CONST_INT_P (XEXP (x, 1)))
12153 return 2;
12154
12155 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12156 return 3;
12157
12158 return 4;
12159 }
12160
12161 return 6;
12162 }
12163
12164 static inline int
12165 arm_thumb_address_cost (rtx x)
12166 {
12167 enum rtx_code c = GET_CODE (x);
12168
12169 if (c == REG)
12170 return 1;
12171 if (c == PLUS
12172 && REG_P (XEXP (x, 0))
12173 && CONST_INT_P (XEXP (x, 1)))
12174 return 1;
12175
12176 return 2;
12177 }
12178
12179 static int
12180 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12181 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12182 {
12183 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12184 }
12185
12186 /* Adjust cost hook for XScale. */
12187 static bool
12188 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12189 int * cost)
12190 {
12191 /* Some true dependencies can have a higher cost depending
12192 on precisely how certain input operands are used. */
12193 if (dep_type == 0
12194 && recog_memoized (insn) >= 0
12195 && recog_memoized (dep) >= 0)
12196 {
12197 int shift_opnum = get_attr_shift (insn);
12198 enum attr_type attr_type = get_attr_type (dep);
12199
12200 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12201 operand for INSN. If we have a shifted input operand and the
12202 instruction we depend on is another ALU instruction, then we may
12203 have to account for an additional stall. */
12204 if (shift_opnum != 0
12205 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12206 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12207 || attr_type == TYPE_ALUS_SHIFT_IMM
12208 || attr_type == TYPE_LOGIC_SHIFT_IMM
12209 || attr_type == TYPE_LOGICS_SHIFT_IMM
12210 || attr_type == TYPE_ALU_SHIFT_REG
12211 || attr_type == TYPE_ALUS_SHIFT_REG
12212 || attr_type == TYPE_LOGIC_SHIFT_REG
12213 || attr_type == TYPE_LOGICS_SHIFT_REG
12214 || attr_type == TYPE_MOV_SHIFT
12215 || attr_type == TYPE_MVN_SHIFT
12216 || attr_type == TYPE_MOV_SHIFT_REG
12217 || attr_type == TYPE_MVN_SHIFT_REG))
12218 {
12219 rtx shifted_operand;
12220 int opno;
12221
12222 /* Get the shifted operand. */
12223 extract_insn (insn);
12224 shifted_operand = recog_data.operand[shift_opnum];
12225
12226 /* Iterate over all the operands in DEP. If we write an operand
12227 that overlaps with SHIFTED_OPERAND, then we have to increase the
12228 cost of this dependency. */
12229 extract_insn (dep);
12230 preprocess_constraints (dep);
12231 for (opno = 0; opno < recog_data.n_operands; opno++)
12232 {
12233 /* We can ignore strict inputs. */
12234 if (recog_data.operand_type[opno] == OP_IN)
12235 continue;
12236
12237 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12238 shifted_operand))
12239 {
12240 *cost = 2;
12241 return false;
12242 }
12243 }
12244 }
12245 }
12246 return true;
12247 }
12248
12249 /* Adjust cost hook for Cortex A9. */
12250 static bool
12251 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12252 int * cost)
12253 {
12254 switch (dep_type)
12255 {
12256 case REG_DEP_ANTI:
12257 *cost = 0;
12258 return false;
12259
12260 case REG_DEP_TRUE:
12261 case REG_DEP_OUTPUT:
12262 if (recog_memoized (insn) >= 0
12263 && recog_memoized (dep) >= 0)
12264 {
12265 if (GET_CODE (PATTERN (insn)) == SET)
12266 {
12267 if (GET_MODE_CLASS
12268 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12269 || GET_MODE_CLASS
12270 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12271 {
12272 enum attr_type attr_type_insn = get_attr_type (insn);
12273 enum attr_type attr_type_dep = get_attr_type (dep);
12274
12275 /* By default all dependencies of the form
12276 s0 = s0 <op> s1
12277 s0 = s0 <op> s2
12278 have an extra latency of 1 cycle because
12279 of the input and output dependency in this
12280 case. However, this gets modeled as a true
12281 dependency, hence all these checks. */
12282 if (REG_P (SET_DEST (PATTERN (insn)))
12283 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12284 {
12285 /* FMACS is a special case where the dependent
12286 instruction can be issued 3 cycles before
12287 the normal latency in case of an output
12288 dependency. */
12289 if ((attr_type_insn == TYPE_FMACS
12290 || attr_type_insn == TYPE_FMACD)
12291 && (attr_type_dep == TYPE_FMACS
12292 || attr_type_dep == TYPE_FMACD))
12293 {
12294 if (dep_type == REG_DEP_OUTPUT)
12295 *cost = insn_default_latency (dep) - 3;
12296 else
12297 *cost = insn_default_latency (dep);
12298 return false;
12299 }
12300 else
12301 {
12302 if (dep_type == REG_DEP_OUTPUT)
12303 *cost = insn_default_latency (dep) + 1;
12304 else
12305 *cost = insn_default_latency (dep);
12306 }
12307 return false;
12308 }
12309 }
12310 }
12311 }
12312 break;
12313
12314 default:
12315 gcc_unreachable ();
12316 }
12317
12318 return true;
12319 }
12320
12321 /* Adjust cost hook for FA726TE. */
12322 static bool
12323 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12324 int * cost)
12325 {
12326 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12327 has a penalty of 3. */
12328 if (dep_type == REG_DEP_TRUE
12329 && recog_memoized (insn) >= 0
12330 && recog_memoized (dep) >= 0
12331 && get_attr_conds (dep) == CONDS_SET)
12332 {
12333 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12334 if (get_attr_conds (insn) == CONDS_USE
12335 && get_attr_type (insn) != TYPE_BRANCH)
12336 {
12337 *cost = 3;
12338 return false;
12339 }
12340
12341 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12342 || get_attr_conds (insn) == CONDS_USE)
12343 {
12344 *cost = 0;
12345 return false;
12346 }
12347 }
12348
12349 return true;
12350 }
12351
12352 /* Implement TARGET_REGISTER_MOVE_COST.
12353
12354 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12355 typically more expensive than a single memory access. We set
12356 the cost to less than two memory accesses so that floating
12357 point to integer conversion does not go through memory. */
12358
12359 int
12360 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12361 reg_class_t from, reg_class_t to)
12362 {
12363 if (TARGET_32BIT)
12364 {
12365 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12366 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12367 return 15;
12368 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12369 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12370 return 4;
12371 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12372 return 20;
12373 else
12374 return 2;
12375 }
12376 else
12377 {
12378 if (from == HI_REGS || to == HI_REGS)
12379 return 4;
12380 else
12381 return 2;
12382 }
12383 }
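/* Illustrative reading of the numbers above for TARGET_32BIT: a
   VFP<->core move is costed at 15, deliberately below the cost of
   two memory accesses (2 * 10, see arm_memory_move_cost below), so
   a float-to-integer conversion stays in registers rather than
   being bounced through memory.  */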
12384
12385 /* Implement TARGET_MEMORY_MOVE_COST. */
12386
12387 int
12388 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12389 bool in ATTRIBUTE_UNUSED)
12390 {
12391 if (TARGET_32BIT)
12392 return 10;
12393 else
12394 {
12395 if (GET_MODE_SIZE (mode) < 4)
12396 return 8;
12397 else
12398 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12399 }
12400 }
12401
12402 /* Vectorizer cost model implementation. */
12403
12404 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12405 static int
12406 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12407 tree vectype,
12408 int misalign ATTRIBUTE_UNUSED)
12409 {
12410 unsigned elements;
12411
12412 switch (type_of_cost)
12413 {
12414 case scalar_stmt:
12415 return current_tune->vec_costs->scalar_stmt_cost;
12416
12417 case scalar_load:
12418 return current_tune->vec_costs->scalar_load_cost;
12419
12420 case scalar_store:
12421 return current_tune->vec_costs->scalar_store_cost;
12422
12423 case vector_stmt:
12424 return current_tune->vec_costs->vec_stmt_cost;
12425
12426 case vector_load:
12427 return current_tune->vec_costs->vec_align_load_cost;
12428
12429 case vector_store:
12430 return current_tune->vec_costs->vec_store_cost;
12431
12432 case vec_to_scalar:
12433 return current_tune->vec_costs->vec_to_scalar_cost;
12434
12435 case scalar_to_vec:
12436 return current_tune->vec_costs->scalar_to_vec_cost;
12437
12438 case unaligned_load:
12439 case vector_gather_load:
12440 return current_tune->vec_costs->vec_unalign_load_cost;
12441
12442 case unaligned_store:
12443 case vector_scatter_store:
12444 return current_tune->vec_costs->vec_unalign_store_cost;
12445
12446 case cond_branch_taken:
12447 return current_tune->vec_costs->cond_taken_branch_cost;
12448
12449 case cond_branch_not_taken:
12450 return current_tune->vec_costs->cond_not_taken_branch_cost;
12451
12452 case vec_perm:
12453 case vec_promote_demote:
12454 return current_tune->vec_costs->vec_stmt_cost;
12455
12456 case vec_construct:
12457 elements = TYPE_VECTOR_SUBPARTS (vectype);
12458 return elements / 2 + 1;
12459
12460 default:
12461 gcc_unreachable ();
12462 }
12463 }
12464
12465 /* Return true if and only if this insn can dual-issue only as older. */
12466 static bool
12467 cortexa7_older_only (rtx_insn *insn)
12468 {
12469 if (recog_memoized (insn) < 0)
12470 return false;
12471
12472 switch (get_attr_type (insn))
12473 {
12474 case TYPE_ALU_DSP_REG:
12475 case TYPE_ALU_SREG:
12476 case TYPE_ALUS_SREG:
12477 case TYPE_LOGIC_REG:
12478 case TYPE_LOGICS_REG:
12479 case TYPE_ADC_REG:
12480 case TYPE_ADCS_REG:
12481 case TYPE_ADR:
12482 case TYPE_BFM:
12483 case TYPE_REV:
12484 case TYPE_MVN_REG:
12485 case TYPE_SHIFT_IMM:
12486 case TYPE_SHIFT_REG:
12487 case TYPE_LOAD_BYTE:
12488 case TYPE_LOAD_4:
12489 case TYPE_STORE_4:
12490 case TYPE_FFARITHS:
12491 case TYPE_FADDS:
12492 case TYPE_FFARITHD:
12493 case TYPE_FADDD:
12494 case TYPE_FMOV:
12495 case TYPE_F_CVT:
12496 case TYPE_FCMPS:
12497 case TYPE_FCMPD:
12498 case TYPE_FCONSTS:
12499 case TYPE_FCONSTD:
12500 case TYPE_FMULS:
12501 case TYPE_FMACS:
12502 case TYPE_FMULD:
12503 case TYPE_FMACD:
12504 case TYPE_FDIVS:
12505 case TYPE_FDIVD:
12506 case TYPE_F_MRC:
12507 case TYPE_F_MRRC:
12508 case TYPE_F_FLAG:
12509 case TYPE_F_LOADS:
12510 case TYPE_F_STORES:
12511 return true;
12512 default:
12513 return false;
12514 }
12515 }
12516
12517 /* Return true if and only if this insn can dual-issue as younger. */
12518 static bool
12519 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12520 {
12521 if (recog_memoized (insn) < 0)
12522 {
12523 if (verbose > 5)
12524 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12525 return false;
12526 }
12527
12528 switch (get_attr_type (insn))
12529 {
12530 case TYPE_ALU_IMM:
12531 case TYPE_ALUS_IMM:
12532 case TYPE_LOGIC_IMM:
12533 case TYPE_LOGICS_IMM:
12534 case TYPE_EXTEND:
12535 case TYPE_MVN_IMM:
12536 case TYPE_MOV_IMM:
12537 case TYPE_MOV_REG:
12538 case TYPE_MOV_SHIFT:
12539 case TYPE_MOV_SHIFT_REG:
12540 case TYPE_BRANCH:
12541 case TYPE_CALL:
12542 return true;
12543 default:
12544 return false;
12545 }
12546 }
12547
12548
12549 /* Look for an instruction that can dual issue only as an older
12550 instruction, and move it in front of any instructions that can
12551 dual-issue as younger, while preserving the relative order of all
12552 other instructions in the ready list. This is a heuristic to help
12553 dual-issue in later cycles, by postponing issue of more flexible
12554 instructions. This heuristic may affect dual issue opportunities
12555 in the current cycle. */
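/* For example (illustrative): if the ready list has a MOV-immediate (which
   can issue as younger) ahead of an ADD-register (older-only), the ADD is
   moved in front so that the more flexible MOV remains available to pair as
   the younger half of a dual-issue pair in a later cycle. */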
12556 static void
12557 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12558 int *n_readyp, int clock)
12559 {
12560 int i;
12561 int first_older_only = -1, first_younger = -1;
12562
12563 if (verbose > 5)
12564 fprintf (file,
12565 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12566 clock,
12567 *n_readyp);
12568
12569 /* Traverse the ready list from the head (the instruction to issue
12570 first), looking for the first instruction that can issue as
12571 younger and the first instruction that can dual-issue only as
12572 older. */
12573 for (i = *n_readyp - 1; i >= 0; i--)
12574 {
12575 rtx_insn *insn = ready[i];
12576 if (cortexa7_older_only (insn))
12577 {
12578 first_older_only = i;
12579 if (verbose > 5)
12580 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12581 break;
12582 }
12583 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12584 first_younger = i;
12585 }
12586
12587 /* Nothing to reorder because either no younger insn was found, or an insn
12588 that can dual-issue only as older appears before any insn that
12589 can dual-issue as younger. */
12590 if (first_younger == -1)
12591 {
12592 if (verbose > 5)
12593 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12594 return;
12595 }
12596
12597 /* Nothing to reorder because no older-only insn in the ready list. */
12598 if (first_older_only == -1)
12599 {
12600 if (verbose > 5)
12601 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12602 return;
12603 }
12604
12605 /* Move first_older_only insn before first_younger. */
12606 if (verbose > 5)
12607 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12608 INSN_UID(ready [first_older_only]),
12609 INSN_UID(ready [first_younger]));
12610 rtx_insn *first_older_only_insn = ready [first_older_only];
12611 for (i = first_older_only; i < first_younger; i++)
12612 {
12613 ready[i] = ready[i+1];
12614 }
12615
12616 ready[i] = first_older_only_insn;
12617 return;
12618 }
12619
12620 /* Implement TARGET_SCHED_REORDER. */
12621 static int
12622 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12623 int clock)
12624 {
12625 switch (arm_tune)
12626 {
12627 case TARGET_CPU_cortexa7:
12628 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12629 break;
12630 default:
12631 /* Do nothing for other cores. */
12632 break;
12633 }
12634
12635 return arm_issue_rate ();
12636 }
12637
12638 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
12639 It corrects the value of COST based on the relationship between
12640 INSN and DEP, as described by the dependence type DEP_TYPE. It returns the new
12641 value. There is a per-core adjust_cost hook to adjust scheduler costs
12642 and the per-core hook can choose to completely override the generic
12643 adjust_cost function. Only put bits of code into arm_adjust_cost that
12644 are common across all cores. */
12645 static int
12646 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12647 unsigned int)
12648 {
12649 rtx i_pat, d_pat;
12650
12651 /* When generating Thumb-1 code, we want to place flag-setting operations
12652 close to a conditional branch which depends on them, so that we can
12653 omit the comparison. */
12654 if (TARGET_THUMB1
12655 && dep_type == 0
12656 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12657 && recog_memoized (dep) >= 0
12658 && get_attr_conds (dep) == CONDS_SET)
12659 return 0;
12660
12661 if (current_tune->sched_adjust_cost != NULL)
12662 {
12663 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12664 return cost;
12665 }
12666
12667 /* XXX Is this strictly true? */
12668 if (dep_type == REG_DEP_ANTI
12669 || dep_type == REG_DEP_OUTPUT)
12670 return 0;
12671
12672 /* Call insns don't incur a stall, even if they follow a load. */
12673 if (dep_type == 0
12674 && CALL_P (insn))
12675 return 1;
12676
12677 if ((i_pat = single_set (insn)) != NULL
12678 && MEM_P (SET_SRC (i_pat))
12679 && (d_pat = single_set (dep)) != NULL
12680 && MEM_P (SET_DEST (d_pat)))
12681 {
12682 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12683 /* This is a load after a store; there is no conflict if the load reads
12684 from a cached area. Assume that loads from the stack, and from the
12685 constant pool are cached, and that others will miss. This is a
12686 hack. */
12687
12688 if ((SYMBOL_REF_P (src_mem)
12689 && CONSTANT_POOL_ADDRESS_P (src_mem))
12690 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12691 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12692 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12693 return 1;
12694 }
12695
12696 return cost;
12697 }
12698
12699 int
12700 arm_max_conditional_execute (void)
12701 {
12702 return max_insns_skipped;
12703 }
12704
12705 static int
12706 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12707 {
12708 if (TARGET_32BIT)
12709 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12710 else
12711 return (optimize > 0) ? 2 : 0;
12712 }
12713
12714 static int
12715 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12716 {
12717 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12718 }
12719
12720 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12721 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12722 sequences of non-executed instructions in IT blocks probably take the same
12723 amount of time as executed instructions (and the IT instruction itself takes
12724 space in icache). This function was experimentally determined to give good
12725 results on a popular embedded benchmark. */
12726
12727 static int
12728 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12729 {
12730 return (TARGET_32BIT && speed_p) ? 1
12731 : arm_default_branch_cost (speed_p, predictable_p);
12732 }
12733
12734 static int
12735 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12736 {
12737 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12738 }
12739
12740 static bool fp_consts_inited = false;
12741
12742 static REAL_VALUE_TYPE value_fp0;
12743
12744 static void
12745 init_fp_table (void)
12746 {
12747 REAL_VALUE_TYPE r;
12748
12749 r = REAL_VALUE_ATOF ("0", DFmode);
12750 value_fp0 = r;
12751 fp_consts_inited = true;
12752 }
12753
12754 /* Return TRUE if rtx X is a valid immediate FP constant. */
12755 int
12756 arm_const_double_rtx (rtx x)
12757 {
12758 const REAL_VALUE_TYPE *r;
12759
12760 if (!fp_consts_inited)
12761 init_fp_table ();
12762
12763 r = CONST_DOUBLE_REAL_VALUE (x);
12764 if (REAL_VALUE_MINUS_ZERO (*r))
12765 return 0;
12766
12767 if (real_equal (r, &value_fp0))
12768 return 1;
12769
12770 return 0;
12771 }
12772
12773 /* VFPv3 has a fairly wide range of representable immediates, formed from
12774 "quarter-precision" floating-point values. These can be evaluated using this
12775 formula (with ^ for exponentiation):
12776
12777 -1^s * n * 2^-r
12778
12779 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12780 16 <= n <= 31 and 0 <= r <= 7.
12781
12782 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12783
12784 - A (most-significant) is the sign bit.
12785 - BCD are the exponent (encoded as r XOR 3).
12786 - EFGH are the mantissa (encoded as n - 16).
12787 */
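/* Worked example (illustrative): 1.0 can be written as 16 * 2^-4, i.e.
   s = 0, n = 16, r = 4. The encoding is therefore A = 0, BCD = 4 XOR 3
   = 0b111, EFGH = 16 - 16 = 0b0000, giving the 8-bit value 0b01110000
   (0x70). */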
12788
12789 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12790 fconst[sd] instruction, or -1 if X isn't suitable. */
12791 static int
12792 vfp3_const_double_index (rtx x)
12793 {
12794 REAL_VALUE_TYPE r, m;
12795 int sign, exponent;
12796 unsigned HOST_WIDE_INT mantissa, mant_hi;
12797 unsigned HOST_WIDE_INT mask;
12798 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12799 bool fail;
12800
12801 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12802 return -1;
12803
12804 r = *CONST_DOUBLE_REAL_VALUE (x);
12805
12806 /* We can't represent these things, so detect them first. */
12807 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12808 return -1;
12809
12810 /* Extract sign, exponent and mantissa. */
12811 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12812 r = real_value_abs (&r);
12813 exponent = REAL_EXP (&r);
12814 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12815 highest (sign) bit, with a fixed binary point at bit point_pos.
12816 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12817 bits for the mantissa, this may fail (low bits would be lost). */
12818 real_ldexp (&m, &r, point_pos - exponent);
12819 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12820 mantissa = w.elt (0);
12821 mant_hi = w.elt (1);
12822
12823 /* If there are bits set in the low part of the mantissa, we can't
12824 represent this value. */
12825 if (mantissa != 0)
12826 return -1;
12827
12828 /* Now make it so that mantissa contains the most-significant bits, and move
12829 the point_pos to indicate that the least-significant bits have been
12830 discarded. */
12831 point_pos -= HOST_BITS_PER_WIDE_INT;
12832 mantissa = mant_hi;
12833
12834 /* We can permit four significant bits of mantissa only, plus a high bit
12835 which is always 1. */
12836 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12837 if ((mantissa & mask) != 0)
12838 return -1;
12839
12840 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12841 mantissa >>= point_pos - 5;
12842
12843 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12844 floating-point immediate zero with Neon using an integer-zero load, but
12845 that case is handled elsewhere.) */
12846 if (mantissa == 0)
12847 return -1;
12848
12849 gcc_assert (mantissa >= 16 && mantissa <= 31);
12850
12851 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12852 normalized significands are in the range [1, 2). (Our mantissa is shifted
12853 left 4 places at this point relative to normalized IEEE754 values). GCC
12854 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12855 REAL_EXP must be altered. */
12856 exponent = 5 - exponent;
12857
12858 if (exponent < 0 || exponent > 7)
12859 return -1;
12860
12861 /* Sign, mantissa and exponent are now in the correct form to plug into the
12862 formula described in the comment above. */
12863 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12864 }
12865
12866 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12867 int
12868 vfp3_const_double_rtx (rtx x)
12869 {
12870 if (!TARGET_VFP3)
12871 return 0;
12872
12873 return vfp3_const_double_index (x) != -1;
12874 }
12875
12876 /* Recognize immediates which can be used in various Neon and MVE instructions.
12877 Legal immediates are described by the following table (for VMVN variants, the
12878 bitwise inverse of the constant shown is recognized. In either case, VMOV
12879 is output and the correct instruction to use for a given constant is chosen
12880 by the assembler). The constant shown is replicated across all elements of
12881 the destination vector.
12882
12883 insn elems variant constant (binary)
12884 ---- ----- ------- -----------------
12885 vmov i32 0 00000000 00000000 00000000 abcdefgh
12886 vmov i32 1 00000000 00000000 abcdefgh 00000000
12887 vmov i32 2 00000000 abcdefgh 00000000 00000000
12888 vmov i32 3 abcdefgh 00000000 00000000 00000000
12889 vmov i16 4 00000000 abcdefgh
12890 vmov i16 5 abcdefgh 00000000
12891 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12892 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12893 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12894 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12895 vmvn i16 10 00000000 abcdefgh
12896 vmvn i16 11 abcdefgh 00000000
12897 vmov i32 12 00000000 00000000 abcdefgh 11111111
12898 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12899 vmov i32 14 00000000 abcdefgh 11111111 11111111
12900 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12901 vmov i8 16 abcdefgh
12902 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12903 eeeeeeee ffffffff gggggggg hhhhhhhh
12904 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12905 vmov f32 19 00000000 00000000 00000000 00000000
12906
12907 For case 18, B = !b. Representable values are exactly those accepted by
12908 vfp3_const_double_index, but are output as floating-point numbers rather
12909 than indices.
12910
12911 For case 19, we will change it to vmov.i32 when assembling.
12912
12913 Variants 0-5 (inclusive) may also be used as immediates for the second
12914 operand of VORR/VBIC instructions.
12915
12916 The INVERSE argument causes the bitwise inverse of the given operand to be
12917 recognized instead (used for recognizing legal immediates for the VAND/VORN
12918 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12919 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12920 output, rather than the real insns vbic/vorr).
12921
12922 INVERSE makes no difference to the recognition of float vectors.
12923
12924 The return value is the variant of immediate as shown in the above table, or
12925 -1 if the given value doesn't match any of the listed patterns.
12926 */
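/* Illustrative example (little-endian): a V4SImode CONST_VECTOR whose four
   elements are all 0x0000ab00 matches variant 1 above, so the return value
   is 1, *ELEMENTWIDTH is set to 32 and *MODCONST to (const_int 0xab00). */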
12927 static int
12928 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12929 rtx *modconst, int *elementwidth)
12930 {
12931 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12932 matches = 1; \
12933 for (i = 0; i < idx; i += (STRIDE)) \
12934 if (!(TEST)) \
12935 matches = 0; \
12936 if (matches) \
12937 { \
12938 immtype = (CLASS); \
12939 elsize = (ELSIZE); \
12940 break; \
12941 }
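/* Each CHECK below tests one variant from the table above: it evaluates TEST
   at every STRIDE-th byte position, and the first variant whose TEST holds
   throughout records its class in IMMTYPE and its element size in ELSIZE,
   then breaks out of the do { ... } while (0) block further down. */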
12942
12943 unsigned int i, elsize = 0, idx = 0, n_elts;
12944 unsigned int innersize;
12945 unsigned char bytes[16] = {};
12946 int immtype = -1, matches;
12947 unsigned int invmask = inverse ? 0xff : 0;
12948 bool vector = GET_CODE (op) == CONST_VECTOR;
12949
12950 if (vector)
12951 n_elts = CONST_VECTOR_NUNITS (op);
12952 else
12953 {
12954 n_elts = 1;
12955 gcc_assert (mode != VOIDmode);
12956 }
12957
12958 innersize = GET_MODE_UNIT_SIZE (mode);
12959
12960 /* Only support 128-bit vectors for MVE. */
12961 if (TARGET_HAVE_MVE
12962 && (!vector
12963 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12964 || n_elts * innersize != 16))
12965 return -1;
12966
12967 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12968 return -1;
12969
12970 /* Vectors of float constants. */
12971 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12972 {
12973 rtx el0 = CONST_VECTOR_ELT (op, 0);
12974
12975 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12976 return -1;
12977
12978 /* FP16 vectors cannot be represented. */
12979 if (GET_MODE_INNER (mode) == HFmode)
12980 return -1;
12981
12982 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12983 are distinct in this context. */
12984 if (!const_vec_duplicate_p (op))
12985 return -1;
12986
12987 if (modconst)
12988 *modconst = CONST_VECTOR_ELT (op, 0);
12989
12990 if (elementwidth)
12991 *elementwidth = 0;
12992
12993 if (el0 == CONST0_RTX (GET_MODE (el0)))
12994 return 19;
12995 else
12996 return 18;
12997 }
12998
12999 /* The tricks done in the code below apply for little-endian vector layout.
13000 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13001 FIXME: Implement logic for big-endian vectors. */
13002 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13003 return -1;
13004
13005 /* Splat vector constant out into a byte vector. */
13006 for (i = 0; i < n_elts; i++)
13007 {
13008 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13009 unsigned HOST_WIDE_INT elpart;
13010
13011 gcc_assert (CONST_INT_P (el));
13012 elpart = INTVAL (el);
13013
13014 for (unsigned int byte = 0; byte < innersize; byte++)
13015 {
13016 bytes[idx++] = (elpart & 0xff) ^ invmask;
13017 elpart >>= BITS_PER_UNIT;
13018 }
13019 }
13020
13021 /* Sanity check. */
13022 gcc_assert (idx == GET_MODE_SIZE (mode));
13023
13024 do
13025 {
13026 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13027 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13028
13029 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13030 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13031
13032 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13033 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13034
13035 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13036 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13037
13038 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13039
13040 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13041
13042 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13043 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13044
13045 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13046 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13047
13048 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13049 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13050
13051 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13052 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13053
13054 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13055
13056 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13057
13058 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13059 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13060
13061 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13062 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13063
13064 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13065 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13066
13067 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13068 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13069
13070 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13071
13072 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13073 && bytes[i] == bytes[(i + 8) % idx]);
13074 }
13075 while (0);
13076
13077 if (immtype == -1)
13078 return -1;
13079
13080 if (elementwidth)
13081 *elementwidth = elsize;
13082
13083 if (modconst)
13084 {
13085 unsigned HOST_WIDE_INT imm = 0;
13086
13087 /* Un-invert bytes of recognized vector, if necessary. */
13088 if (invmask != 0)
13089 for (i = 0; i < idx; i++)
13090 bytes[i] ^= invmask;
13091
13092 if (immtype == 17)
13093 {
13094 /* FIXME: Broken on 32-bit H_W_I hosts. */
13095 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13096
13097 for (i = 0; i < 8; i++)
13098 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13099 << (i * BITS_PER_UNIT);
13100
13101 *modconst = GEN_INT (imm);
13102 }
13103 else
13104 {
13105 unsigned HOST_WIDE_INT imm = 0;
13106
13107 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13108 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13109
13110 *modconst = GEN_INT (imm);
13111 }
13112 }
13113
13114 return immtype;
13115 #undef CHECK
13116 }
13117
13118 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13119 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13120 (or zero for float elements), and a modified constant (whatever should be
13121 output for a VMOV) in *MODCONST. The function was renamed from
13122 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13123 it is used by both Neon and MVE. */
13124 int
13125 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13126 rtx *modconst, int *elementwidth)
13127 {
13128 rtx tmpconst;
13129 int tmpwidth;
13130 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13131
13132 if (retval == -1)
13133 return 0;
13134
13135 if (modconst)
13136 *modconst = tmpconst;
13137
13138 if (elementwidth)
13139 *elementwidth = tmpwidth;
13140
13141 return 1;
13142 }
13143
13144 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13145 the immediate is valid, write a constant suitable for using as an operand
13146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13147 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13148
13149 int
13150 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13151 rtx *modconst, int *elementwidth)
13152 {
13153 rtx tmpconst;
13154 int tmpwidth;
13155 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13156
13157 if (retval < 0 || retval > 5)
13158 return 0;
13159
13160 if (modconst)
13161 *modconst = tmpconst;
13162
13163 if (elementwidth)
13164 *elementwidth = tmpwidth;
13165
13166 return 1;
13167 }
13168
13169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13170 the immediate is valid, write a constant suitable for using as an operand
13171 to VSHR/VSHL to *MODCONST and the corresponding element width to
13172 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
13173 which have different immediate ranges. */
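/* For example, for a V8HImode operand (16-bit elements) a right-shift
   immediate must lie in [1, 16], while a left-shift immediate must lie in
   [0, 15]. */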
13174
13175 int
13176 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13177 rtx *modconst, int *elementwidth,
13178 bool isleftshift)
13179 {
13180 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13181 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13182 unsigned HOST_WIDE_INT last_elt = 0;
13183 unsigned HOST_WIDE_INT maxshift;
13184
13185 /* Split vector constant out into a byte vector. */
13186 for (i = 0; i < n_elts; i++)
13187 {
13188 rtx el = CONST_VECTOR_ELT (op, i);
13189 unsigned HOST_WIDE_INT elpart;
13190
13191 if (CONST_INT_P (el))
13192 elpart = INTVAL (el);
13193 else if (CONST_DOUBLE_P (el))
13194 return 0;
13195 else
13196 gcc_unreachable ();
13197
13198 if (i != 0 && elpart != last_elt)
13199 return 0;
13200
13201 last_elt = elpart;
13202 }
13203
13204 /* Shift less than element size. */
13205 maxshift = innersize * 8;
13206
13207 if (isleftshift)
13208 {
13209 /* Left shift immediate value can be from 0 to <size>-1. */
13210 if (last_elt >= maxshift)
13211 return 0;
13212 }
13213 else
13214 {
13215 /* Right shift immediate value can be from 1 to <size>. */
13216 if (last_elt == 0 || last_elt > maxshift)
13217 return 0;
13218 }
13219
13220 if (elementwidth)
13221 *elementwidth = innersize * 8;
13222
13223 if (modconst)
13224 *modconst = CONST_VECTOR_ELT (op, 0);
13225
13226 return 1;
13227 }
13228
13229 /* Return a string suitable for output of Neon immediate logic operation
13230 MNEM. */
13231
13232 char *
13233 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13234 int inverse, int quad)
13235 {
13236 int width, is_valid;
13237 static char templ[40];
13238
13239 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13240
13241 gcc_assert (is_valid != 0);
13242
13243 if (quad)
13244 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13245 else
13246 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13247
13248 return templ;
13249 }
13250
13251 /* Return a string suitable for output of Neon immediate shift operation
13252 (VSHR or VSHL) MNEM. */
13253
13254 char *
13255 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13256 machine_mode mode, int quad,
13257 bool isleftshift)
13258 {
13259 int width, is_valid;
13260 static char templ[40];
13261
13262 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13263 gcc_assert (is_valid != 0);
13264
13265 if (quad)
13266 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13267 else
13268 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13269
13270 return templ;
13271 }
13272
13273 /* Output a sequence of pairwise operations to implement a reduction.
13274 NOTE: We do "too much work" here, because pairwise operations work on two
13275 registers-worth of operands in one go. Unfortunately we can't exploit those
13276 extra calculations to do the full operation in fewer steps, I don't think.
13277 Although all vector elements of the result but the first are ignored, we
13278 actually calculate the same result in each of the elements. An alternative
13279 such as initially loading a vector with zero to use as each of the second
13280 operands would use up an additional register and take an extra instruction,
13281 for no particular gain. */
13282
13283 void
13284 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13285 rtx (*reduc) (rtx, rtx, rtx))
13286 {
13287 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13288 rtx tmpsum = op1;
13289
13290 for (i = parts / 2; i >= 1; i /= 2)
13291 {
13292 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13293 emit_insn (reduc (dest, tmpsum, tmpsum));
13294 tmpsum = dest;
13295 }
13296 }
13297
13298 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13299 loaded into a register using VDUP.
13300
13301 If this is the case, and GENERATE is set, we also generate
13302 instructions to do this and return an RTX to assign to the register. */
13303
13304 static rtx
13305 neon_vdup_constant (rtx vals, bool generate)
13306 {
13307 machine_mode mode = GET_MODE (vals);
13308 machine_mode inner_mode = GET_MODE_INNER (mode);
13309 rtx x;
13310
13311 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13312 return NULL_RTX;
13313
13314 if (!const_vec_duplicate_p (vals, &x))
13315 /* The elements are not all the same. We could handle repeating
13316 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13317 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13318 vdup.i16). */
13319 return NULL_RTX;
13320
13321 if (!generate)
13322 return x;
13323
13324 /* We can load this constant by using VDUP and a constant in a
13325 single ARM register. This will be cheaper than a vector
13326 load. */
13327
13328 x = copy_to_mode_reg (inner_mode, x);
13329 return gen_vec_duplicate (mode, x);
13330 }
13331
13332 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13333 rtx
13334 mve_bool_vec_to_const (rtx const_vec)
13335 {
13336 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13337 int repeat = 16 / n_elts;
13338 int i;
13339 int hi_val = 0;
13340
13341 for (i = 0; i < n_elts; i++)
13342 {
13343 rtx el = CONST_VECTOR_ELT (const_vec, i);
13344 unsigned HOST_WIDE_INT elpart;
13345
13346 gcc_assert (CONST_INT_P (el));
13347 elpart = INTVAL (el);
13348
13349 for (int j = 0; j < repeat; j++)
13350 hi_val |= elpart << (i * repeat + j);
13351 }
13352 return gen_int_mode (hi_val, HImode);
13353 }
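/* Illustrative example (assuming a 4-element predicate constant with 0/1
   elements, e.g. V4BImode): for {1, 0, 1, 1}, n_elts is 4 and repeat is 4,
   so each element is replicated into a 4-bit group and the result is
   (const_int 0xff0f). */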
13354
13355 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13356 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13357 into a register.
13358
13359 If this is the case, and GENERATE is set, we also generate code to do
13360 this and return an RTX to copy into the register. */
13361
13362 rtx
13363 neon_make_constant (rtx vals, bool generate)
13364 {
13365 machine_mode mode = GET_MODE (vals);
13366 rtx target;
13367 rtx const_vec = NULL_RTX;
13368 int n_elts = GET_MODE_NUNITS (mode);
13369 int n_const = 0;
13370 int i;
13371
13372 if (GET_CODE (vals) == CONST_VECTOR)
13373 const_vec = vals;
13374 else if (GET_CODE (vals) == PARALLEL)
13375 {
13376 /* A CONST_VECTOR must contain only CONST_INTs and
13377 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13378 Only store valid constants in a CONST_VECTOR. */
13379 for (i = 0; i < n_elts; ++i)
13380 {
13381 rtx x = XVECEXP (vals, 0, i);
13382 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13383 n_const++;
13384 }
13385 if (n_const == n_elts)
13386 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13387 }
13388 else
13389 gcc_unreachable ();
13390
13391 if (const_vec != NULL
13392 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13393 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13394 return const_vec;
13395 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13396 return mve_bool_vec_to_const (const_vec);
13397 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13398 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13399 pipeline cycle; creating the constant takes one or two ARM
13400 pipeline cycles. */
13401 return target;
13402 else if (const_vec != NULL_RTX)
13403 /* Load from constant pool. On Cortex-A8 this takes two cycles
13404 (for either double or quad vectors). We cannot take advantage
13405 of single-cycle VLD1 because we need a PC-relative addressing
13406 mode. */
13407 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13408 else
13409 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13410 We cannot construct an initializer. */
13411 return NULL_RTX;
13412 }
13413
13414 /* Initialize vector TARGET to VALS. */
13415
13416 void
13417 neon_expand_vector_init (rtx target, rtx vals)
13418 {
13419 machine_mode mode = GET_MODE (target);
13420 machine_mode inner_mode = GET_MODE_INNER (mode);
13421 int n_elts = GET_MODE_NUNITS (mode);
13422 int n_var = 0, one_var = -1;
13423 bool all_same = true;
13424 rtx x, mem;
13425 int i;
13426
13427 for (i = 0; i < n_elts; ++i)
13428 {
13429 x = XVECEXP (vals, 0, i);
13430 if (!CONSTANT_P (x))
13431 ++n_var, one_var = i;
13432
13433 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13434 all_same = false;
13435 }
13436
13437 if (n_var == 0)
13438 {
13439 rtx constant = neon_make_constant (vals);
13440 if (constant != NULL_RTX)
13441 {
13442 emit_move_insn (target, constant);
13443 return;
13444 }
13445 }
13446
13447 /* Splat a single non-constant element if we can. */
13448 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13449 {
13450 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13451 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13452 return;
13453 }
13454
13455 /* One field is non-constant. Load constant then overwrite varying
13456 field. This is more efficient than using the stack. */
13457 if (n_var == 1)
13458 {
13459 rtx copy = copy_rtx (vals);
13460 rtx merge_mask = GEN_INT (1 << one_var);
13461
13462 /* Load constant part of vector, substitute neighboring value for
13463 varying element. */
13464 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13465 neon_expand_vector_init (target, copy);
13466
13467 /* Insert variable. */
13468 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13469 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13470 return;
13471 }
13472
13473 /* Construct the vector in memory one field at a time
13474 and load the whole vector. */
13475 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13476 for (i = 0; i < n_elts; i++)
13477 emit_move_insn (adjust_address_nv (mem, inner_mode,
13478 i * GET_MODE_SIZE (inner_mode)),
13479 XVECEXP (vals, 0, i));
13480 emit_move_insn (target, mem);
13481 }
13482
13483 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13484 an error (described by DESC) if it doesn't. EXP indicates the source location, which includes the
13485 inlining history for intrinsics. */
13486
13487 static void
13488 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13489 const_tree exp, const char *desc)
13490 {
13491 HOST_WIDE_INT lane;
13492
13493 gcc_assert (CONST_INT_P (operand));
13494
13495 lane = INTVAL (operand);
13496
13497 if (lane < low || lane >= high)
13498 {
13499 if (exp)
13500 error_at (EXPR_LOCATION (exp),
13501 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13502 else
13503 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13504 }
13505 }
13506
13507 /* Bounds-check lanes. */
13508
13509 void
13510 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13511 const_tree exp)
13512 {
13513 bounds_check (operand, low, high, exp, "lane");
13514 }
13515
13516 /* Bounds-check constants. */
13517
13518 void
13519 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13520 {
13521 bounds_check (operand, low, high, NULL_TREE, "constant");
13522 }
13523
13524 HOST_WIDE_INT
13525 neon_element_bits (machine_mode mode)
13526 {
13527 return GET_MODE_UNIT_BITSIZE (mode);
13528 }
13529
13530 \f
13531 /* Predicates for `match_operand' and `match_operator'. */
13532
13533 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13534 WB level is 2 if full writeback address modes are allowed, 1
13535 if limited writeback address modes (POST_INC and PRE_DEC) are
13536 allowed and 0 if no writeback at all is supported. */
13537
13538 int
13539 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13540 {
13541 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13542 rtx ind;
13543
13544 /* Reject eliminable registers. */
13545 if (! (reload_in_progress || reload_completed || lra_in_progress)
13546 && ( reg_mentioned_p (frame_pointer_rtx, op)
13547 || reg_mentioned_p (arg_pointer_rtx, op)
13548 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13549 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13550 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13551 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13552 return FALSE;
13553
13554 /* Constants are converted into offsets from labels. */
13555 if (!MEM_P (op))
13556 return FALSE;
13557
13558 ind = XEXP (op, 0);
13559
13560 if (reload_completed
13561 && (LABEL_REF_P (ind)
13562 || (GET_CODE (ind) == CONST
13563 && GET_CODE (XEXP (ind, 0)) == PLUS
13564 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13565 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13566 return TRUE;
13567
13568 /* Match: (mem (reg)). */
13569 if (REG_P (ind))
13570 return arm_address_register_rtx_p (ind, 0);
13571
13572 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13573 acceptable whenever any writeback is allowed (subject to verification
13574 by arm_address_register_rtx_p). PRE_INC and POST_DEC additionally
13575 require full writeback, i.e. they are only accepted when WB_LEVEL
13576 is greater than 1. */
13577 if (wb_level > 0
13578 && (GET_CODE (ind) == POST_INC
13579 || GET_CODE (ind) == PRE_DEC
13580 || (wb_level > 1
13581 && (GET_CODE (ind) == PRE_INC
13582 || GET_CODE (ind) == POST_DEC))))
13583 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13584
13585 if (wb_level > 1
13586 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13587 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13588 && GET_CODE (XEXP (ind, 1)) == PLUS
13589 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13590 ind = XEXP (ind, 1);
13591
13592 /* Match:
13593 (plus (reg)
13594 (const))
13595
13596 The encoded immediate for 16-bit modes is multiplied by 2,
13597 while the encoded immediate for 32-bit and 64-bit modes is
13598 multiplied by 4. */
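/* For example, for SImode the factor below is 4, so legal offsets are the
   multiples of 4 in the range [-1020, 1020]. */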
13599 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13600 if (GET_CODE (ind) == PLUS
13601 && REG_P (XEXP (ind, 0))
13602 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13603 && CONST_INT_P (XEXP (ind, 1))
13604 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13605 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13606 return TRUE;
13607
13608 return FALSE;
13609 }
13610
13611 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13612 WB is true if full writeback address modes are allowed and is false
13613 if limited writeback address modes (POST_INC and PRE_DEC) are
13614 allowed. */
13615
13616 int arm_coproc_mem_operand (rtx op, bool wb)
13617 {
13618 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13619 }
13620
13621 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13622 context in which no writeback address modes are allowed. */
13623
13624 int
13625 arm_coproc_mem_operand_no_writeback (rtx op)
13626 {
13627 return arm_coproc_mem_operand_wb (op, 0);
13628 }
13629
13630 /* This function returns TRUE on matching mode and op.
13631 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13632 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13633 int
13634 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13635 {
13636 enum rtx_code code;
13637 int val, reg_no;
13638
13639 /* Match: (mem (reg)). */
13640 if (REG_P (op))
13641 {
13642 int reg_no = REGNO (op);
13643 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13644 ? reg_no <= LAST_LO_REGNUM
13645 : reg_no < LAST_ARM_REGNUM)
13646 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13647 }
13648 code = GET_CODE (op);
13649
13650 if (code == POST_INC || code == PRE_DEC
13651 || code == PRE_INC || code == POST_DEC)
13652 {
13653 reg_no = REGNO (XEXP (op, 0));
13654 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13655 ? reg_no <= LAST_LO_REGNUM
13656 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13657 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13658 }
13659 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13660 && GET_CODE (XEXP (op, 1)) == PLUS
13661 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13662 && REG_P (XEXP (op, 0))
13663 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13664 /* Make sure to only accept PLUS after reload_completed, otherwise
13665 this will interfere with auto_inc's pattern detection. */
13666 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13667 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13668 {
13669 reg_no = REGNO (XEXP (op, 0));
13670 if (code == PLUS)
13671 val = INTVAL (XEXP (op, 1));
13672 else
13673 val = INTVAL (XEXP(XEXP (op, 1), 1));
13674
13675 switch (mode)
13676 {
13677 case E_V16QImode:
13678 case E_V8QImode:
13679 case E_V4QImode:
13680 if (abs (val) > 127)
13681 return FALSE;
13682 break;
13683 case E_V8HImode:
13684 case E_V8HFmode:
13685 case E_V4HImode:
13686 case E_V4HFmode:
13687 if (val % 2 != 0 || abs (val) > 254)
13688 return FALSE;
13689 break;
13690 case E_V4SImode:
13691 case E_V4SFmode:
13692 if (val % 4 != 0 || abs (val) > 508)
13693 return FALSE;
13694 break;
13695 default:
13696 return FALSE;
13697 }
13698 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13699 || (MVE_STN_LDW_MODE (mode)
13700 ? reg_no <= LAST_LO_REGNUM
13701 : (reg_no < LAST_ARM_REGNUM
13702 && (code == PLUS || reg_no != SP_REGNUM))));
13703 }
13704 return FALSE;
13705 }
13706
13707 /* Return TRUE if OP is a memory operand which we can load or store a vector
13708 to/from. TYPE is one of the following values:
13709 0 - Vector load/store (vldr)
13710 1 - Core registers (ldm)
13711 2 - Element/structure loads (vld1)
13712 */
13713 int
13714 neon_vector_mem_operand (rtx op, int type, bool strict)
13715 {
13716 rtx ind;
13717
13718 /* Reject eliminable registers. */
13719 if (strict && ! (reload_in_progress || reload_completed)
13720 && (reg_mentioned_p (frame_pointer_rtx, op)
13721 || reg_mentioned_p (arg_pointer_rtx, op)
13722 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13723 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13724 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13725 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13726 return FALSE;
13727
13728 /* Constants are converted into offsets from labels. */
13729 if (!MEM_P (op))
13730 return FALSE;
13731
13732 ind = XEXP (op, 0);
13733
13734 if (reload_completed
13735 && (LABEL_REF_P (ind)
13736 || (GET_CODE (ind) == CONST
13737 && GET_CODE (XEXP (ind, 0)) == PLUS
13738 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13739 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13740 return TRUE;
13741
13742 /* Match: (mem (reg)). */
13743 if (REG_P (ind))
13744 return arm_address_register_rtx_p (ind, 0);
13745
13746 /* Allow post-increment with Neon registers. */
13747 if ((type != 1 && GET_CODE (ind) == POST_INC)
13748 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13749 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13750
13751 /* Allow post-increment by register for VLDn. */
13752 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13753 && GET_CODE (XEXP (ind, 1)) == PLUS
13754 && REG_P (XEXP (XEXP (ind, 1), 1))
13755 && REG_P (XEXP (ind, 0))
13756 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13757 return true;
13758
13759 /* Match:
13760 (plus (reg)
13761 (const)). */
13762 if (type == 0
13763 && GET_CODE (ind) == PLUS
13764 && REG_P (XEXP (ind, 0))
13765 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13766 && CONST_INT_P (XEXP (ind, 1))
13767 && INTVAL (XEXP (ind, 1)) > -1024
13768 /* For quad modes, we restrict the constant offset to be slightly less
13769 than what the instruction format permits. We have no such constraint
13770 on double mode offsets. (This must match arm_legitimate_index_p.) */
13771 && (INTVAL (XEXP (ind, 1))
13772 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13773 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13774 return TRUE;
13775
13776 return FALSE;
13777 }
13778
13779 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13780 type. */
13781 int
13782 mve_struct_mem_operand (rtx op)
13783 {
13784 rtx ind = XEXP (op, 0);
13785
13786 /* Match: (mem (reg)). */
13787 if (REG_P (ind))
13788 return arm_address_register_rtx_p (ind, 0);
13789
13790 /* Allow only post-increment by the mode size. */
13791 if (GET_CODE (ind) == POST_INC)
13792 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13793
13794 return FALSE;
13795 }
13796
13797 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13798 type. */
13799 int
13800 neon_struct_mem_operand (rtx op)
13801 {
13802 rtx ind;
13803
13804 /* Reject eliminable registers. */
13805 if (! (reload_in_progress || reload_completed)
13806 && ( reg_mentioned_p (frame_pointer_rtx, op)
13807 || reg_mentioned_p (arg_pointer_rtx, op)
13808 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13809 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13810 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13811 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13812 return FALSE;
13813
13814 /* Constants are converted into offsets from labels. */
13815 if (!MEM_P (op))
13816 return FALSE;
13817
13818 ind = XEXP (op, 0);
13819
13820 if (reload_completed
13821 && (LABEL_REF_P (ind)
13822 || (GET_CODE (ind) == CONST
13823 && GET_CODE (XEXP (ind, 0)) == PLUS
13824 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13825 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13826 return TRUE;
13827
13828 /* Match: (mem (reg)). */
13829 if (REG_P (ind))
13830 return arm_address_register_rtx_p (ind, 0);
13831
13832 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13833 if (GET_CODE (ind) == POST_INC
13834 || GET_CODE (ind) == PRE_DEC)
13835 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13836
13837 return FALSE;
13838 }
13839
13840 /* Prepares the operands for the VCMLA by lane instruction such that the right
13841 register number is selected. This instruction is special in that it always
13842 requires a D register, however there is a choice to be made between Dn[0],
13843 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13844
13845 The VCMLA by lane function always selects two values. For instance given D0
13846 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13847 used by the instruction. However given V4SF then index 0 and 1 are valid as
13848 D0[0] or D1[0] are both valid.
13849
13850 This function centralizes that information based on OPERANDS: OPERANDS[3]
13851 is changed from a REG into a CONST_INT RTX and OPERANDS[4] is updated to
13852 contain the right index. */
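/* Illustrative example (assuming OPERANDS[3] lives in q0, i.e. d0/d1): for a
   V4SFmode operand with lane index 1, OPERANDS[3] becomes the D-register
   index 1 and OPERANDS[4] becomes 0, selecting D1[0]. */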
13853
13854 rtx *
13855 neon_vcmla_lane_prepare_operands (rtx *operands)
13856 {
13857 int lane = INTVAL (operands[4]);
13858 machine_mode constmode = SImode;
13859 machine_mode mode = GET_MODE (operands[3]);
13860 int regno = REGNO (operands[3]);
13861 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13862 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13863 {
13864 operands[3] = gen_int_mode (regno + 1, constmode);
13865 operands[4]
13866 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13867 }
13868 else
13869 {
13870 operands[3] = gen_int_mode (regno, constmode);
13871 operands[4] = gen_int_mode (lane, constmode);
13872 }
13873 return operands;
13874 }
13875
13876
13877 /* Return true if X is a register that will be eliminated later on. */
13878 int
13879 arm_eliminable_register (rtx x)
13880 {
13881 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13882 || REGNO (x) == ARG_POINTER_REGNUM
13883 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13884 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13885 }
13886
13887 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13888 coprocessor registers. Otherwise return NO_REGS. */
13889
13890 enum reg_class
13891 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13892 {
13893 if (mode == HFmode)
13894 {
13895 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13896 return GENERAL_REGS;
13897 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13898 return NO_REGS;
13899 return GENERAL_REGS;
13900 }
13901
13902 /* The neon move patterns handle all legitimate vector and struct
13903 addresses. */
13904 if (TARGET_NEON
13905 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13906 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13907 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13908 || VALID_NEON_STRUCT_MODE (mode)))
13909 return NO_REGS;
13910
13911 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13912 return NO_REGS;
13913
13914 return GENERAL_REGS;
13915 }
13916
13917 /* Values which must be returned in the most-significant end of the return
13918 register. */
13919
13920 static bool
13921 arm_return_in_msb (const_tree valtype)
13922 {
13923 return (TARGET_AAPCS_BASED
13924 && BYTES_BIG_ENDIAN
13925 && (AGGREGATE_TYPE_P (valtype)
13926 || TREE_CODE (valtype) == COMPLEX_TYPE
13927 || FIXED_POINT_TYPE_P (valtype)));
13928 }
13929
13930 /* Return TRUE if X references a SYMBOL_REF. */
13931 int
13932 symbol_mentioned_p (rtx x)
13933 {
13934 const char * fmt;
13935 int i;
13936
13937 if (SYMBOL_REF_P (x))
13938 return 1;
13939
13940 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13941 are constant offsets, not symbols. */
13942 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13943 return 0;
13944
13945 fmt = GET_RTX_FORMAT (GET_CODE (x));
13946
13947 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13948 {
13949 if (fmt[i] == 'E')
13950 {
13951 int j;
13952
13953 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13954 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13955 return 1;
13956 }
13957 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13958 return 1;
13959 }
13960
13961 return 0;
13962 }
13963
13964 /* Return TRUE if X references a LABEL_REF. */
13965 int
13966 label_mentioned_p (rtx x)
13967 {
13968 const char * fmt;
13969 int i;
13970
13971 if (LABEL_REF_P (x))
13972 return 1;
13973
13974 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13975 instruction, but they are constant offsets, not symbols. */
13976 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13977 return 0;
13978
13979 fmt = GET_RTX_FORMAT (GET_CODE (x));
13980 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13981 {
13982 if (fmt[i] == 'E')
13983 {
13984 int j;
13985
13986 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13987 if (label_mentioned_p (XVECEXP (x, i, j)))
13988 return 1;
13989 }
13990 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13991 return 1;
13992 }
13993
13994 return 0;
13995 }
13996
13997 int
13998 tls_mentioned_p (rtx x)
13999 {
14000 switch (GET_CODE (x))
14001 {
14002 case CONST:
14003 return tls_mentioned_p (XEXP (x, 0));
14004
14005 case UNSPEC:
14006 if (XINT (x, 1) == UNSPEC_TLS)
14007 return 1;
14008
14009 /* Fall through. */
14010 default:
14011 return 0;
14012 }
14013 }
14014
14015 /* Must not copy any rtx that uses a pc-relative address.
14016 Also, disallow copying of load-exclusive instructions that
14017 may appear after splitting of compare-and-swap-style operations
14018 so as to prevent those loops from being transformed away from their
14019 canonical forms (see PR 69904). */
14020
14021 static bool
14022 arm_cannot_copy_insn_p (rtx_insn *insn)
14023 {
14024 /* The tls call insn cannot be copied, as it is paired with a data
14025 word. */
14026 if (recog_memoized (insn) == CODE_FOR_tlscall)
14027 return true;
14028
14029 subrtx_iterator::array_type array;
14030 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14031 {
14032 const_rtx x = *iter;
14033 if (GET_CODE (x) == UNSPEC
14034 && (XINT (x, 1) == UNSPEC_PIC_BASE
14035 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14036 return true;
14037 }
14038
14039 rtx set = single_set (insn);
14040 if (set)
14041 {
14042 rtx src = SET_SRC (set);
14043 if (GET_CODE (src) == ZERO_EXTEND)
14044 src = XEXP (src, 0);
14045
14046 /* Catch the load-exclusive and load-acquire operations. */
14047 if (GET_CODE (src) == UNSPEC_VOLATILE
14048 && (XINT (src, 1) == VUNSPEC_LL
14049 || XINT (src, 1) == VUNSPEC_LAX))
14050 return true;
14051 }
14052 return false;
14053 }
14054
14055 enum rtx_code
14056 minmax_code (rtx x)
14057 {
14058 enum rtx_code code = GET_CODE (x);
14059
14060 switch (code)
14061 {
14062 case SMAX:
14063 return GE;
14064 case SMIN:
14065 return LE;
14066 case UMIN:
14067 return LEU;
14068 case UMAX:
14069 return GEU;
14070 default:
14071 gcc_unreachable ();
14072 }
14073 }
14074
14075 /* Match pair of min/max operators that can be implemented via usat/ssat. */
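/* For example (illustrative): bounds 0 and 255 match usat with *MASK == 8,
   and bounds -128 and 127 match ssat with *MASK == 8 (usat #8 clamps to
   [0, 255], ssat #8 to [-128, 127]). */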
14076
14077 bool
14078 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14079 int *mask, bool *signed_sat)
14080 {
14081 /* The high bound must be a power of two minus one. */
14082 int log = exact_log2 (INTVAL (hi_bound) + 1);
14083 if (log == -1)
14084 return false;
14085
14086 /* The low bound is either zero (for usat) or one less than the
14087 negation of the high bound (for ssat). */
14088 if (INTVAL (lo_bound) == 0)
14089 {
14090 if (mask)
14091 *mask = log;
14092 if (signed_sat)
14093 *signed_sat = false;
14094
14095 return true;
14096 }
14097
14098 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14099 {
14100 if (mask)
14101 *mask = log + 1;
14102 if (signed_sat)
14103 *signed_sat = true;
14104
14105 return true;
14106 }
14107
14108 return false;
14109 }
14110
14111 /* Return 1 if memory locations are adjacent. */
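/* For example (on a target without load delay slots), [r0, #4] and [r0, #8]
   are adjacent, while [r0, #4] and [r1, #8] are not. */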
14112 int
14113 adjacent_mem_locations (rtx a, rtx b)
14114 {
14115 /* We don't guarantee to preserve the order of these memory refs. */
14116 if (volatile_refs_p (a) || volatile_refs_p (b))
14117 return 0;
14118
14119 if ((REG_P (XEXP (a, 0))
14120 || (GET_CODE (XEXP (a, 0)) == PLUS
14121 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14122 && (REG_P (XEXP (b, 0))
14123 || (GET_CODE (XEXP (b, 0)) == PLUS
14124 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14125 {
14126 HOST_WIDE_INT val0 = 0, val1 = 0;
14127 rtx reg0, reg1;
14128 int val_diff;
14129
14130 if (GET_CODE (XEXP (a, 0)) == PLUS)
14131 {
14132 reg0 = XEXP (XEXP (a, 0), 0);
14133 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14134 }
14135 else
14136 reg0 = XEXP (a, 0);
14137
14138 if (GET_CODE (XEXP (b, 0)) == PLUS)
14139 {
14140 reg1 = XEXP (XEXP (b, 0), 0);
14141 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14142 }
14143 else
14144 reg1 = XEXP (b, 0);
14145
14146 /* Don't accept any offset that will require multiple
14147 instructions to handle, since this would cause the
14148 arith_adjacentmem pattern to output an overlong sequence. */
14149 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14150 return 0;
14151
14152 /* Don't allow an eliminable register: register elimination can make
14153 the offset too large. */
14154 if (arm_eliminable_register (reg0))
14155 return 0;
14156
14157 val_diff = val1 - val0;
14158
14159 if (arm_ld_sched)
14160 {
14161 /* If the target has load delay slots, then there's no benefit
14162 to using an ldm instruction unless the offset is zero and
14163 we are optimizing for size. */
14164 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14165 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14166 && (val_diff == 4 || val_diff == -4));
14167 }
14168
14169 return ((REGNO (reg0) == REGNO (reg1))
14170 && (val_diff == 4 || val_diff == -4));
14171 }
14172
14173 return 0;
14174 }
14175
14176 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14177 for load operations, false for store operations. CONSECUTIVE is true
14178 if the register numbers in the operation must be consecutive in the register
14179 bank. RETURN_PC is true if the value is to be loaded into PC.
14180 The pattern we are trying to match for load is:
14181 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14182 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14183 :
14184 :
14185 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14186 ]
14187 where
14188 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14189 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14190 3. If consecutive is TRUE, then for the kth register being loaded,
14191 REGNO (R_dk) = REGNO (R_d0) + k.
14192 The pattern for store is similar. */
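/* As an illustrative sketch (register numbers chosen arbitrarily): the
   Thumb-2 pop "ldmia sp!, {r4, r5}" would be represented as

     (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
                (set (reg:SI r4) (mem:SI (reg:SI sp)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])

   which this function accepts with LOAD true and MODE SImode: the
   write-back element adds (count - 1) * 4 == 8 to the base register and
   the loaded registers are strictly ascending.  */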
14193 bool
14194 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14195 bool consecutive, bool return_pc)
14196 {
14197 HOST_WIDE_INT count = XVECLEN (op, 0);
14198 rtx reg, mem, addr;
14199 unsigned regno;
14200 unsigned first_regno;
14201 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14202 rtx elt;
14203 bool addr_reg_in_reglist = false;
14204 bool update = false;
14205 int reg_increment;
14206 int offset_adj;
14207 int regs_per_val;
14208
14209 /* If not in SImode, then registers must be consecutive
14210 (e.g., VLDM instructions for DFmode). */
14211 gcc_assert ((mode == SImode) || consecutive);
14212 /* Setting return_pc for stores is illegal. */
14213 gcc_assert (!return_pc || load);
14214
14215 /* Set up the increments and the regs per val based on the mode. */
14216 reg_increment = GET_MODE_SIZE (mode);
14217 regs_per_val = reg_increment / 4;
14218 offset_adj = return_pc ? 1 : 0;
14219
14220 if (count <= 1
14221 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14222 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14223 return false;
14224
14225 /* Check if this is a write-back. */
14226 elt = XVECEXP (op, 0, offset_adj);
14227 if (GET_CODE (SET_SRC (elt)) == PLUS)
14228 {
14229 i++;
14230 base = 1;
14231 update = true;
14232
14233 /* The offset adjustment must be the number of registers being
14234 popped times the size of a single register. */
14235 if (!REG_P (SET_DEST (elt))
14236 || !REG_P (XEXP (SET_SRC (elt), 0))
14237 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14238 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14239 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14240 ((count - 1 - offset_adj) * reg_increment))
14241 return false;
14242 }
14243
14244 i = i + offset_adj;
14245 base = base + offset_adj;
14246 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14247 success depends on the type: VLDM can do just one reg,
14248 LDM must do at least two. */
14249 if ((count <= i) && (mode == SImode))
14250 return false;
14251
14252 elt = XVECEXP (op, 0, i - 1);
14253 if (GET_CODE (elt) != SET)
14254 return false;
14255
14256 if (load)
14257 {
14258 reg = SET_DEST (elt);
14259 mem = SET_SRC (elt);
14260 }
14261 else
14262 {
14263 reg = SET_SRC (elt);
14264 mem = SET_DEST (elt);
14265 }
14266
14267 if (!REG_P (reg) || !MEM_P (mem))
14268 return false;
14269
14270 regno = REGNO (reg);
14271 first_regno = regno;
14272 addr = XEXP (mem, 0);
14273 if (GET_CODE (addr) == PLUS)
14274 {
14275 if (!CONST_INT_P (XEXP (addr, 1)))
14276 return false;
14277
14278 offset = INTVAL (XEXP (addr, 1));
14279 addr = XEXP (addr, 0);
14280 }
14281
14282 if (!REG_P (addr))
14283 return false;
14284
14285 /* Don't allow SP to be loaded unless it is also the base register. It
14286 guarantees that SP is reset correctly when an LDM instruction
14287 is interrupted. Otherwise, we might end up with a corrupt stack. */
14288 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14289 return false;
14290
14291 if (regno == REGNO (addr))
14292 addr_reg_in_reglist = true;
14293
14294 for (; i < count; i++)
14295 {
14296 elt = XVECEXP (op, 0, i);
14297 if (GET_CODE (elt) != SET)
14298 return false;
14299
14300 if (load)
14301 {
14302 reg = SET_DEST (elt);
14303 mem = SET_SRC (elt);
14304 }
14305 else
14306 {
14307 reg = SET_SRC (elt);
14308 mem = SET_DEST (elt);
14309 }
14310
14311 if (!REG_P (reg)
14312 || GET_MODE (reg) != mode
14313 || REGNO (reg) <= regno
14314 || (consecutive
14315 && (REGNO (reg) !=
14316 (unsigned int) (first_regno + regs_per_val * (i - base))))
14317 /* Don't allow SP to be loaded unless it is also the base register. It
14318 guarantees that SP is reset correctly when an LDM instruction
14319 is interrupted. Otherwise, we might end up with a corrupt stack. */
14320 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14321 || !MEM_P (mem)
14322 || GET_MODE (mem) != mode
14323 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14324 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14325 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14326 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14327 offset + (i - base) * reg_increment))
14328 && (!REG_P (XEXP (mem, 0))
14329 || offset + (i - base) * reg_increment != 0)))
14330 return false;
14331
14332 regno = REGNO (reg);
14333 if (regno == REGNO (addr))
14334 addr_reg_in_reglist = true;
14335 }
14336
14337 if (load)
14338 {
14339 if (update && addr_reg_in_reglist)
14340 return false;
14341
14342 /* For Thumb-1, the address register is always modified, either by write-back
14343 or by an explicit load. If the pattern does not describe an update,
14344 then the address register must be in the list of loaded registers. */
14345 if (TARGET_THUMB1)
14346 return update || addr_reg_in_reglist;
14347 }
14348
14349 return true;
14350 }
14351
14352 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14353 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14354 following form:
14355
14356 [(set (reg:SI <N>) (const_int 0))
14357 (set (reg:SI <M>) (const_int 0))
14358 ...
14359 (unspec_volatile [(const_int 0)]
14360 VUNSPEC_CLRM_APSR)
14361 (clobber (reg:CC CC_REGNUM))
14362 ]
14363
14364 Any number (including 0) of set expressions is valid, and the volatile unspec
14365 is optional. All registers except SP and PC are allowed, and the registers
14366 must be in strictly increasing order.
14367
14368 To be a valid VSCCLRM pattern, OP must have the following form:
14369
14370 [(unspec_volatile [(const_int 0)]
14371 VUNSPEC_VSCCLRM_VPR)
14372 (set (reg:SF <N>) (const_int 0))
14373 (set (reg:SF <M>) (const_int 0))
14374 ...
14375 ]
14376
14377 As with CLRM, any number (including 0) of set expressions is valid; however,
14378 the volatile unspec is mandatory here. Any VFP single-precision register is
14379 accepted, but all registers must be consecutive and in increasing order. */
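/* For example (an illustrative sketch), "clrm {r1, r2, APSR}" would be
   represented as

     (parallel [(set (reg:SI r1) (const_int 0))
                (set (reg:SI r2) (const_int 0))
                (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
                (clobber (reg:CC CC_REGNUM))])

   which passes the checks below: the unspec is the second-to-last element,
   the registers are strictly increasing, and neither is SP or PC.  */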
14380
14381 bool
14382 clear_operation_p (rtx op, bool vfp)
14383 {
14384 unsigned regno;
14385 unsigned last_regno = INVALID_REGNUM;
14386 rtx elt, reg, zero;
14387 int count = XVECLEN (op, 0);
14388 int first_set = vfp ? 1 : 0;
14389 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14390
14391 for (int i = first_set; i < count; i++)
14392 {
14393 elt = XVECEXP (op, 0, i);
14394
14395 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14396 {
14397 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14398 || XVECLEN (elt, 0) != 1
14399 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14400 || i != count - 2)
14401 return false;
14402
14403 continue;
14404 }
14405
14406 if (GET_CODE (elt) == CLOBBER)
14407 continue;
14408
14409 if (GET_CODE (elt) != SET)
14410 return false;
14411
14412 reg = SET_DEST (elt);
14413 zero = SET_SRC (elt);
14414
14415 if (!REG_P (reg)
14416 || GET_MODE (reg) != expected_mode
14417 || zero != CONST0_RTX (SImode))
14418 return false;
14419
14420 regno = REGNO (reg);
14421
14422 if (vfp)
14423 {
14424 if (i != first_set && regno != last_regno + 1)
14425 return false;
14426 }
14427 else
14428 {
14429 if (regno == SP_REGNUM || regno == PC_REGNUM)
14430 return false;
14431 if (i != first_set && regno <= last_regno)
14432 return false;
14433 }
14434
14435 last_regno = regno;
14436 }
14437
14438 return true;
14439 }
14440
14441 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14442 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14443 instruction. ADD_OFFSET is nonzero if the base address register needs
14444 to be modified with an add instruction before we can use it. */
14445
14446 static bool
14447 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14448 int nops, HOST_WIDE_INT add_offset)
14449 {
14450 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
14451 if the offset isn't small enough. The reason 2 ldrs are faster
14452 is because these ARMs are able to do more than one cache access
14453 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14454 whilst the ARM8 has a double bandwidth cache. This means that
14455 these cores can do both an instruction fetch and a data fetch in
14456 a single cycle, so the trick of calculating the address into a
14457 scratch register (one of the result regs) and then doing a load
14458 multiple actually becomes slower (and no smaller in code size).
14459 That is the transformation
14460
14461 ldr rd1, [rbase + offset]
14462 ldr rd2, [rbase + offset + 4]
14463
14464 to
14465
14466 add rd1, rbase, offset
14467 ldmia rd1, {rd1, rd2}
14468
14469 produces worse code -- '3 cycles + any stalls on rd2' instead of
14470 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14471 access per cycle, the first sequence could never complete in less
14472 than 6 cycles, whereas the ldm sequence would only take 5 and
14473 would make better use of sequential accesses if not hitting the
14474 cache.
14475
14476 We cheat here and test 'arm_ld_sched' which we currently know to
14477 only be true for the ARM8, ARM9 and StrongARM. If this ever
14478 changes, then the test below needs to be reworked. */
14479 if (nops == 2 && arm_ld_sched && add_offset != 0)
14480 return false;
14481
14482 /* XScale has load-store double instructions, but they have stricter
14483 alignment requirements than load-store multiple, so we cannot
14484 use them.
14485
14486 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14487 the pipeline until completion.
14488
14489 NREGS CYCLES
14490 1 3
14491 2 4
14492 3 5
14493 4 6
14494
14495 An ldr instruction takes 1-3 cycles, but does not block the
14496 pipeline.
14497
14498 NREGS CYCLES
14499 1 1-3
14500 2 2-6
14501 3 3-9
14502 4 4-12
14503
14504 Best case ldr will always win. However, the more ldr instructions
14505 we issue, the less likely we are to be able to schedule them well.
14506 Using ldr instructions also increases code size.
14507
14508 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14509 for counts of 3 or 4 regs. */
14510 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14511 return false;
14512 return true;
14513 }
14514
14515 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14516 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14517 an array ORDER which describes the sequence to use when accessing the
14518 offsets that produces an ascending order. In this sequence, each
14519 offset must be larger by exactly 4 than the previous one. ORDER[0]
14520 must have been filled in with the lowest offset by the caller.
14521 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14522 we use to verify that ORDER produces an ascending order of registers.
14523 Return true if it was possible to construct such an order, false if
14524 not. */
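/* A small worked example (illustrative): with NOPS == 4,
   UNSORTED_OFFSETS == {8, 4, 0, 12} and ORDER[0] == 2 (the index of the
   lowest offset), the loop below produces ORDER == {2, 1, 0, 3}, i.e. the
   accesses are visited at offsets 0, 4, 8, 12.  A gap (e.g. offsets
   {0, 4, 12, 16}) or a duplicated offset causes the function to return
   false.  */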
14525
14526 static bool
14527 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14528 int *unsorted_regs)
14529 {
14530 int i;
14531 for (i = 1; i < nops; i++)
14532 {
14533 int j;
14534
14535 order[i] = order[i - 1];
14536 for (j = 0; j < nops; j++)
14537 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14538 {
14539 /* We must find exactly one offset that is higher than the
14540 previous one by 4. */
14541 if (order[i] != order[i - 1])
14542 return false;
14543 order[i] = j;
14544 }
14545 if (order[i] == order[i - 1])
14546 return false;
14547 /* The register numbers must be ascending. */
14548 if (unsorted_regs != NULL
14549 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14550 return false;
14551 }
14552 return true;
14553 }
14554
14555 /* Used to determine in a peephole whether a sequence of load
14556 instructions can be changed into a load-multiple instruction.
14557 NOPS is the number of separate load instructions we are examining. The
14558 first NOPS entries in OPERANDS are the destination registers, the
14559 next NOPS entries are memory operands. If this function is
14560 successful, *BASE is set to the common base register of the memory
14561 accesses; *LOAD_OFFSET is set to the first memory location's offset
14562 from that base register.
14563 REGS is an array filled in with the destination register numbers.
14564 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14565 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14566 the sequence of registers in REGS matches the loads from ascending memory
14567 locations, and the function verifies that the register numbers are
14568 themselves ascending. If CHECK_REGS is false, the register numbers
14569 are stored in the order they are found in the operands. */
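/* For instance (an illustrative sketch, register numbers arbitrary), the
   pair

     ldr r4, [r2, #4]
     ldr r5, [r2, #8]

   gives *BASE == 2, *LOAD_OFFSET == 4 and REGS == {4, 5}, and on ARM,
   provided the transformation is deemed profitable for the tuned core,
   the function returns 2 (the ldmib case) so that the peephole can emit
   "ldmib r2, {r4, r5}".  */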
14570 static int
14571 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14572 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14573 {
14574 int unsorted_regs[MAX_LDM_STM_OPS];
14575 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14576 int order[MAX_LDM_STM_OPS];
14577 int base_reg = -1;
14578 int i, ldm_case;
14579
14580 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14581 easily extended if required. */
14582 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14583
14584 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14585
14586 /* Loop over the operands and check that the memory references are
14587 suitable (i.e. immediate offsets from the same base register). At
14588 the same time, extract the target registers and the memory
14589 offsets. */
14590 for (i = 0; i < nops; i++)
14591 {
14592 rtx reg;
14593 rtx offset;
14594
14595 /* Convert a subreg of a mem into the mem itself. */
14596 if (GET_CODE (operands[nops + i]) == SUBREG)
14597 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14598
14599 gcc_assert (MEM_P (operands[nops + i]));
14600
14601 /* Don't reorder volatile memory references; it doesn't seem worth
14602 looking for the case where the order is ok anyway. */
14603 if (MEM_VOLATILE_P (operands[nops + i]))
14604 return 0;
14605
14606 offset = const0_rtx;
14607
14608 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14609 || (SUBREG_P (reg)
14610 && REG_P (reg = SUBREG_REG (reg))))
14611 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14612 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14613 || (SUBREG_P (reg)
14614 && REG_P (reg = SUBREG_REG (reg))))
14615 && (CONST_INT_P (offset
14616 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14617 {
14618 if (i == 0)
14619 {
14620 base_reg = REGNO (reg);
14621 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14622 return 0;
14623 }
14624 else if (base_reg != (int) REGNO (reg))
14625 /* Not addressed from the same base register. */
14626 return 0;
14627
14628 unsorted_regs[i] = (REG_P (operands[i])
14629 ? REGNO (operands[i])
14630 : REGNO (SUBREG_REG (operands[i])));
14631
14632 /* If it isn't an integer register, or if it overwrites the
14633 base register but isn't the last insn in the list, then
14634 we can't do this. */
14635 if (unsorted_regs[i] < 0
14636 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14637 || unsorted_regs[i] > 14
14638 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14639 return 0;
14640
14641 /* Don't allow SP to be loaded unless it is also the base
14642 register. It guarantees that SP is reset correctly when
14643 an LDM instruction is interrupted. Otherwise, we might
14644 end up with a corrupt stack. */
14645 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14646 return 0;
14647
14648 unsorted_offsets[i] = INTVAL (offset);
14649 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14650 order[0] = i;
14651 }
14652 else
14653 /* Not a suitable memory address. */
14654 return 0;
14655 }
14656
14657 /* All the useful information has now been extracted from the
14658 operands into unsorted_regs and unsorted_offsets; additionally,
14659 order[0] has been set to the lowest offset in the list. Sort
14660 the offsets into order, verifying that they are adjacent, and
14661 check that the register numbers are ascending. */
14662 if (!compute_offset_order (nops, unsorted_offsets, order,
14663 check_regs ? unsorted_regs : NULL))
14664 return 0;
14665
14666 if (saved_order)
14667 memcpy (saved_order, order, sizeof order);
14668
14669 if (base)
14670 {
14671 *base = base_reg;
14672
14673 for (i = 0; i < nops; i++)
14674 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14675
14676 *load_offset = unsorted_offsets[order[0]];
14677 }
14678
14679 if (unsorted_offsets[order[0]] == 0)
14680 ldm_case = 1; /* ldmia */
14681 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14682 ldm_case = 2; /* ldmib */
14683 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14684 ldm_case = 3; /* ldmda */
14685 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14686 ldm_case = 4; /* ldmdb */
14687 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14688 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14689 ldm_case = 5;
14690 else
14691 return 0;
14692
14693 if (!multiple_operation_profitable_p (false, nops,
14694 ldm_case == 5
14695 ? unsorted_offsets[order[0]] : 0))
14696 return 0;
14697
14698 return ldm_case;
14699 }
14700
14701 /* Used to determine in a peephole whether a sequence of store instructions can
14702 be changed into a store-multiple instruction.
14703 NOPS is the number of separate store instructions we are examining.
14704 NOPS_TOTAL is the total number of instructions recognized by the peephole
14705 pattern.
14706 The first NOPS entries in OPERANDS are the source registers, the next
14707 NOPS entries are memory operands. If this function is successful, *BASE is
14708 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14709 to the first memory location's offset from that base register. REGS is an
14710 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14711 likewise filled with the corresponding rtx's.
14712 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14713 numbers to an ascending order of stores.
14714 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14715 from ascending memory locations, and the function verifies that the register
14716 numbers are themselves ascending. If CHECK_REGS is false, the register
14717 numbers are stored in the order they are found in the operands. */
14718 static int
14719 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14720 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14721 HOST_WIDE_INT *load_offset, bool check_regs)
14722 {
14723 int unsorted_regs[MAX_LDM_STM_OPS];
14724 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14725 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14726 int order[MAX_LDM_STM_OPS];
14727 int base_reg = -1;
14728 rtx base_reg_rtx = NULL;
14729 int i, stm_case;
14730
14731 /* Write back of base register is currently only supported for Thumb 1. */
14732 int base_writeback = TARGET_THUMB1;
14733
14734 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14735 easily extended if required. */
14736 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14737
14738 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14739
14740 /* Loop over the operands and check that the memory references are
14741 suitable (i.e. immediate offsets from the same base register). At
14742 the same time, extract the source registers and the memory
14743 offsets. */
14744 for (i = 0; i < nops; i++)
14745 {
14746 rtx reg;
14747 rtx offset;
14748
14749 /* Convert a subreg of a mem into the mem itself. */
14750 if (GET_CODE (operands[nops + i]) == SUBREG)
14751 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14752
14753 gcc_assert (MEM_P (operands[nops + i]));
14754
14755 /* Don't reorder volatile memory references; it doesn't seem worth
14756 looking for the case where the order is ok anyway. */
14757 if (MEM_VOLATILE_P (operands[nops + i]))
14758 return 0;
14759
14760 offset = const0_rtx;
14761
14762 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14763 || (SUBREG_P (reg)
14764 && REG_P (reg = SUBREG_REG (reg))))
14765 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14766 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14767 || (SUBREG_P (reg)
14768 && REG_P (reg = SUBREG_REG (reg))))
14769 && (CONST_INT_P (offset
14770 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14771 {
14772 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14773 ? operands[i] : SUBREG_REG (operands[i]));
14774 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14775
14776 if (i == 0)
14777 {
14778 base_reg = REGNO (reg);
14779 base_reg_rtx = reg;
14780 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14781 return 0;
14782 }
14783 else if (base_reg != (int) REGNO (reg))
14784 /* Not addressed from the same base register. */
14785 return 0;
14786
14787 /* If it isn't an integer register, then we can't do this. */
14788 if (unsorted_regs[i] < 0
14789 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14790 /* The effects are unpredictable if the base register is
14791 both updated and stored. */
14792 || (base_writeback && unsorted_regs[i] == base_reg)
14793 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14794 || unsorted_regs[i] > 14)
14795 return 0;
14796
14797 unsorted_offsets[i] = INTVAL (offset);
14798 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14799 order[0] = i;
14800 }
14801 else
14802 /* Not a suitable memory address. */
14803 return 0;
14804 }
14805
14806 /* All the useful information has now been extracted from the
14807 operands into unsorted_regs and unsorted_offsets; additionally,
14808 order[0] has been set to the lowest offset in the list. Sort
14809 the offsets into order, verifying that they are adjacent, and
14810 check that the register numbers are ascending. */
14811 if (!compute_offset_order (nops, unsorted_offsets, order,
14812 check_regs ? unsorted_regs : NULL))
14813 return 0;
14814
14815 if (saved_order)
14816 memcpy (saved_order, order, sizeof order);
14817
14818 if (base)
14819 {
14820 *base = base_reg;
14821
14822 for (i = 0; i < nops; i++)
14823 {
14824 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14825 if (reg_rtxs)
14826 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14827 }
14828
14829 *load_offset = unsorted_offsets[order[0]];
14830 }
14831
14832 if (TARGET_THUMB1
14833 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14834 return 0;
14835
14836 if (unsorted_offsets[order[0]] == 0)
14837 stm_case = 1; /* stmia */
14838 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14839 stm_case = 2; /* stmib */
14840 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14841 stm_case = 3; /* stmda */
14842 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14843 stm_case = 4; /* stmdb */
14844 else
14845 return 0;
14846
14847 if (!multiple_operation_profitable_p (false, nops, 0))
14848 return 0;
14849
14850 return stm_case;
14851 }
14852 \f
14853 /* Routines for use in generating RTL. */
14854
14855 /* Generate a load-multiple instruction. COUNT is the number of loads in
14856 the instruction; REGS and MEMS are arrays containing the operands.
14857 BASEREG is the base register to be used in addressing the memory operands.
14858 WBACK_OFFSET is nonzero if the instruction should update the base
14859 register. */
14860
14861 static rtx
14862 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14863 HOST_WIDE_INT wback_offset)
14864 {
14865 int i = 0, j;
14866 rtx result;
14867
14868 if (!multiple_operation_profitable_p (false, count, 0))
14869 {
14870 rtx seq;
14871
14872 start_sequence ();
14873
14874 for (i = 0; i < count; i++)
14875 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14876
14877 if (wback_offset != 0)
14878 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14879
14880 seq = get_insns ();
14881 end_sequence ();
14882
14883 return seq;
14884 }
14885
14886 result = gen_rtx_PARALLEL (VOIDmode,
14887 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14888 if (wback_offset != 0)
14889 {
14890 XVECEXP (result, 0, 0)
14891 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14892 i = 1;
14893 count++;
14894 }
14895
14896 for (j = 0; i < count; i++, j++)
14897 XVECEXP (result, 0, i)
14898 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14899
14900 return result;
14901 }
14902
14903 /* Generate a store-multiple instruction. COUNT is the number of stores in
14904 the instruction; REGS and MEMS are arrays containing the operands.
14905 BASEREG is the base register to be used in addressing the memory operands.
14906 WBACK_OFFSET is nonzero if the instruction should update the base
14907 register. */
14908
14909 static rtx
14910 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14911 HOST_WIDE_INT wback_offset)
14912 {
14913 int i = 0, j;
14914 rtx result;
14915
14916 if (GET_CODE (basereg) == PLUS)
14917 basereg = XEXP (basereg, 0);
14918
14919 if (!multiple_operation_profitable_p (false, count, 0))
14920 {
14921 rtx seq;
14922
14923 start_sequence ();
14924
14925 for (i = 0; i < count; i++)
14926 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14927
14928 if (wback_offset != 0)
14929 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14930
14931 seq = get_insns ();
14932 end_sequence ();
14933
14934 return seq;
14935 }
14936
14937 result = gen_rtx_PARALLEL (VOIDmode,
14938 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14939 if (wback_offset != 0)
14940 {
14941 XVECEXP (result, 0, 0)
14942 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14943 i = 1;
14944 count++;
14945 }
14946
14947 for (j = 0; i < count; i++, j++)
14948 XVECEXP (result, 0, i)
14949 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14950
14951 return result;
14952 }
14953
14954 /* Generate either a load-multiple or a store-multiple instruction. This
14955 function can be used in situations where we can start with a single MEM
14956 rtx and adjust its address upwards.
14957 COUNT is the number of operations in the instruction, not counting a
14958 possible update of the base register. REGS is an array containing the
14959 register operands.
14960 BASEREG is the base register to be used in addressing the memory operands,
14961 which are constructed from BASEMEM.
14962 WRITE_BACK specifies whether the generated instruction should include an
14963 update of the base register.
14964 OFFSETP is used to pass an offset to and from this function; this offset
14965 is not used when constructing the address (instead BASEMEM should have an
14966 appropriate offset in its address); it is used only for setting
14967 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14968
14969 static rtx
14970 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14971 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14972 {
14973 rtx mems[MAX_LDM_STM_OPS];
14974 HOST_WIDE_INT offset = *offsetp;
14975 int i;
14976
14977 gcc_assert (count <= MAX_LDM_STM_OPS);
14978
14979 if (GET_CODE (basereg) == PLUS)
14980 basereg = XEXP (basereg, 0);
14981
14982 for (i = 0; i < count; i++)
14983 {
14984 rtx addr = plus_constant (Pmode, basereg, i * 4);
14985 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14986 offset += 4;
14987 }
14988
14989 if (write_back)
14990 *offsetp = offset;
14991
14992 if (is_load)
14993 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14994 write_back ? 4 * count : 0);
14995 else
14996 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14997 write_back ? 4 * count : 0);
14998 }
14999
15000 rtx
15001 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15002 rtx basemem, HOST_WIDE_INT *offsetp)
15003 {
15004 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15005 offsetp);
15006 }
15007
15008 rtx
15009 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15010 rtx basemem, HOST_WIDE_INT *offsetp)
15011 {
15012 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15013 offsetp);
15014 }
15015
15016 /* Called from a peephole2 expander to turn a sequence of loads into an
15017 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15018 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15019 is true if we can reorder the registers because they are subsequently used
15020 commutatively.
15021 Returns true iff we could generate a new instruction. */
15022
15023 bool
15024 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15025 {
15026 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15027 rtx mems[MAX_LDM_STM_OPS];
15028 int i, j, base_reg;
15029 rtx base_reg_rtx;
15030 HOST_WIDE_INT offset;
15031 int write_back = FALSE;
15032 int ldm_case;
15033 rtx addr;
15034
15035 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15036 &base_reg, &offset, !sort_regs);
15037
15038 if (ldm_case == 0)
15039 return false;
15040
15041 if (sort_regs)
15042 for (i = 0; i < nops - 1; i++)
15043 for (j = i + 1; j < nops; j++)
15044 if (regs[i] > regs[j])
15045 {
15046 int t = regs[i];
15047 regs[i] = regs[j];
15048 regs[j] = t;
15049 }
15050 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15051
15052 if (TARGET_THUMB1)
15053 {
15054 gcc_assert (ldm_case == 1 || ldm_case == 5);
15055
15056 /* Thumb-1 ldm uses writeback unless the base register is also loaded. */
15057 write_back = true;
15058 for (i = 0; i < nops; i++)
15059 if (base_reg == regs[i])
15060 write_back = false;
15061
15062 /* Ensure the base is dead if it is updated. */
15063 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15064 return false;
15065 }
15066
15067 if (ldm_case == 5)
15068 {
15069 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15070 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15071 offset = 0;
15072 base_reg_rtx = newbase;
15073 }
15074
15075 for (i = 0; i < nops; i++)
15076 {
15077 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15078 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15079 SImode, addr, 0);
15080 }
15081 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15082 write_back ? offset + i * 4 : 0));
15083 return true;
15084 }
15085
15086 /* Called from a peephole2 expander to turn a sequence of stores into an
15087 STM instruction. OPERANDS are the operands found by the peephole matcher;
15088 NOPS indicates how many separate stores we are trying to combine.
15089 Returns true iff we could generate a new instruction. */
15090
15091 bool
15092 gen_stm_seq (rtx *operands, int nops)
15093 {
15094 int i;
15095 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15096 rtx mems[MAX_LDM_STM_OPS];
15097 int base_reg;
15098 rtx base_reg_rtx;
15099 HOST_WIDE_INT offset;
15100 int write_back = FALSE;
15101 int stm_case;
15102 rtx addr;
15103 bool base_reg_dies;
15104
15105 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15106 mem_order, &base_reg, &offset, true);
15107
15108 if (stm_case == 0)
15109 return false;
15110
15111 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15112
15113 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15114 if (TARGET_THUMB1)
15115 {
15116 gcc_assert (base_reg_dies);
15117 write_back = TRUE;
15118 }
15119
15120 if (stm_case == 5)
15121 {
15122 gcc_assert (base_reg_dies);
15123 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15124 offset = 0;
15125 }
15126
15127 addr = plus_constant (Pmode, base_reg_rtx, offset);
15128
15129 for (i = 0; i < nops; i++)
15130 {
15131 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15132 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15133 SImode, addr, 0);
15134 }
15135 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15136 write_back ? offset + i * 4 : 0));
15137 return true;
15138 }
15139
15140 /* Called from a peephole2 expander to turn a sequence of stores that are
15141 preceded by constant loads into an STM instruction. OPERANDS are the
15142 operands found by the peephole matcher; NOPS indicates how many
15143 separate stores we are trying to combine; there are 2 * NOPS
15144 instructions in the peephole.
15145 Returns true iff we could generate a new instruction. */
15146
15147 bool
15148 gen_const_stm_seq (rtx *operands, int nops)
15149 {
15150 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15151 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15152 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15153 rtx mems[MAX_LDM_STM_OPS];
15154 int base_reg;
15155 rtx base_reg_rtx;
15156 HOST_WIDE_INT offset;
15157 int write_back = FALSE;
15158 int stm_case;
15159 rtx addr;
15160 bool base_reg_dies;
15161 int i, j;
15162 HARD_REG_SET allocated;
15163
15164 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15165 mem_order, &base_reg, &offset, false);
15166
15167 if (stm_case == 0)
15168 return false;
15169
15170 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15171
15172 /* If the same register is used more than once, try to find a free
15173 register. */
15174 CLEAR_HARD_REG_SET (allocated);
15175 for (i = 0; i < nops; i++)
15176 {
15177 for (j = i + 1; j < nops; j++)
15178 if (regs[i] == regs[j])
15179 {
15180 rtx t = peep2_find_free_register (0, nops * 2,
15181 TARGET_THUMB1 ? "l" : "r",
15182 SImode, &allocated);
15183 if (t == NULL_RTX)
15184 return false;
15185 reg_rtxs[i] = t;
15186 regs[i] = REGNO (t);
15187 }
15188 }
15189
15190 /* Compute an ordering that maps the register numbers to an ascending
15191 sequence. */
15192 reg_order[0] = 0;
15193 for (i = 0; i < nops; i++)
15194 if (regs[i] < regs[reg_order[0]])
15195 reg_order[0] = i;
15196
15197 for (i = 1; i < nops; i++)
15198 {
15199 int this_order = reg_order[i - 1];
15200 for (j = 0; j < nops; j++)
15201 if (regs[j] > regs[reg_order[i - 1]]
15202 && (this_order == reg_order[i - 1]
15203 || regs[j] < regs[this_order]))
15204 this_order = j;
15205 reg_order[i] = this_order;
15206 }
15207
15208 /* Ensure that registers that must be live after the instruction end
15209 up with the correct value. */
15210 for (i = 0; i < nops; i++)
15211 {
15212 int this_order = reg_order[i];
15213 if ((this_order != mem_order[i]
15214 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15215 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15216 return false;
15217 }
15218
15219 /* Load the constants. */
15220 for (i = 0; i < nops; i++)
15221 {
15222 rtx op = operands[2 * nops + mem_order[i]];
15223 sorted_regs[i] = regs[reg_order[i]];
15224 emit_move_insn (reg_rtxs[reg_order[i]], op);
15225 }
15226
15227 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15228
15229 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15230 if (TARGET_THUMB1)
15231 {
15232 gcc_assert (base_reg_dies);
15233 write_back = TRUE;
15234 }
15235
15236 if (stm_case == 5)
15237 {
15238 gcc_assert (base_reg_dies);
15239 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15240 offset = 0;
15241 }
15242
15243 addr = plus_constant (Pmode, base_reg_rtx, offset);
15244
15245 for (i = 0; i < nops; i++)
15246 {
15247 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15248 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15249 SImode, addr, 0);
15250 }
15251 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15252 write_back ? offset + i * 4 : 0));
15253 return true;
15254 }
15255
15256 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15257 unaligned copies on processors which support unaligned semantics for those
15258 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15259 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15260 An interleave factor of 1 (the minimum) will perform no interleaving.
15261 Load/store multiple are used for aligned addresses where possible. */
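/* As a worked example (illustrative): copying 17 bytes with
   INTERLEAVE_FACTOR == 2 moves two 8-byte blocks in the main loop (using
   a two-register load/store multiple on whichever side is word-aligned),
   leaves no whole words over, and finishes with a single byte copy.  */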
15262
15263 static void
15264 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15265 HOST_WIDE_INT length,
15266 unsigned int interleave_factor)
15267 {
15268 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15269 int *regnos = XALLOCAVEC (int, interleave_factor);
15270 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15271 HOST_WIDE_INT i, j;
15272 HOST_WIDE_INT remaining = length, words;
15273 rtx halfword_tmp = NULL, byte_tmp = NULL;
15274 rtx dst, src;
15275 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15276 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15277 HOST_WIDE_INT srcoffset, dstoffset;
15278 HOST_WIDE_INT src_autoinc, dst_autoinc;
15279 rtx mem, addr;
15280
15281 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15282
15283 /* Use hard registers if we have aligned source or destination so we can use
15284 load/store multiple with contiguous registers. */
15285 if (dst_aligned || src_aligned)
15286 for (i = 0; i < interleave_factor; i++)
15287 regs[i] = gen_rtx_REG (SImode, i);
15288 else
15289 for (i = 0; i < interleave_factor; i++)
15290 regs[i] = gen_reg_rtx (SImode);
15291
15292 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15293 src = copy_addr_to_reg (XEXP (srcbase, 0));
15294
15295 srcoffset = dstoffset = 0;
15296
15297 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15298 For copying the last bytes we want to subtract this offset again. */
15299 src_autoinc = dst_autoinc = 0;
15300
15301 for (i = 0; i < interleave_factor; i++)
15302 regnos[i] = i;
15303
15304 /* Copy BLOCK_SIZE_BYTES chunks. */
15305
15306 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15307 {
15308 /* Load words. */
15309 if (src_aligned && interleave_factor > 1)
15310 {
15311 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15312 TRUE, srcbase, &srcoffset));
15313 src_autoinc += UNITS_PER_WORD * interleave_factor;
15314 }
15315 else
15316 {
15317 for (j = 0; j < interleave_factor; j++)
15318 {
15319 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15320 - src_autoinc));
15321 mem = adjust_automodify_address (srcbase, SImode, addr,
15322 srcoffset + j * UNITS_PER_WORD);
15323 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15324 }
15325 srcoffset += block_size_bytes;
15326 }
15327
15328 /* Store words. */
15329 if (dst_aligned && interleave_factor > 1)
15330 {
15331 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15332 TRUE, dstbase, &dstoffset));
15333 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15334 }
15335 else
15336 {
15337 for (j = 0; j < interleave_factor; j++)
15338 {
15339 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15340 - dst_autoinc));
15341 mem = adjust_automodify_address (dstbase, SImode, addr,
15342 dstoffset + j * UNITS_PER_WORD);
15343 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15344 }
15345 dstoffset += block_size_bytes;
15346 }
15347
15348 remaining -= block_size_bytes;
15349 }
15350
15351 /* Copy any whole words left (note these aren't interleaved with any
15352 subsequent halfword/byte load/stores in the interests of simplicity). */
15353
15354 words = remaining / UNITS_PER_WORD;
15355
15356 gcc_assert (words < interleave_factor);
15357
15358 if (src_aligned && words > 1)
15359 {
15360 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15361 &srcoffset));
15362 src_autoinc += UNITS_PER_WORD * words;
15363 }
15364 else
15365 {
15366 for (j = 0; j < words; j++)
15367 {
15368 addr = plus_constant (Pmode, src,
15369 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15370 mem = adjust_automodify_address (srcbase, SImode, addr,
15371 srcoffset + j * UNITS_PER_WORD);
15372 if (src_aligned)
15373 emit_move_insn (regs[j], mem);
15374 else
15375 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15376 }
15377 srcoffset += words * UNITS_PER_WORD;
15378 }
15379
15380 if (dst_aligned && words > 1)
15381 {
15382 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15383 &dstoffset));
15384 dst_autoinc += words * UNITS_PER_WORD;
15385 }
15386 else
15387 {
15388 for (j = 0; j < words; j++)
15389 {
15390 addr = plus_constant (Pmode, dst,
15391 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15392 mem = adjust_automodify_address (dstbase, SImode, addr,
15393 dstoffset + j * UNITS_PER_WORD);
15394 if (dst_aligned)
15395 emit_move_insn (mem, regs[j]);
15396 else
15397 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15398 }
15399 dstoffset += words * UNITS_PER_WORD;
15400 }
15401
15402 remaining -= words * UNITS_PER_WORD;
15403
15404 gcc_assert (remaining < 4);
15405
15406 /* Copy a halfword if necessary. */
15407
15408 if (remaining >= 2)
15409 {
15410 halfword_tmp = gen_reg_rtx (SImode);
15411
15412 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15413 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15414 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15415
15416 /* Either write out immediately, or delay until we've loaded the last
15417 byte, depending on interleave factor. */
15418 if (interleave_factor == 1)
15419 {
15420 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15421 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15422 emit_insn (gen_unaligned_storehi (mem,
15423 gen_lowpart (HImode, halfword_tmp)));
15424 halfword_tmp = NULL;
15425 dstoffset += 2;
15426 }
15427
15428 remaining -= 2;
15429 srcoffset += 2;
15430 }
15431
15432 gcc_assert (remaining < 2);
15433
15434 /* Copy last byte. */
15435
15436 if ((remaining & 1) != 0)
15437 {
15438 byte_tmp = gen_reg_rtx (SImode);
15439
15440 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15441 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15442 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15443
15444 if (interleave_factor == 1)
15445 {
15446 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15447 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15448 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15449 byte_tmp = NULL;
15450 dstoffset++;
15451 }
15452
15453 remaining--;
15454 srcoffset++;
15455 }
15456
15457 /* Store last halfword if we haven't done so already. */
15458
15459 if (halfword_tmp)
15460 {
15461 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15462 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15463 emit_insn (gen_unaligned_storehi (mem,
15464 gen_lowpart (HImode, halfword_tmp)));
15465 dstoffset += 2;
15466 }
15467
15468 /* Likewise for last byte. */
15469
15470 if (byte_tmp)
15471 {
15472 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15473 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15474 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15475 dstoffset++;
15476 }
15477
15478 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15479 }
15480
15481 /* From mips_adjust_block_mem:
15482
15483 Helper function for doing a loop-based block operation on memory
15484 reference MEM. Each iteration of the loop will operate on LENGTH
15485 bytes of MEM.
15486
15487 Create a new base register for use within the loop and point it to
15488 the start of MEM. Create a new memory reference that uses this
15489 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15490
15491 static void
15492 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15493 rtx *loop_mem)
15494 {
15495 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15496
15497 /* Although the new mem does not refer to a known location,
15498 it does keep up to LENGTH bytes of alignment. */
15499 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15500 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15501 }
15502
15503 /* From mips_block_move_loop:
15504
15505 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15506 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15507 the memory regions do not overlap. */
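/* For example (illustrative): with LENGTH == 100 and BYTES_PER_ITER == 16,
   LEFTOVER is 4, the loop below copies 96 bytes in six iterations, and the
   final call to arm_block_move_unaligned_straight mops up the remaining
   4 bytes.  */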
15508
15509 static void
15510 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15511 unsigned int interleave_factor,
15512 HOST_WIDE_INT bytes_per_iter)
15513 {
15514 rtx src_reg, dest_reg, final_src, test;
15515 HOST_WIDE_INT leftover;
15516
15517 leftover = length % bytes_per_iter;
15518 length -= leftover;
15519
15520 /* Create registers and memory references for use within the loop. */
15521 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15522 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15523
15524 /* Calculate the value that SRC_REG should have after the last iteration of
15525 the loop. */
15526 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15527 0, 0, OPTAB_WIDEN);
15528
15529 /* Emit the start of the loop. */
15530 rtx_code_label *label = gen_label_rtx ();
15531 emit_label (label);
15532
15533 /* Emit the loop body. */
15534 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15535 interleave_factor);
15536
15537 /* Move on to the next block. */
15538 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15539 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15540
15541 /* Emit the loop condition. */
15542 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15543 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15544
15545 /* Mop up any left-over bytes. */
15546 if (leftover)
15547 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15548 }
15549
15550 /* Emit a block move when either the source or destination is unaligned (not
15551 aligned to a four-byte boundary). This may need further tuning depending on
15552 core type, optimize_size setting, etc. */
15553
15554 static int
15555 arm_cpymemqi_unaligned (rtx *operands)
15556 {
15557 HOST_WIDE_INT length = INTVAL (operands[2]);
15558
15559 if (optimize_size)
15560 {
15561 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15562 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15563 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15564 the size of the code if optimizing for size. We'll use ldm/stm if src_aligned
15565 or dst_aligned though: allow more interleaving in those cases since the
15566 resulting code can be smaller. */
15567 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15568 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15569
15570 if (length > 12)
15571 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15572 interleave_factor, bytes_per_iter);
15573 else
15574 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15575 interleave_factor);
15576 }
15577 else
15578 {
15579 /* Note that the loop created by arm_block_move_unaligned_loop may be
15580 subject to loop unrolling, which makes tuning this condition a little
15581 redundant. */
15582 if (length > 32)
15583 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15584 else
15585 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15586 }
15587
15588 return 1;
15589 }
15590
15591 int
15592 arm_gen_cpymemqi (rtx *operands)
15593 {
15594 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15595 HOST_WIDE_INT srcoffset, dstoffset;
15596 rtx src, dst, srcbase, dstbase;
15597 rtx part_bytes_reg = NULL;
15598 rtx mem;
15599
15600 if (!CONST_INT_P (operands[2])
15601 || !CONST_INT_P (operands[3])
15602 || INTVAL (operands[2]) > 64)
15603 return 0;
15604
15605 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15606 return arm_cpymemqi_unaligned (operands);
15607
15608 if (INTVAL (operands[3]) & 3)
15609 return 0;
15610
15611 dstbase = operands[0];
15612 srcbase = operands[1];
15613
15614 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15615 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15616
15617 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15618 out_words_to_go = INTVAL (operands[2]) / 4;
15619 last_bytes = INTVAL (operands[2]) & 3;
15620 dstoffset = srcoffset = 0;
15621
15622 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15623 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15624
15625 while (in_words_to_go >= 2)
15626 {
15627 if (in_words_to_go > 4)
15628 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15629 TRUE, srcbase, &srcoffset));
15630 else
15631 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15632 src, FALSE, srcbase,
15633 &srcoffset));
15634
15635 if (out_words_to_go)
15636 {
15637 if (out_words_to_go > 4)
15638 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15639 TRUE, dstbase, &dstoffset));
15640 else if (out_words_to_go != 1)
15641 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15642 out_words_to_go, dst,
15643 (last_bytes == 0
15644 ? FALSE : TRUE),
15645 dstbase, &dstoffset));
15646 else
15647 {
15648 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15649 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15650 if (last_bytes != 0)
15651 {
15652 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15653 dstoffset += 4;
15654 }
15655 }
15656 }
15657
15658 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15659 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15660 }
15661
15662 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15663 if (out_words_to_go)
15664 {
15665 rtx sreg;
15666
15667 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15668 sreg = copy_to_reg (mem);
15669
15670 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15671 emit_move_insn (mem, sreg);
15672 in_words_to_go--;
15673
15674 gcc_assert (!in_words_to_go); /* Sanity check. */
15675 }
15676
15677 if (in_words_to_go)
15678 {
15679 gcc_assert (in_words_to_go > 0);
15680
15681 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15682 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15683 }
15684
15685 gcc_assert (!last_bytes || part_bytes_reg);
15686
15687 if (BYTES_BIG_ENDIAN && last_bytes)
15688 {
15689 rtx tmp = gen_reg_rtx (SImode);
15690
15691 /* The bytes we want are in the top end of the word. */
15692 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15693 GEN_INT (8 * (4 - last_bytes))));
15694 part_bytes_reg = tmp;
15695
15696 while (last_bytes)
15697 {
15698 mem = adjust_automodify_address (dstbase, QImode,
15699 plus_constant (Pmode, dst,
15700 last_bytes - 1),
15701 dstoffset + last_bytes - 1);
15702 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15703
15704 if (--last_bytes)
15705 {
15706 tmp = gen_reg_rtx (SImode);
15707 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15708 part_bytes_reg = tmp;
15709 }
15710 }
15711
15712 }
15713 else
15714 {
15715 if (last_bytes > 1)
15716 {
15717 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15718 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15719 last_bytes -= 2;
15720 if (last_bytes)
15721 {
15722 rtx tmp = gen_reg_rtx (SImode);
15723 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15724 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15725 part_bytes_reg = tmp;
15726 dstoffset += 2;
15727 }
15728 }
15729
15730 if (last_bytes)
15731 {
15732 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15733 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15734 }
15735 }
15736
15737 return 1;
15738 }
15739
15740 /* Helper for gen_cpymem_ldrd_strd. Increase the address of the memory rtx
15741 MEM by its mode size. */
15742 inline static rtx
15743 next_consecutive_mem (rtx mem)
15744 {
15745 machine_mode mode = GET_MODE (mem);
15746 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15747 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15748
15749 return adjust_automodify_address (mem, mode, addr, offset);
15750 }
15751
15752 /* Copy using LDRD/STRD instructions whenever possible.
15753 Returns true upon success. */
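/* As an illustrative sketch: for a 14-byte copy with both buffers
   word-aligned, the code below emits one doubleword move, then a word
   move, then a halfword move (8 + 4 + 2 bytes), and returns true.  */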
15754 bool
15755 gen_cpymem_ldrd_strd (rtx *operands)
15756 {
15757 unsigned HOST_WIDE_INT len;
15758 HOST_WIDE_INT align;
15759 rtx src, dst, base;
15760 rtx reg0;
15761 bool src_aligned, dst_aligned;
15762 bool src_volatile, dst_volatile;
15763
15764 gcc_assert (CONST_INT_P (operands[2]));
15765 gcc_assert (CONST_INT_P (operands[3]));
15766
15767 len = UINTVAL (operands[2]);
15768 if (len > 64)
15769 return false;
15770
15771 /* Maximum alignment we can assume for both src and dst buffers. */
15772 align = INTVAL (operands[3]);
15773
15774 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15775 return false;
15776
15777 /* Place src and dst addresses in registers
15778 and update the corresponding mem rtx. */
15779 dst = operands[0];
15780 dst_volatile = MEM_VOLATILE_P (dst);
15781 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15782 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15783 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15784
15785 src = operands[1];
15786 src_volatile = MEM_VOLATILE_P (src);
15787 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15788 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15789 src = adjust_automodify_address (src, VOIDmode, base, 0);
15790
15791 if (!unaligned_access && !(src_aligned && dst_aligned))
15792 return false;
15793
15794 if (src_volatile || dst_volatile)
15795 return false;
15796
15797 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15798 if (!(dst_aligned || src_aligned))
15799 return arm_gen_cpymemqi (operands);
15800
15801 /* If either src or dst is unaligned, we'll be accessing it as pairs
15802 of unaligned SImode accesses. Otherwise we can generate DImode
15803 ldrd/strd instructions. */
15804 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15805 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15806
15807 while (len >= 8)
15808 {
15809 len -= 8;
15810 reg0 = gen_reg_rtx (DImode);
15811 rtx first_reg = NULL_RTX;
15812 rtx second_reg = NULL_RTX;
15813
15814 if (!src_aligned || !dst_aligned)
15815 {
15816 if (BYTES_BIG_ENDIAN)
15817 {
15818 second_reg = gen_lowpart (SImode, reg0);
15819 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15820 }
15821 else
15822 {
15823 first_reg = gen_lowpart (SImode, reg0);
15824 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15825 }
15826 }
15827 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15828 emit_move_insn (reg0, src);
15829 else if (src_aligned)
15830 emit_insn (gen_unaligned_loaddi (reg0, src));
15831 else
15832 {
15833 emit_insn (gen_unaligned_loadsi (first_reg, src));
15834 src = next_consecutive_mem (src);
15835 emit_insn (gen_unaligned_loadsi (second_reg, src));
15836 }
15837
15838 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15839 emit_move_insn (dst, reg0);
15840 else if (dst_aligned)
15841 emit_insn (gen_unaligned_storedi (dst, reg0));
15842 else
15843 {
15844 emit_insn (gen_unaligned_storesi (dst, first_reg));
15845 dst = next_consecutive_mem (dst);
15846 emit_insn (gen_unaligned_storesi (dst, second_reg));
15847 }
15848
15849 src = next_consecutive_mem (src);
15850 dst = next_consecutive_mem (dst);
15851 }
15852
15853 gcc_assert (len < 8);
15854 if (len >= 4)
15855 {
15856 /* More than a word but less than a double-word to copy. Copy a word. */
15857 reg0 = gen_reg_rtx (SImode);
15858 src = adjust_address (src, SImode, 0);
15859 dst = adjust_address (dst, SImode, 0);
15860 if (src_aligned)
15861 emit_move_insn (reg0, src);
15862 else
15863 emit_insn (gen_unaligned_loadsi (reg0, src));
15864
15865 if (dst_aligned)
15866 emit_move_insn (dst, reg0);
15867 else
15868 emit_insn (gen_unaligned_storesi (dst, reg0));
15869
15870 src = next_consecutive_mem (src);
15871 dst = next_consecutive_mem (dst);
15872 len -= 4;
15873 }
15874
15875 if (len == 0)
15876 return true;
15877
15878 /* Copy the remaining bytes. */
15879 if (len >= 2)
15880 {
15881 dst = adjust_address (dst, HImode, 0);
15882 src = adjust_address (src, HImode, 0);
15883 reg0 = gen_reg_rtx (SImode);
15884 if (src_aligned)
15885 emit_insn (gen_zero_extendhisi2 (reg0, src));
15886 else
15887 emit_insn (gen_unaligned_loadhiu (reg0, src));
15888
15889 if (dst_aligned)
15890 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15891 else
15892 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15893
15894 src = next_consecutive_mem (src);
15895 dst = next_consecutive_mem (dst);
15896 if (len == 2)
15897 return true;
15898 }
15899
15900 dst = adjust_address (dst, QImode, 0);
15901 src = adjust_address (src, QImode, 0);
15902 reg0 = gen_reg_rtx (QImode);
15903 emit_move_insn (reg0, src);
15904 emit_move_insn (dst, reg0);
15905 return true;
15906 }
15907
15908 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15909 into its component 32-bit subregs. OP2 may be an immediate
15910 constant and we want to simplify it in that case. */
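/* For instance, if OP2 is the DImode constant 0x100000002 the two halves
   come back as *LO_OP2 == (const_int 2) and *HI_OP2 == (const_int 1),
   while the register operand OP1 is simply split with gen_lowpart and
   gen_highpart.  */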
15911 void
15912 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15913 rtx *lo_op2, rtx *hi_op2)
15914 {
15915 *lo_op1 = gen_lowpart (SImode, op1);
15916 *hi_op1 = gen_highpart (SImode, op1);
15917 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15918 subreg_lowpart_offset (SImode, DImode));
15919 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15920 subreg_highpart_offset (SImode, DImode));
15921 }
15922
15923 /* Select a dominance comparison mode if possible for a test of the general
15924 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15925 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15926 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15927 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15928 In all cases OP will be either EQ or NE, but we don't need to know which
15929 here. If we are unable to support a dominance comparison we return
15930 CC mode. This will then fail to match for the RTL expressions that
15931 generate this call. */
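/* Worked example (illustrative): for a test of the form
   (ne (ior (eq r0 r1) (le r2 r3)) (const_int 0)) with COND_OR ==
   DOM_CC_X_OR_Y, cond1 == EQ dominates cond2 == LE, so CC_DLEmode is
   returned and the combined test can be emitted as a conditional-compare
   sequence such as
	cmp	r0, r1
	cmpne	r2, r3
   followed by a branch on "le".  */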
15932 machine_mode
15933 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15934 {
15935 enum rtx_code cond1, cond2;
15936 int swapped = 0;
15937
15938 /* Currently we will probably get the wrong result if the individual
15939 comparisons are not simple. This also ensures that it is safe to
15940 reverse a comparison if necessary. */
15941 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15942 != CCmode)
15943 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15944 != CCmode))
15945 return CCmode;
15946
15947 /* The if_then_else variant of this tests the second condition if the
15948 first passes, but is true if the first fails. Reverse the first
15949 condition to get a true "inclusive-or" expression. */
15950 if (cond_or == DOM_CC_NX_OR_Y)
15951 cond1 = reverse_condition (cond1);
15952
15953 /* If the comparisons are not equal, and one doesn't dominate the other,
15954 then we can't do this. */
15955 if (cond1 != cond2
15956 && !comparison_dominates_p (cond1, cond2)
15957 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15958 return CCmode;
15959
15960 if (swapped)
15961 std::swap (cond1, cond2);
15962
15963 switch (cond1)
15964 {
15965 case EQ:
15966 if (cond_or == DOM_CC_X_AND_Y)
15967 return CC_DEQmode;
15968
15969 switch (cond2)
15970 {
15971 case EQ: return CC_DEQmode;
15972 case LE: return CC_DLEmode;
15973 case LEU: return CC_DLEUmode;
15974 case GE: return CC_DGEmode;
15975 case GEU: return CC_DGEUmode;
15976 default: gcc_unreachable ();
15977 }
15978
15979 case LT:
15980 if (cond_or == DOM_CC_X_AND_Y)
15981 return CC_DLTmode;
15982
15983 switch (cond2)
15984 {
15985 case LT:
15986 return CC_DLTmode;
15987 case LE:
15988 return CC_DLEmode;
15989 case NE:
15990 return CC_DNEmode;
15991 default:
15992 gcc_unreachable ();
15993 }
15994
15995 case GT:
15996 if (cond_or == DOM_CC_X_AND_Y)
15997 return CC_DGTmode;
15998
15999 switch (cond2)
16000 {
16001 case GT:
16002 return CC_DGTmode;
16003 case GE:
16004 return CC_DGEmode;
16005 case NE:
16006 return CC_DNEmode;
16007 default:
16008 gcc_unreachable ();
16009 }
16010
16011 case LTU:
16012 if (cond_or == DOM_CC_X_AND_Y)
16013 return CC_DLTUmode;
16014
16015 switch (cond2)
16016 {
16017 case LTU:
16018 return CC_DLTUmode;
16019 case LEU:
16020 return CC_DLEUmode;
16021 case NE:
16022 return CC_DNEmode;
16023 default:
16024 gcc_unreachable ();
16025 }
16026
16027 case GTU:
16028 if (cond_or == DOM_CC_X_AND_Y)
16029 return CC_DGTUmode;
16030
16031 switch (cond2)
16032 {
16033 case GTU:
16034 return CC_DGTUmode;
16035 case GEU:
16036 return CC_DGEUmode;
16037 case NE:
16038 return CC_DNEmode;
16039 default:
16040 gcc_unreachable ();
16041 }
16042
16043 /* The remaining cases only occur when both comparisons are the
16044 same. */
16045 case NE:
16046 gcc_assert (cond1 == cond2);
16047 return CC_DNEmode;
16048
16049 case LE:
16050 gcc_assert (cond1 == cond2);
16051 return CC_DLEmode;
16052
16053 case GE:
16054 gcc_assert (cond1 == cond2);
16055 return CC_DGEmode;
16056
16057 case LEU:
16058 gcc_assert (cond1 == cond2);
16059 return CC_DLEUmode;
16060
16061 case GEU:
16062 gcc_assert (cond1 == cond2);
16063 return CC_DGEUmode;
16064
16065 default:
16066 gcc_unreachable ();
16067 }
16068 }
16069
16070 machine_mode
16071 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16072 {
16073 /* All floating point compares return CCFP if it is an equality
16074 comparison, and CCFPE otherwise. */
16075 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16076 {
16077 switch (op)
16078 {
16079 case EQ:
16080 case NE:
16081 case UNORDERED:
16082 case ORDERED:
16083 case UNLT:
16084 case UNLE:
16085 case UNGT:
16086 case UNGE:
16087 case UNEQ:
16088 case LTGT:
16089 return CCFPmode;
16090
16091 case LT:
16092 case LE:
16093 case GT:
16094 case GE:
16095 return CCFPEmode;
16096
16097 default:
16098 gcc_unreachable ();
16099 }
16100 }
16101
16102 /* A compare with a shifted operand. Because of canonicalization, the
16103 comparison will have to be swapped when we emit the assembler. */
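  /* For example (a sketch of the common case): the RTL
     (compare (ashift (reg r1) (const_int 2)) (reg r0)) can only be output
     as "cmp r0, r1, lsl #2", which performs the comparison the other way
     round, hence the swapped mode returned below.  */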
16104 if (GET_MODE (y) == SImode
16105 && (REG_P (y) || (SUBREG_P (y)))
16106 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16107 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16108 || GET_CODE (x) == ROTATERT))
16109 return CC_SWPmode;
16110
16111 /* A widened compare of the sum of a value plus a carry against a
16112 constant. This is a representation of RSC. We want to swap the
16113 result of the comparison at output. Not valid if the Z bit is
16114 needed. */
16115 if (GET_MODE (x) == DImode
16116 && GET_CODE (x) == PLUS
16117 && arm_borrow_operation (XEXP (x, 1), DImode)
16118 && CONST_INT_P (y)
16119 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16120 && (op == LE || op == GT))
16121 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16122 && (op == LEU || op == GTU))))
16123 return CC_SWPmode;
16124
16125 /* If X is a constant we want to use CC_RSBmode. This is
16126 non-canonical, but arm_gen_compare_reg uses this to generate the
16127 correct canonical form. */
16128 if (GET_MODE (y) == SImode
16129 && (REG_P (y) || SUBREG_P (y))
16130 && CONST_INT_P (x))
16131 return CC_RSBmode;
16132
16133 /* This operation is performed swapped, but since we only rely on the Z
16134 flag we don't need an additional mode. */
16135 if (GET_MODE (y) == SImode
16136 && (REG_P (y) || (SUBREG_P (y)))
16137 && GET_CODE (x) == NEG
16138 && (op == EQ || op == NE))
16139 return CC_Zmode;
16140
16141 /* This is a special case that is used by combine to allow a
16142 comparison of a shifted byte load to be split into a zero-extend
16143 followed by a comparison of the shifted integer (only valid for
16144 equalities and unsigned inequalities). */
16145 if (GET_MODE (x) == SImode
16146 && GET_CODE (x) == ASHIFT
16147 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16148 && GET_CODE (XEXP (x, 0)) == SUBREG
16149 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16150 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16151 && (op == EQ || op == NE
16152 || op == GEU || op == GTU || op == LTU || op == LEU)
16153 && CONST_INT_P (y))
16154 return CC_Zmode;
16155
16156 /* A construct for a conditional compare, if the false arm contains
16157 0, then both conditions must be true, otherwise either condition
16158 must be true. Not all conditions are possible, so CCmode is
16159 returned if it can't be done. */
16160 if (GET_CODE (x) == IF_THEN_ELSE
16161 && (XEXP (x, 2) == const0_rtx
16162 || XEXP (x, 2) == const1_rtx)
16163 && COMPARISON_P (XEXP (x, 0))
16164 && COMPARISON_P (XEXP (x, 1)))
16165 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16166 INTVAL (XEXP (x, 2)));
16167
16168 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16169 if (GET_CODE (x) == AND
16170 && (op == EQ || op == NE)
16171 && COMPARISON_P (XEXP (x, 0))
16172 && COMPARISON_P (XEXP (x, 1)))
16173 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16174 DOM_CC_X_AND_Y);
16175
16176 if (GET_CODE (x) == IOR
16177 && (op == EQ || op == NE)
16178 && COMPARISON_P (XEXP (x, 0))
16179 && COMPARISON_P (XEXP (x, 1)))
16180 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16181 DOM_CC_X_OR_Y);
16182
16183 /* An operation (on Thumb) where we want to test for a single bit.
16184 This is done by shifting that bit up into the top bit of a
16185 scratch register; we can then branch on the sign bit. */
16186 if (TARGET_THUMB1
16187 && GET_MODE (x) == SImode
16188 && (op == EQ || op == NE)
16189 && GET_CODE (x) == ZERO_EXTRACT
16190 && XEXP (x, 1) == const1_rtx)
16191 return CC_Nmode;
16192
16193 /* For an operation that sets the condition codes as a side-effect, the
16194 V flag is not set correctly, so we can only use comparisons where
16195 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16196 instead.) */
16197 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16198 if (GET_MODE (x) == SImode
16199 && y == const0_rtx
16200 && (op == EQ || op == NE || op == LT || op == GE)
16201 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16202 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16203 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16204 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16205 || GET_CODE (x) == LSHIFTRT
16206 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16207 || GET_CODE (x) == ROTATERT
16208 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16209 return CC_NZmode;
16210
16211 /* A comparison of ~reg with a const is really a special
16212 canonicalization of compare (~const, reg), which is a reverse
16213 subtract operation. We may not get here if CONST is 0, but that
16214 doesn't matter because ~0 isn't a valid immediate for RSB. */
16215 if (GET_MODE (x) == SImode
16216 && GET_CODE (x) == NOT
16217 && CONST_INT_P (y))
16218 return CC_RSBmode;
16219
16220 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16221 return CC_Zmode;
16222
16223 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16224 && GET_CODE (x) == PLUS
16225 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16226 return CC_Cmode;
16227
16228 if (GET_MODE (x) == DImode
16229 && GET_CODE (x) == PLUS
16230 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16231 && CONST_INT_P (y)
16232 && UINTVAL (y) == 0x800000000
16233 && (op == GEU || op == LTU))
16234 return CC_ADCmode;
16235
16236 if (GET_MODE (x) == DImode
16237 && (op == GE || op == LT)
16238 && GET_CODE (x) == SIGN_EXTEND
16239 && ((GET_CODE (y) == PLUS
16240 && arm_borrow_operation (XEXP (y, 0), DImode))
16241 || arm_borrow_operation (y, DImode)))
16242 return CC_NVmode;
16243
16244 if (GET_MODE (x) == DImode
16245 && (op == GEU || op == LTU)
16246 && GET_CODE (x) == ZERO_EXTEND
16247 && ((GET_CODE (y) == PLUS
16248 && arm_borrow_operation (XEXP (y, 0), DImode))
16249 || arm_borrow_operation (y, DImode)))
16250 return CC_Bmode;
16251
16252 if (GET_MODE (x) == DImode
16253 && (op == EQ || op == NE)
16254 && (GET_CODE (x) == PLUS
16255 || GET_CODE (x) == MINUS)
16256 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16257 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16258 && GET_CODE (y) == SIGN_EXTEND
16259 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16260 return CC_Vmode;
16261
16262 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16263 return GET_MODE (x);
16264
16265 return CCmode;
16266 }
16267
16268 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16269 the sequence of instructions needed to generate a suitable condition
16270 code register. Return the CC register result. */
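/* As an illustration: an EQ or NE test of a DImode value against zero is
   built below as a single flag-setting IOR of the two 32-bit halves (with
   a scratch clobber), which can be emitted as something like
   "orrs rtmp, r_lo, r_hi" rather than a full 64-bit subtraction.
   (Register names here are purely illustrative.)  */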
16271 static rtx
16272 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16273 {
16274 machine_mode mode;
16275 rtx cc_reg;
16276
16277 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16278 gcc_assert (TARGET_32BIT);
16279 gcc_assert (!CONST_INT_P (x));
16280
16281 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16282 subreg_lowpart_offset (SImode, DImode));
16283 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16284 subreg_highpart_offset (SImode, DImode));
16285 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16286 subreg_lowpart_offset (SImode, DImode));
16287 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16288 subreg_highpart_offset (SImode, DImode));
16289 switch (code)
16290 {
16291 case EQ:
16292 case NE:
16293 {
16294 if (y_lo == const0_rtx || y_hi == const0_rtx)
16295 {
16296 if (y_lo != const0_rtx)
16297 {
16298 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16299
16300 gcc_assert (y_hi == const0_rtx);
16301 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16302 if (!arm_add_operand (y_lo, SImode))
16303 y_lo = force_reg (SImode, y_lo);
16304 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16305 x_lo = scratch2;
16306 }
16307 else if (y_hi != const0_rtx)
16308 {
16309 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16310
16311 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16312 if (!arm_add_operand (y_hi, SImode))
16313 y_hi = force_reg (SImode, y_hi);
16314 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16315 x_hi = scratch2;
16316 }
16317
16318 if (!scratch)
16319 {
16320 gcc_assert (!reload_completed);
16321 scratch = gen_rtx_SCRATCH (SImode);
16322 }
16323
16324 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16325 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16326
16327 rtx set
16328 = gen_rtx_SET (cc_reg,
16329 gen_rtx_COMPARE (CC_NZmode,
16330 gen_rtx_IOR (SImode, x_lo, x_hi),
16331 const0_rtx));
16332 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16333 clobber)));
16334 return cc_reg;
16335 }
16336
16337 if (!arm_add_operand (y_lo, SImode))
16338 y_lo = force_reg (SImode, y_lo);
16339
16340 if (!arm_add_operand (y_hi, SImode))
16341 y_hi = force_reg (SImode, y_hi);
16342
16343 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16344 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16345 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16346 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16347 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16348
16349 emit_insn (gen_rtx_SET (cc_reg,
16350 gen_rtx_COMPARE (mode, conjunction,
16351 const0_rtx)));
16352 return cc_reg;
16353 }
16354
16355 case LT:
16356 case GE:
16357 {
16358 if (y_lo == const0_rtx)
16359 {
16360 /* If the low word of y is 0, then this is simply a normal
16361 compare of the upper words. */
16362 if (!arm_add_operand (y_hi, SImode))
16363 y_hi = force_reg (SImode, y_hi);
16364
16365 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16366 }
16367
16368 if (!arm_add_operand (y_lo, SImode))
16369 y_lo = force_reg (SImode, y_lo);
16370
16371 rtx cmp1
16372 = gen_rtx_LTU (DImode,
16373 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16374 const0_rtx);
16375
16376 if (!scratch)
16377 scratch = gen_rtx_SCRATCH (SImode);
16378
16379 if (!arm_not_operand (y_hi, SImode))
16380 y_hi = force_reg (SImode, y_hi);
16381
16382 rtx_insn *insn;
16383 if (y_hi == const0_rtx)
16384 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16385 cmp1));
16386 else if (CONST_INT_P (y_hi))
16387 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16388 y_hi, cmp1));
16389 else
16390 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16391 cmp1));
16392 return SET_DEST (single_set (insn));
16393 }
16394
16395 case LE:
16396 case GT:
16397 {
16398 /* During expansion, we only expect to get here if y is a
16399 constant that we want to handle, otherwise we should have
16400 swapped the operands already. */
16401 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16402
16403 if (!const_ok_for_arm (INTVAL (y_lo)))
16404 y_lo = force_reg (SImode, y_lo);
16405
16406 /* Perform a reverse subtract and compare. */
16407 rtx cmp1
16408 = gen_rtx_LTU (DImode,
16409 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16410 const0_rtx);
16411 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16412 x_hi, cmp1));
16413 return SET_DEST (single_set (insn));
16414 }
16415
16416 case LTU:
16417 case GEU:
16418 {
16419 if (y_lo == const0_rtx)
16420 {
16421 /* If the low word of y is 0, then this is simply a normal
16422 compare of the upper words. */
16423 if (!arm_add_operand (y_hi, SImode))
16424 y_hi = force_reg (SImode, y_hi);
16425
16426 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16427 }
16428
16429 if (!arm_add_operand (y_lo, SImode))
16430 y_lo = force_reg (SImode, y_lo);
16431
16432 rtx cmp1
16433 = gen_rtx_LTU (DImode,
16434 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16435 const0_rtx);
16436
16437 if (!scratch)
16438 scratch = gen_rtx_SCRATCH (SImode);
16439 if (!arm_not_operand (y_hi, SImode))
16440 y_hi = force_reg (SImode, y_hi);
16441
16442 rtx_insn *insn;
16443 if (y_hi == const0_rtx)
16444 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16445 cmp1));
16446 else if (CONST_INT_P (y_hi))
16447 {
16448 /* Constant is viewed as unsigned when zero-extended. */
16449 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16450 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16451 y_hi, cmp1));
16452 }
16453 else
16454 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16455 cmp1));
16456 return SET_DEST (single_set (insn));
16457 }
16458
16459 case LEU:
16460 case GTU:
16461 {
16462 /* During expansion, we only expect to get here if y is a
16463 constant that we want to handle, otherwise we should have
16464 swapped the operands already. */
16465 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16466
16467 if (!const_ok_for_arm (INTVAL (y_lo)))
16468 y_lo = force_reg (SImode, y_lo);
16469
16470 /* Perform a reverse subtract and compare. */
16471 rtx cmp1
16472 = gen_rtx_LTU (DImode,
16473 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16474 const0_rtx);
16475 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16476 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16477 x_hi, cmp1));
16478 return SET_DEST (single_set (insn));
16479 }
16480
16481 default:
16482 gcc_unreachable ();
16483 }
16484 }
16485
16486 /* X and Y are two things to compare using CODE. Emit the compare insn and
16487 return the rtx for the CC register in the proper mode. */
16488 rtx
16489 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16490 {
16491 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16492 return arm_gen_dicompare_reg (code, x, y, scratch);
16493
16494 machine_mode mode = SELECT_CC_MODE (code, x, y);
16495 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16496 if (mode == CC_RSBmode)
16497 {
16498 if (!scratch)
16499 scratch = gen_rtx_SCRATCH (SImode);
16500 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16501 GEN_INT (~UINTVAL (x)), y));
16502 }
16503 else
16504 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16505
16506 return cc_reg;
16507 }
16508
16509 /* Generate a sequence of insns that will generate the correct return
16510 address mask depending on the physical architecture that the program
16511 is running on. */
16512 rtx
16513 arm_gen_return_addr_mask (void)
16514 {
16515 rtx reg = gen_reg_rtx (Pmode);
16516
16517 emit_insn (gen_return_addr_mask (reg));
16518 return reg;
16519 }
16520
16521 void
16522 arm_reload_in_hi (rtx *operands)
16523 {
16524 rtx ref = operands[1];
16525 rtx base, scratch;
16526 HOST_WIDE_INT offset = 0;
16527
16528 if (SUBREG_P (ref))
16529 {
16530 offset = SUBREG_BYTE (ref);
16531 ref = SUBREG_REG (ref);
16532 }
16533
16534 if (REG_P (ref))
16535 {
16536 /* We have a pseudo which has been spilt onto the stack; there
16537 are two cases here: the first where there is a simple
16538 stack-slot replacement and a second where the stack-slot is
16539 out of range, or is used as a subreg. */
16540 if (reg_equiv_mem (REGNO (ref)))
16541 {
16542 ref = reg_equiv_mem (REGNO (ref));
16543 base = find_replacement (&XEXP (ref, 0));
16544 }
16545 else
16546 /* The slot is out of range, or was dressed up in a SUBREG. */
16547 base = reg_equiv_address (REGNO (ref));
16548
16549 /* PR 62554: If there is no equivalent memory location then just move
16550 the value as an SImode register move. This happens when the target
16551 architecture variant does not have an HImode register move. */
16552 if (base == NULL)
16553 {
16554 gcc_assert (REG_P (operands[0]));
16555 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16556 gen_rtx_SUBREG (SImode, ref, 0)));
16557 return;
16558 }
16559 }
16560 else
16561 base = find_replacement (&XEXP (ref, 0));
16562
16563 /* Handle the case where the address is too complex to be offset by 1. */
16564 if (GET_CODE (base) == MINUS
16565 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16566 {
16567 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16568
16569 emit_set_insn (base_plus, base);
16570 base = base_plus;
16571 }
16572 else if (GET_CODE (base) == PLUS)
16573 {
16574 /* The addend must be CONST_INT, or we would have dealt with it above. */
16575 HOST_WIDE_INT hi, lo;
16576
16577 offset += INTVAL (XEXP (base, 1));
16578 base = XEXP (base, 0);
16579
16580 /* Rework the address into a legal sequence of insns. */
16581 /* Valid range for lo is -4095 -> 4095 */
16582 lo = (offset >= 0
16583 ? (offset & 0xfff)
16584 : -((-offset) & 0xfff));
16585
16586 /* Corner case, if lo is the max offset then we would be out of range
16587 once we have added the additional 1 below, so bump the msb into the
16588 pre-loading insn(s). */
16589 if (lo == 4095)
16590 lo &= 0x7ff;
16591
16592 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16593 ^ (HOST_WIDE_INT) 0x80000000)
16594 - (HOST_WIDE_INT) 0x80000000);
16595
16596 gcc_assert (hi + lo == offset);
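      /* Worked example (illustrative): offset == 4095 gives lo == 2047 and
	 hi == 2048, so once HI has been added to the base both byte loads,
	 at LO and LO + 1, stay within the 4095-byte ldrb offset range.  */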
16597
16598 if (hi != 0)
16599 {
16600 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16601
16602 /* Get the base address; addsi3 knows how to handle constants
16603 that require more than one insn. */
16604 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16605 base = base_plus;
16606 offset = lo;
16607 }
16608 }
16609
16610 /* Operands[2] may overlap operands[0] (though it won't overlap
16611 operands[1]), that's why we asked for a DImode reg -- so we can
16612 use the bit that does not overlap. */
16613 if (REGNO (operands[2]) == REGNO (operands[0]))
16614 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16615 else
16616 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16617
16618 emit_insn (gen_zero_extendqisi2 (scratch,
16619 gen_rtx_MEM (QImode,
16620 plus_constant (Pmode, base,
16621 offset))));
16622 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16623 gen_rtx_MEM (QImode,
16624 plus_constant (Pmode, base,
16625 offset + 1))));
16626 if (!BYTES_BIG_ENDIAN)
16627 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16628 gen_rtx_IOR (SImode,
16629 gen_rtx_ASHIFT
16630 (SImode,
16631 gen_rtx_SUBREG (SImode, operands[0], 0),
16632 GEN_INT (8)),
16633 scratch));
16634 else
16635 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16636 gen_rtx_IOR (SImode,
16637 gen_rtx_ASHIFT (SImode, scratch,
16638 GEN_INT (8)),
16639 gen_rtx_SUBREG (SImode, operands[0], 0)));
16640 }
16641
16642 /* Handle storing a half-word to memory during reload by synthesizing it as two
16643 byte stores. Take care not to clobber the input values until after we
16644 have moved them somewhere safe. This code assumes that if the DImode
16645 scratch in operands[2] overlaps either the input value or output address
16646 in some way, then that value must die in this insn (we absolutely need
16647 two scratch registers for some corner cases). */
16648 void
16649 arm_reload_out_hi (rtx *operands)
16650 {
16651 rtx ref = operands[0];
16652 rtx outval = operands[1];
16653 rtx base, scratch;
16654 HOST_WIDE_INT offset = 0;
16655
16656 if (SUBREG_P (ref))
16657 {
16658 offset = SUBREG_BYTE (ref);
16659 ref = SUBREG_REG (ref);
16660 }
16661
16662 if (REG_P (ref))
16663 {
16664 /* We have a pseudo which has been spilt onto the stack; there
16665 are two cases here: the first where there is a simple
16666 stack-slot replacement and a second where the stack-slot is
16667 out of range, or is used as a subreg. */
16668 if (reg_equiv_mem (REGNO (ref)))
16669 {
16670 ref = reg_equiv_mem (REGNO (ref));
16671 base = find_replacement (&XEXP (ref, 0));
16672 }
16673 else
16674 /* The slot is out of range, or was dressed up in a SUBREG. */
16675 base = reg_equiv_address (REGNO (ref));
16676
16677 /* PR 62254: If there is no equivalent memory location then just move
16678 the value as an SImode register move. This happens when the target
16679 architecture variant does not have an HImode register move. */
16680 if (base == NULL)
16681 {
16682 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16683
16684 if (REG_P (outval))
16685 {
16686 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16687 gen_rtx_SUBREG (SImode, outval, 0)));
16688 }
16689 else /* SUBREG_P (outval) */
16690 {
16691 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16692 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16693 SUBREG_REG (outval)));
16694 else
16695 /* FIXME: Handle other cases ? */
16696 gcc_unreachable ();
16697 }
16698 return;
16699 }
16700 }
16701 else
16702 base = find_replacement (&XEXP (ref, 0));
16703
16704 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16705
16706 /* Handle the case where the address is too complex to be offset by 1. */
16707 if (GET_CODE (base) == MINUS
16708 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16709 {
16710 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16711
16712 /* Be careful not to destroy OUTVAL. */
16713 if (reg_overlap_mentioned_p (base_plus, outval))
16714 {
16715 /* Updating base_plus might destroy outval, see if we can
16716 swap the scratch and base_plus. */
16717 if (!reg_overlap_mentioned_p (scratch, outval))
16718 std::swap (scratch, base_plus);
16719 else
16720 {
16721 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16722
16723 /* Be conservative and copy OUTVAL into the scratch now,
16724 this should only be necessary if outval is a subreg
16725 of something larger than a word. */
16726 /* XXX Might this clobber base? I can't see how it can,
16727 since scratch is known to overlap with OUTVAL, and
16728 must be wider than a word. */
16729 emit_insn (gen_movhi (scratch_hi, outval));
16730 outval = scratch_hi;
16731 }
16732 }
16733
16734 emit_set_insn (base_plus, base);
16735 base = base_plus;
16736 }
16737 else if (GET_CODE (base) == PLUS)
16738 {
16739 /* The addend must be CONST_INT, or we would have dealt with it above. */
16740 HOST_WIDE_INT hi, lo;
16741
16742 offset += INTVAL (XEXP (base, 1));
16743 base = XEXP (base, 0);
16744
16745 /* Rework the address into a legal sequence of insns. */
16746 /* Valid range for lo is -4095 -> 4095 */
16747 lo = (offset >= 0
16748 ? (offset & 0xfff)
16749 : -((-offset) & 0xfff));
16750
16751 /* Corner case, if lo is the max offset then we would be out of range
16752 once we have added the additional 1 below, so bump the msb into the
16753 pre-loading insn(s). */
16754 if (lo == 4095)
16755 lo &= 0x7ff;
16756
16757 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16758 ^ (HOST_WIDE_INT) 0x80000000)
16759 - (HOST_WIDE_INT) 0x80000000);
16760
16761 gcc_assert (hi + lo == offset);
16762
16763 if (hi != 0)
16764 {
16765 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16766
16767 /* Be careful not to destroy OUTVAL. */
16768 if (reg_overlap_mentioned_p (base_plus, outval))
16769 {
16770 /* Updating base_plus might destroy outval, see if we
16771 can swap the scratch and base_plus. */
16772 if (!reg_overlap_mentioned_p (scratch, outval))
16773 std::swap (scratch, base_plus);
16774 else
16775 {
16776 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16777
16778 /* Be conservative and copy outval into scratch now,
16779 this should only be necessary if outval is a
16780 subreg of something larger than a word. */
16781 /* XXX Might this clobber base? I can't see how it
16782 can, since scratch is known to overlap with
16783 outval. */
16784 emit_insn (gen_movhi (scratch_hi, outval));
16785 outval = scratch_hi;
16786 }
16787 }
16788
16789 /* Get the base address; addsi3 knows how to handle constants
16790 that require more than one insn. */
16791 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16792 base = base_plus;
16793 offset = lo;
16794 }
16795 }
16796
16797 if (BYTES_BIG_ENDIAN)
16798 {
16799 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16800 plus_constant (Pmode, base,
16801 offset + 1)),
16802 gen_lowpart (QImode, outval)));
16803 emit_insn (gen_lshrsi3 (scratch,
16804 gen_rtx_SUBREG (SImode, outval, 0),
16805 GEN_INT (8)));
16806 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16807 offset)),
16808 gen_lowpart (QImode, scratch)));
16809 }
16810 else
16811 {
16812 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16813 offset)),
16814 gen_lowpart (QImode, outval)));
16815 emit_insn (gen_lshrsi3 (scratch,
16816 gen_rtx_SUBREG (SImode, outval, 0),
16817 GEN_INT (8)));
16818 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16819 plus_constant (Pmode, base,
16820 offset + 1)),
16821 gen_lowpart (QImode, scratch)));
16822 }
16823 }
16824
16825 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16826 (padded to the size of a word) should be passed in a register. */
16827
16828 static bool
16829 arm_must_pass_in_stack (const function_arg_info &arg)
16830 {
16831 if (TARGET_AAPCS_BASED)
16832 return must_pass_in_stack_var_size (arg);
16833 else
16834 return must_pass_in_stack_var_size_or_pad (arg);
16835 }
16836
16837
16838 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16839 byte of a stack argument has useful data. For legacy APCS ABIs we use
16840 the default. For AAPCS based ABIs small aggregate types are placed
16841 in the lowest memory address. */
16842
16843 static pad_direction
16844 arm_function_arg_padding (machine_mode mode, const_tree type)
16845 {
16846 if (!TARGET_AAPCS_BASED)
16847 return default_function_arg_padding (mode, type);
16848
16849 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16850 return PAD_DOWNWARD;
16851
16852 return PAD_UPWARD;
16853 }
16854
16855
16856 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16857 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16858 register has useful data, and return the opposite if the most
16859 significant byte does. */
16860
16861 bool
16862 arm_pad_reg_upward (machine_mode mode,
16863 tree type, int first ATTRIBUTE_UNUSED)
16864 {
16865 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16866 {
16867 /* For AAPCS, small aggregates, small fixed-point types,
16868 and small complex types are always padded upwards. */
16869 if (type)
16870 {
16871 if ((AGGREGATE_TYPE_P (type)
16872 || TREE_CODE (type) == COMPLEX_TYPE
16873 || FIXED_POINT_TYPE_P (type))
16874 && int_size_in_bytes (type) <= 4)
16875 return true;
16876 }
16877 else
16878 {
16879 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16880 && GET_MODE_SIZE (mode) <= 4)
16881 return true;
16882 }
16883 }
16884
16885 /* Otherwise, use default padding. */
16886 return !BYTES_BIG_ENDIAN;
16887 }
16888
16889 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16890 assuming that the address in the base register is word aligned. */
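/* Illustrative cases: in Thumb-2 state an offset of 1020 is accepted, 1022
   is rejected (not a multiple of 4) and 1024 is rejected (out of range);
   in ARM state the limit is +/-255, so an offset of 256 is rejected.  */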
16891 bool
16892 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16893 {
16894 HOST_WIDE_INT max_offset;
16895
16896 /* Offset must be a multiple of 4 in Thumb mode. */
16897 if (TARGET_THUMB2 && ((offset & 3) != 0))
16898 return false;
16899
16900 if (TARGET_THUMB2)
16901 max_offset = 1020;
16902 else if (TARGET_ARM)
16903 max_offset = 255;
16904 else
16905 return false;
16906
16907 return ((offset <= max_offset) && (offset >= -max_offset));
16908 }
16909
16910 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16911 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16912 Assumes that the address in the base register RN is word aligned. Pattern
16913 guarantees that both memory accesses use the same base register,
16914 the offsets are constants within the range, and the gap between the offsets is 4.
16915 If reload is complete, check that the registers are legal. WBACK indicates whether
16916 address is updated. LOAD indicates whether memory access is load or store. */
16917 bool
16918 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16919 bool wback, bool load)
16920 {
16921 unsigned int t, t2, n;
16922
16923 if (!reload_completed)
16924 return true;
16925
16926 if (!offset_ok_for_ldrd_strd (offset))
16927 return false;
16928
16929 t = REGNO (rt);
16930 t2 = REGNO (rt2);
16931 n = REGNO (rn);
16932
16933 if ((TARGET_THUMB2)
16934 && ((wback && (n == t || n == t2))
16935 || (t == SP_REGNUM)
16936 || (t == PC_REGNUM)
16937 || (t2 == SP_REGNUM)
16938 || (t2 == PC_REGNUM)
16939 || (!load && (n == PC_REGNUM))
16940 || (load && (t == t2))
16941 /* Triggers Cortex-M3 LDRD errata. */
16942 || (!wback && load && fix_cm3_ldrd && (n == t))))
16943 return false;
16944
16945 if ((TARGET_ARM)
16946 && ((wback && (n == t || n == t2))
16947 || (t2 == PC_REGNUM)
16948 || (t % 2 != 0) /* First destination register is not even. */
16949 || (t2 != t + 1)
16950 /* PC can be used as base register (for offset addressing only),
16951 but it is deprecated. */
16952 || (n == PC_REGNUM)))
16953 return false;
16954
16955 return true;
16956 }
16957
16958 /* Return true if a 64-bit access with alignment ALIGN and with a
16959 constant offset OFFSET from the base pointer is permitted on this
16960 architecture. */
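/* E.g. when unaligned_access is enabled, a word-aligned (32-bit) access at
   offset 4 is permitted, whereas when it is disabled the base must be
   doubleword-aligned and the offset a multiple of 8.  */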
16961 static bool
16962 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16963 {
16964 return (unaligned_access
16965 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16966 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16967 }
16968
16969 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16970 operand MEM's address contains an immediate offset from the base
16971 register and has no side effects, in which case it sets BASE,
16972 OFFSET and ALIGN accordingly. */
16973 static bool
16974 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16975 {
16976 rtx addr;
16977
16978 gcc_assert (base != NULL && offset != NULL);
16979
16980 /* TODO: Handle more general memory operand patterns, such as
16981 PRE_DEC and PRE_INC. */
16982
16983 if (side_effects_p (mem))
16984 return false;
16985
16986 /* Can't deal with subregs. */
16987 if (SUBREG_P (mem))
16988 return false;
16989
16990 gcc_assert (MEM_P (mem));
16991
16992 *offset = const0_rtx;
16993 *align = MEM_ALIGN (mem);
16994
16995 addr = XEXP (mem, 0);
16996
16997 /* If addr isn't valid for DImode, then we can't handle it. */
16998 if (!arm_legitimate_address_p (DImode, addr,
16999 reload_in_progress || reload_completed))
17000 return false;
17001
17002 if (REG_P (addr))
17003 {
17004 *base = addr;
17005 return true;
17006 }
17007 else if (GET_CODE (addr) == PLUS)
17008 {
17009 *base = XEXP (addr, 0);
17010 *offset = XEXP (addr, 1);
17011 return (REG_P (*base) && CONST_INT_P (*offset));
17012 }
17013
17014 return false;
17015 }
17016
17017 /* Called from a peephole2 to replace two word-size accesses with a
17018 single LDRD/STRD instruction. Returns true iff we can generate a
17019 new instruction sequence. That is, both accesses use the same base
17020 register and the gap between constant offsets is 4. This function
17021 may reorder its operands to match ldrd/strd RTL templates.
17022 OPERANDS are the operands found by the peephole matcher;
17023 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17024 corresponding memory operands. LOAD indicates whether the access
17025 is load or store. CONST_STORE indicates a store of constant
17026 integer values held in OPERANDS[4,5] and assumes that the pattern
17027 is of length 4 insn, for the purpose of checking dead registers.
17028 COMMUTE indicates that register operands may be reordered. */
17029 bool
17030 gen_operands_ldrd_strd (rtx *operands, bool load,
17031 bool const_store, bool commute)
17032 {
17033 int nops = 2;
17034 HOST_WIDE_INT offsets[2], offset, align[2];
17035 rtx base = NULL_RTX;
17036 rtx cur_base, cur_offset, tmp;
17037 int i, gap;
17038 HARD_REG_SET regset;
17039
17040 gcc_assert (!const_store || !load);
17041 /* Check that the memory references are immediate offsets from the
17042 same base register. Extract the base register, the destination
17043 registers, and the corresponding memory offsets. */
17044 for (i = 0; i < nops; i++)
17045 {
17046 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17047 &align[i]))
17048 return false;
17049
17050 if (i == 0)
17051 base = cur_base;
17052 else if (REGNO (base) != REGNO (cur_base))
17053 return false;
17054
17055 offsets[i] = INTVAL (cur_offset);
17056 if (GET_CODE (operands[i]) == SUBREG)
17057 {
17058 tmp = SUBREG_REG (operands[i]);
17059 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17060 operands[i] = tmp;
17061 }
17062 }
17063
17064 /* Make sure there is no dependency between the individual loads. */
17065 if (load && REGNO (operands[0]) == REGNO (base))
17066 return false; /* RAW */
17067
17068 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17069 return false; /* WAW */
17070
17071 /* If the same input register is used in both stores
17072 when storing different constants, try to find a free register.
17073 For example, the code
17074 mov r0, 0
17075 str r0, [r2]
17076 mov r0, 1
17077 str r0, [r2, #4]
17078 can be transformed into
17079 mov r1, 0
17080 mov r0, 1
17081 strd r1, r0, [r2]
17082 in Thumb mode assuming that r1 is free.
17083 For ARM mode do the same but only if the starting register
17084 can be made to be even. */
17085 if (const_store
17086 && REGNO (operands[0]) == REGNO (operands[1])
17087 && INTVAL (operands[4]) != INTVAL (operands[5]))
17088 {
17089 if (TARGET_THUMB2)
17090 {
17091 CLEAR_HARD_REG_SET (regset);
17092 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17093 if (tmp == NULL_RTX)
17094 return false;
17095
17096 /* Use the new register in the first load to ensure that
17097 if the original input register is not dead after the peephole,
17098 then it will have the correct constant value. */
17099 operands[0] = tmp;
17100 }
17101 else if (TARGET_ARM)
17102 {
17103 int regno = REGNO (operands[0]);
17104 if (!peep2_reg_dead_p (4, operands[0]))
17105 {
17106 /* When the input register is even and is not dead after the
17107 pattern, it has to hold the second constant but we cannot
17108 form a legal STRD in ARM mode with this register as the second
17109 register. */
17110 if (regno % 2 == 0)
17111 return false;
17112
17113 /* Is regno-1 free? */
17114 SET_HARD_REG_SET (regset);
17115 CLEAR_HARD_REG_BIT(regset, regno - 1);
17116 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17117 if (tmp == NULL_RTX)
17118 return false;
17119
17120 operands[0] = tmp;
17121 }
17122 else
17123 {
17124 /* Find a DImode register. */
17125 CLEAR_HARD_REG_SET (regset);
17126 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17127 if (tmp != NULL_RTX)
17128 {
17129 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17130 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17131 }
17132 else
17133 {
17134 /* Can we use the input register to form a DI register? */
17135 SET_HARD_REG_SET (regset);
17136 CLEAR_HARD_REG_BIT(regset,
17137 regno % 2 == 0 ? regno + 1 : regno - 1);
17138 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17139 if (tmp == NULL_RTX)
17140 return false;
17141 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17142 }
17143 }
17144
17145 gcc_assert (operands[0] != NULL_RTX);
17146 gcc_assert (operands[1] != NULL_RTX);
17147 gcc_assert (REGNO (operands[0]) % 2 == 0);
17148 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17149 }
17150 }
17151
17152 /* Make sure the instructions are ordered with lower memory access first. */
17153 if (offsets[0] > offsets[1])
17154 {
17155 gap = offsets[0] - offsets[1];
17156 offset = offsets[1];
17157
17158 /* Swap the instructions such that lower memory is accessed first. */
17159 std::swap (operands[0], operands[1]);
17160 std::swap (operands[2], operands[3]);
17161 std::swap (align[0], align[1]);
17162 if (const_store)
17163 std::swap (operands[4], operands[5]);
17164 }
17165 else
17166 {
17167 gap = offsets[1] - offsets[0];
17168 offset = offsets[0];
17169 }
17170
17171 /* Make sure accesses are to consecutive memory locations. */
17172 if (gap != GET_MODE_SIZE (SImode))
17173 return false;
17174
17175 if (!align_ok_ldrd_strd (align[0], offset))
17176 return false;
17177
17178 /* Make sure we generate legal instructions. */
17179 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17180 false, load))
17181 return true;
17182
17183 /* In Thumb state, where registers are almost unconstrained, there
17184 is little hope of fixing it. */
17185 if (TARGET_THUMB2)
17186 return false;
17187
17188 if (load && commute)
17189 {
17190 /* Try reordering registers. */
17191 std::swap (operands[0], operands[1]);
17192 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17193 false, load))
17194 return true;
17195 }
17196
17197 if (const_store)
17198 {
17199 /* If input registers are dead after this pattern, they can be
17200 reordered or replaced by other registers that are free in the
17201 current pattern. */
17202 if (!peep2_reg_dead_p (4, operands[0])
17203 || !peep2_reg_dead_p (4, operands[1]))
17204 return false;
17205
17206 /* Try to reorder the input registers. */
17207 /* For example, the code
17208 mov r0, 0
17209 mov r1, 1
17210 str r1, [r2]
17211 str r0, [r2, #4]
17212 can be transformed into
17213 mov r1, 0
17214 mov r0, 1
17215 strd r0, r1, [r2]
17216 */
17217 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17218 false, false))
17219 {
17220 std::swap (operands[0], operands[1]);
17221 return true;
17222 }
17223
17224 /* Try to find a free DI register. */
17225 CLEAR_HARD_REG_SET (regset);
17226 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17227 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17228 while (true)
17229 {
17230 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17231 if (tmp == NULL_RTX)
17232 return false;
17233
17234 /* DREG must be an even-numbered register in DImode.
17235 Split it into SI registers. */
17236 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17237 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17238 gcc_assert (operands[0] != NULL_RTX);
17239 gcc_assert (operands[1] != NULL_RTX);
17240 gcc_assert (REGNO (operands[0]) % 2 == 0);
17241 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17242
17243 return (operands_ok_ldrd_strd (operands[0], operands[1],
17244 base, offset,
17245 false, load));
17246 }
17247 }
17248
17249 return false;
17250 }
17251
17252
17253 /* Return true if parallel execution of the two word-size accesses provided
17254 could be satisfied with a single LDRD/STRD instruction. Two word-size
17255 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17256 register operands and OPERANDS[2,3] are the corresponding memory operands.
17257 */
17258 bool
17259 valid_operands_ldrd_strd (rtx *operands, bool load)
17260 {
17261 int nops = 2;
17262 HOST_WIDE_INT offsets[2], offset, align[2];
17263 rtx base = NULL_RTX;
17264 rtx cur_base, cur_offset;
17265 int i, gap;
17266
17267 /* Check that the memory references are immediate offsets from the
17268 same base register. Extract the base register, the destination
17269 registers, and the corresponding memory offsets. */
17270 for (i = 0; i < nops; i++)
17271 {
17272 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17273 &align[i]))
17274 return false;
17275
17276 if (i == 0)
17277 base = cur_base;
17278 else if (REGNO (base) != REGNO (cur_base))
17279 return false;
17280
17281 offsets[i] = INTVAL (cur_offset);
17282 if (GET_CODE (operands[i]) == SUBREG)
17283 return false;
17284 }
17285
17286 if (offsets[0] > offsets[1])
17287 return false;
17288
17289 gap = offsets[1] - offsets[0];
17290 offset = offsets[0];
17291
17292 /* Make sure accesses are to consecutive memory locations. */
17293 if (gap != GET_MODE_SIZE (SImode))
17294 return false;
17295
17296 if (!align_ok_ldrd_strd (align[0], offset))
17297 return false;
17298
17299 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17300 false, load);
17301 }
17302
17303 \f
17304 /* Print a symbolic form of X to the debug file, F. */
17305 static void
17306 arm_print_value (FILE *f, rtx x)
17307 {
17308 switch (GET_CODE (x))
17309 {
17310 case CONST_INT:
17311 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17312 return;
17313
17314 case CONST_DOUBLE:
17315 {
17316 char fpstr[20];
17317 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17318 sizeof (fpstr), 0, 1);
17319 fputs (fpstr, f);
17320 }
17321 return;
17322
17323 case CONST_VECTOR:
17324 {
17325 int i;
17326
17327 fprintf (f, "<");
17328 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17329 {
17330 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17331 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17332 fputc (',', f);
17333 }
17334 fprintf (f, ">");
17335 }
17336 return;
17337
17338 case CONST_STRING:
17339 fprintf (f, "\"%s\"", XSTR (x, 0));
17340 return;
17341
17342 case SYMBOL_REF:
17343 fprintf (f, "`%s'", XSTR (x, 0));
17344 return;
17345
17346 case LABEL_REF:
17347 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17348 return;
17349
17350 case CONST:
17351 arm_print_value (f, XEXP (x, 0));
17352 return;
17353
17354 case PLUS:
17355 arm_print_value (f, XEXP (x, 0));
17356 fprintf (f, "+");
17357 arm_print_value (f, XEXP (x, 1));
17358 return;
17359
17360 case PC:
17361 fprintf (f, "pc");
17362 return;
17363
17364 default:
17365 fprintf (f, "????");
17366 return;
17367 }
17368 }
17369 \f
17370 /* Routines for manipulation of the constant pool. */
17371
17372 /* Arm instructions cannot load a large constant directly into a
17373 register; they have to come from a pc relative load. The constant
17374 must therefore be placed in the addressable range of the pc
17375 relative load. Depending on the precise pc relative load
17376 instruction the range is somewhere between 256 bytes and 4k. This
17377 means that we often have to dump a constant inside a function, and
17378 generate code to branch around it.
17379
17380 It is important to minimize this, since the branches will slow
17381 things down and make the code larger.
17382
17383 Normally we can hide the table after an existing unconditional
17384 branch so that there is no interruption of the flow, but in the
17385 worst case the code looks like this:
17386
17387 ldr rn, L1
17388 ...
17389 b L2
17390 align
17391 L1: .long value
17392 L2:
17393 ...
17394
17395 ldr rn, L3
17396 ...
17397 b L4
17398 align
17399 L3: .long value
17400 L4:
17401 ...
17402
17403 We fix this by performing a scan after scheduling, which notices
17404 which instructions need to have their operands fetched from the
17405 constant table and builds the table.
17406
17407 The algorithm starts by building a table of all the constants that
17408 need fixing up and all the natural barriers in the function (places
17409 where a constant table can be dropped without breaking the flow).
17410 For each fixup we note how far the pc-relative replacement will be
17411 able to reach and the offset of the instruction into the function.
17412
17413 Having built the table we then group the fixes together to form
17414 tables that are as large as possible (subject to addressing
17415 constraints) and emit each table of constants after the last
17416 barrier that is within range of all the instructions in the group.
17417 If a group does not contain a barrier, then we forcibly create one
17418 by inserting a jump instruction into the flow. Once the table has
17419 been inserted, the insns are then modified to reference the
17420 relevant entry in the pool.
17421
17422 Possible enhancements to the algorithm (not implemented) are:
17423
17424 1) For some processors and object formats, there may be benefit in
17425 aligning the pools to the start of cache lines; this alignment
17426 would need to be taken into account when calculating addressability
17427 of a pool. */
17428
17429 /* These typedefs are located at the start of this file, so that
17430 they can be used in the prototypes there. This comment is to
17431 remind readers of that fact so that the following structures
17432 can be understood more easily.
17433
17434 typedef struct minipool_node Mnode;
17435 typedef struct minipool_fixup Mfix; */
17436
17437 struct minipool_node
17438 {
17439 /* Doubly linked chain of entries. */
17440 Mnode * next;
17441 Mnode * prev;
17442 /* The maximum offset into the code at which this entry can be placed. While
17443 pushing fixes for forward references, all entries are sorted in order
17444 of increasing max_address. */
17445 HOST_WIDE_INT max_address;
17446 /* Similarly for an entry inserted for a backwards ref. */
17447 HOST_WIDE_INT min_address;
17448 /* The number of fixes referencing this entry. This can become zero
17449 if we "unpush" an entry. In this case we ignore the entry when we
17450 come to emit the code. */
17451 int refcount;
17452 /* The offset from the start of the minipool. */
17453 HOST_WIDE_INT offset;
17454 /* The value in the table. */
17455 rtx value;
17456 /* The mode of value. */
17457 machine_mode mode;
17458 /* The size of the value. With iWMMXt enabled
17459 sizes > 4 also imply an alignment of 8 bytes. */
17460 int fix_size;
17461 };
17462
17463 struct minipool_fixup
17464 {
17465 Mfix * next;
17466 rtx_insn * insn;
17467 HOST_WIDE_INT address;
17468 rtx * loc;
17469 machine_mode mode;
17470 int fix_size;
17471 rtx value;
17472 Mnode * minipool;
17473 HOST_WIDE_INT forwards;
17474 HOST_WIDE_INT backwards;
17475 };
17476
17477 /* Fixes less than a word need padding out to a word boundary. */
17478 #define MINIPOOL_FIX_SIZE(mode) \
17479 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
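/* For instance, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DImode) is 8.  */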
17480
17481 static Mnode * minipool_vector_head;
17482 static Mnode * minipool_vector_tail;
17483 static rtx_code_label *minipool_vector_label;
17484 static int minipool_pad;
17485
17486 /* The linked list of all minipool fixes required for this function. */
17487 Mfix * minipool_fix_head;
17488 Mfix * minipool_fix_tail;
17489 /* The fix entry for the current minipool, once it has been placed. */
17490 Mfix * minipool_barrier;
17491
17492 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17493 #define JUMP_TABLES_IN_TEXT_SECTION 0
17494 #endif
17495
17496 static HOST_WIDE_INT
17497 get_jump_table_size (rtx_jump_table_data *insn)
17498 {
17499 /* ADDR_VECs only take room if read-only data goes into the text
17500 section. */
17501 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17502 {
17503 rtx body = PATTERN (insn);
17504 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17505 HOST_WIDE_INT size;
17506 HOST_WIDE_INT modesize;
17507
17508 modesize = GET_MODE_SIZE (GET_MODE (body));
17509 size = modesize * XVECLEN (body, elt);
17510 switch (modesize)
17511 {
17512 case 1:
17513 /* Round up size of TBB table to a halfword boundary. */
17514 size = (size + 1) & ~HOST_WIDE_INT_1;
17515 break;
17516 case 2:
17517 /* No padding necessary for TBH. */
17518 break;
17519 case 4:
17520 /* Add two bytes for alignment on Thumb. */
17521 if (TARGET_THUMB)
17522 size += 2;
17523 break;
17524 default:
17525 gcc_unreachable ();
17526 }
17527 return size;
17528 }
17529
17530 return 0;
17531 }
17532
17533 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17534 function descriptor) into a register and the GOT address into the
17535 FDPIC register, returning an rtx for the register holding the
17536 function address. */
17537
17538 rtx
17539 arm_load_function_descriptor (rtx funcdesc)
17540 {
17541 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17542 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17543 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17544 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17545
17546 emit_move_insn (fnaddr_reg, fnaddr);
17547
17548 /* The ABI requires the entry point address to be loaded first, but
17549 since we cannot support lazy binding for lack of atomic load of
17550 two 32-bit values, we do not need to bother to prevent the
17551 previous load from being moved after that of the GOT address. */
17552 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17553
17554 return fnaddr_reg;
17555 }
17556
17557 /* Return the maximum amount of padding that will be inserted before
17558 label LABEL. */
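/* For instance, a label aligned to 8 bytes in Thumb state (minimum insn
   size of 2 bytes) may be preceded by up to 6 bytes of padding.  */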
17559 static HOST_WIDE_INT
17560 get_label_padding (rtx label)
17561 {
17562 HOST_WIDE_INT align, min_insn_size;
17563
17564 align = 1 << label_to_alignment (label).levels[0].log;
17565 min_insn_size = TARGET_THUMB ? 2 : 4;
17566 return align > min_insn_size ? align - min_insn_size : 0;
17567 }
17568
17569 /* Move a minipool fix MP from its current location to before MAX_MP.
17570 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17571 constraints may need updating. */
17572 static Mnode *
17573 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17574 HOST_WIDE_INT max_address)
17575 {
17576 /* The code below assumes these are different. */
17577 gcc_assert (mp != max_mp);
17578
17579 if (max_mp == NULL)
17580 {
17581 if (max_address < mp->max_address)
17582 mp->max_address = max_address;
17583 }
17584 else
17585 {
17586 if (max_address > max_mp->max_address - mp->fix_size)
17587 mp->max_address = max_mp->max_address - mp->fix_size;
17588 else
17589 mp->max_address = max_address;
17590
17591 /* Unlink MP from its current position. Since max_mp is non-null,
17592 mp->prev must be non-null. */
17593 mp->prev->next = mp->next;
17594 if (mp->next != NULL)
17595 mp->next->prev = mp->prev;
17596 else
17597 minipool_vector_tail = mp->prev;
17598
17599 /* Re-insert it before MAX_MP. */
17600 mp->next = max_mp;
17601 mp->prev = max_mp->prev;
17602 max_mp->prev = mp;
17603
17604 if (mp->prev != NULL)
17605 mp->prev->next = mp;
17606 else
17607 minipool_vector_head = mp;
17608 }
17609
17610 /* Save the new entry. */
17611 max_mp = mp;
17612
17613 /* Scan over the preceding entries and adjust their addresses as
17614 required. */
17615 while (mp->prev != NULL
17616 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17617 {
17618 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17619 mp = mp->prev;
17620 }
17621
17622 return max_mp;
17623 }
17624
17625 /* Add a constant to the minipool for a forward reference. Returns the
17626 node added or NULL if the constant will not fit in this pool. */
17627 static Mnode *
17628 add_minipool_forward_ref (Mfix *fix)
17629 {
17630 /* If set, max_mp is the first pool_entry that has a lower
17631 constraint than the one we are trying to add. */
17632 Mnode * max_mp = NULL;
17633 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17634 Mnode * mp;
17635
17636 /* If the minipool starts before the end of FIX->INSN then this FIX
17637 cannot be placed into the current pool. Furthermore, adding the
17638 new constant pool entry may cause the pool to start FIX_SIZE bytes
17639 earlier. */
17640 if (minipool_vector_head &&
17641 (fix->address + get_attr_length (fix->insn)
17642 >= minipool_vector_head->max_address - fix->fix_size))
17643 return NULL;
17644
17645 /* Scan the pool to see if a constant with the same value has
17646 already been added. While we are doing this, also note the
17647 location where we must insert the constant if it doesn't already
17648 exist. */
17649 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17650 {
17651 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17652 && fix->mode == mp->mode
17653 && (!LABEL_P (fix->value)
17654 || (CODE_LABEL_NUMBER (fix->value)
17655 == CODE_LABEL_NUMBER (mp->value)))
17656 && rtx_equal_p (fix->value, mp->value))
17657 {
17658 /* More than one fix references this entry. */
17659 mp->refcount++;
17660 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17661 }
17662
17663 /* Note the insertion point if necessary. */
17664 if (max_mp == NULL
17665 && mp->max_address > max_address)
17666 max_mp = mp;
17667
17668 /* If we are inserting an 8-byte aligned quantity and
17669 we have not already found an insertion point, then
17670 make sure that all such 8-byte aligned quantities are
17671 placed at the start of the pool. */
17672 if (ARM_DOUBLEWORD_ALIGN
17673 && max_mp == NULL
17674 && fix->fix_size >= 8
17675 && mp->fix_size < 8)
17676 {
17677 max_mp = mp;
17678 max_address = mp->max_address;
17679 }
17680 }
17681
17682 /* The value is not currently in the minipool, so we need to create
17683 a new entry for it. If MAX_MP is NULL, the entry will be put on
17684 the end of the list since the placement is less constrained than
17685 any existing entry. Otherwise, we insert the new fix before
17686 MAX_MP and, if necessary, adjust the constraints on the other
17687 entries. */
17688 mp = XNEW (Mnode);
17689 mp->fix_size = fix->fix_size;
17690 mp->mode = fix->mode;
17691 mp->value = fix->value;
17692 mp->refcount = 1;
17693 /* Not yet required for a backwards ref. */
17694 mp->min_address = -65536;
17695
17696 if (max_mp == NULL)
17697 {
17698 mp->max_address = max_address;
17699 mp->next = NULL;
17700 mp->prev = minipool_vector_tail;
17701
17702 if (mp->prev == NULL)
17703 {
17704 minipool_vector_head = mp;
17705 minipool_vector_label = gen_label_rtx ();
17706 }
17707 else
17708 mp->prev->next = mp;
17709
17710 minipool_vector_tail = mp;
17711 }
17712 else
17713 {
17714 if (max_address > max_mp->max_address - mp->fix_size)
17715 mp->max_address = max_mp->max_address - mp->fix_size;
17716 else
17717 mp->max_address = max_address;
17718
17719 mp->next = max_mp;
17720 mp->prev = max_mp->prev;
17721 max_mp->prev = mp;
17722 if (mp->prev != NULL)
17723 mp->prev->next = mp;
17724 else
17725 minipool_vector_head = mp;
17726 }
17727
17728 /* Save the new entry. */
17729 max_mp = mp;
17730
17731 /* Scan over the preceding entries and adjust their addresses as
17732 required. */
17733 while (mp->prev != NULL
17734 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17735 {
17736 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17737 mp = mp->prev;
17738 }
17739
17740 return max_mp;
17741 }
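/* A worked example with made-up numbers: for a fix at address 0x1000 whose
   insn has a forward pool range of 4095 bytes, and with minipool_pad == 4,
   the code above computes max_address = 0x1000 + 4095 - 4 = 0x1ffb, which
   bounds how far forward the new entry may be placed.  */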
17742
17743 static Mnode *
17744 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17745 HOST_WIDE_INT min_address)
17746 {
17747 HOST_WIDE_INT offset;
17748
17749 /* The code below assumes these are different. */
17750 gcc_assert (mp != min_mp);
17751
17752 if (min_mp == NULL)
17753 {
17754 if (min_address > mp->min_address)
17755 mp->min_address = min_address;
17756 }
17757 else
17758 {
17759 /* We will adjust this below if it is too loose. */
17760 mp->min_address = min_address;
17761
17762 /* Unlink MP from its current position. Since min_mp is non-null,
17763 mp->next must be non-null. */
17764 mp->next->prev = mp->prev;
17765 if (mp->prev != NULL)
17766 mp->prev->next = mp->next;
17767 else
17768 minipool_vector_head = mp->next;
17769
17770 /* Reinsert it after MIN_MP. */
17771 mp->prev = min_mp;
17772 mp->next = min_mp->next;
17773 min_mp->next = mp;
17774 if (mp->next != NULL)
17775 mp->next->prev = mp;
17776 else
17777 minipool_vector_tail = mp;
17778 }
17779
17780 min_mp = mp;
17781
17782 offset = 0;
17783 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17784 {
17785 mp->offset = offset;
17786 if (mp->refcount > 0)
17787 offset += mp->fix_size;
17788
17789 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17790 mp->next->min_address = mp->min_address + mp->fix_size;
17791 }
17792
17793 return min_mp;
17794 }
17795
17796 /* Add a constant to the minipool for a backward reference. Returns the
17797 node added or NULL if the constant will not fit in this pool.
17798
17799 Note that the code for insertion for a backwards reference can be
17800 somewhat confusing because the calculated offsets for each fix do
17801 not take into account the size of the pool (which is still under
17802 construction). */
17803 static Mnode *
17804 add_minipool_backward_ref (Mfix *fix)
17805 {
17806 /* If set, min_mp is the last pool_entry that has a lower constraint
17807 than the one we are trying to add. */
17808 Mnode *min_mp = NULL;
17809 /* This can be negative, since it is only a constraint. */
17810 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17811 Mnode *mp;
17812
17813 /* If we can't reach the current pool from this insn, or if we can't
17814 insert this entry at the end of the pool without pushing other
17815 fixes out of range, then we don't try. This ensures that we
17816 can't fail later on. */
17817 if (min_address >= minipool_barrier->address
17818 || (minipool_vector_tail->min_address + fix->fix_size
17819 >= minipool_barrier->address))
17820 return NULL;
17821
17822 /* Scan the pool to see if a constant with the same value has
17823 already been added. While we are doing this, also note the
17824 location where we must insert the constant if it doesn't already
17825 exist. */
17826 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17827 {
17828 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17829 && fix->mode == mp->mode
17830 && (!LABEL_P (fix->value)
17831 || (CODE_LABEL_NUMBER (fix->value)
17832 == CODE_LABEL_NUMBER (mp->value)))
17833 && rtx_equal_p (fix->value, mp->value)
17834 /* Check that there is enough slack to move this entry to the
17835 end of the table (this is conservative). */
17836 && (mp->max_address
17837 > (minipool_barrier->address
17838 + minipool_vector_tail->offset
17839 + minipool_vector_tail->fix_size)))
17840 {
17841 mp->refcount++;
17842 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17843 }
17844
17845 if (min_mp != NULL)
17846 mp->min_address += fix->fix_size;
17847 else
17848 {
17849 /* Note the insertion point if necessary. */
17850 if (mp->min_address < min_address)
17851 {
17852 /* For now, we do not allow nodes that require 8-byte alignment
17853 to be inserted anywhere but at the start of the pool. */
17854 if (ARM_DOUBLEWORD_ALIGN
17855 && fix->fix_size >= 8 && mp->fix_size < 8)
17856 return NULL;
17857 else
17858 min_mp = mp;
17859 }
17860 else if (mp->max_address
17861 < minipool_barrier->address + mp->offset + fix->fix_size)
17862 {
17863 /* Inserting before this entry would push the fix beyond
17864 its maximum address (which can happen if we have
17865 re-located a forwards fix); force the new fix to come
17866 after it. */
17867 if (ARM_DOUBLEWORD_ALIGN
17868 && fix->fix_size >= 8 && mp->fix_size < 8)
17869 return NULL;
17870 else
17871 {
17872 min_mp = mp;
17873 min_address = mp->min_address + fix->fix_size;
17874 }
17875 }
17876 /* Do not insert a non-8-byte aligned quantity before 8-byte
17877 aligned quantities. */
17878 else if (ARM_DOUBLEWORD_ALIGN
17879 && fix->fix_size < 8
17880 && mp->fix_size >= 8)
17881 {
17882 min_mp = mp;
17883 min_address = mp->min_address + fix->fix_size;
17884 }
17885 }
17886 }
17887
17888 /* We need to create a new entry. */
17889 mp = XNEW (Mnode);
17890 mp->fix_size = fix->fix_size;
17891 mp->mode = fix->mode;
17892 mp->value = fix->value;
17893 mp->refcount = 1;
17894 mp->max_address = minipool_barrier->address + 65536;
17895
17896 mp->min_address = min_address;
17897
17898 if (min_mp == NULL)
17899 {
17900 mp->prev = NULL;
17901 mp->next = minipool_vector_head;
17902
17903 if (mp->next == NULL)
17904 {
17905 minipool_vector_tail = mp;
17906 minipool_vector_label = gen_label_rtx ();
17907 }
17908 else
17909 mp->next->prev = mp;
17910
17911 minipool_vector_head = mp;
17912 }
17913 else
17914 {
17915 mp->next = min_mp->next;
17916 mp->prev = min_mp;
17917 min_mp->next = mp;
17918
17919 if (mp->next != NULL)
17920 mp->next->prev = mp;
17921 else
17922 minipool_vector_tail = mp;
17923 }
17924
17925 /* Save the new entry. */
17926 min_mp = mp;
17927
17928 if (mp->prev)
17929 mp = mp->prev;
17930 else
17931 mp->offset = 0;
17932
17933 /* Scan over the following entries and adjust their offsets. */
17934 while (mp->next != NULL)
17935 {
17936 if (mp->next->min_address < mp->min_address + mp->fix_size)
17937 mp->next->min_address = mp->min_address + mp->fix_size;
17938
17939 if (mp->refcount)
17940 mp->next->offset = mp->offset + mp->fix_size;
17941 else
17942 mp->next->offset = mp->offset;
17943
17944 mp = mp->next;
17945 }
17946
17947 return min_mp;
17948 }
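/* Similarly for the backwards case, with made-up numbers: a fix at address
   0x2000 whose insn has a negative pool range of 4095 bytes gives
   min_address = 0x2000 - 4095 = 0x1001, bounding how far back the entry may
   be placed; as noted above this can legitimately be negative early in a
   function, where it is only a constraint.  */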
17949
17950 static void
17951 assign_minipool_offsets (Mfix *barrier)
17952 {
17953 HOST_WIDE_INT offset = 0;
17954 Mnode *mp;
17955
17956 minipool_barrier = barrier;
17957
17958 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17959 {
17960 mp->offset = offset;
17961
17962 if (mp->refcount > 0)
17963 offset += mp->fix_size;
17964 }
17965 }
17966
17967 /* Output the literal table. */
17968 static void
17969 dump_minipool (rtx_insn *scan)
17970 {
17971 Mnode * mp;
17972 Mnode * nmp;
17973 int align64 = 0;
17974
17975 if (ARM_DOUBLEWORD_ALIGN)
17976 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17977 if (mp->refcount > 0 && mp->fix_size >= 8)
17978 {
17979 align64 = 1;
17980 break;
17981 }
17982
17983 if (dump_file)
17984 fprintf (dump_file,
17985 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17986 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17987
17988 scan = emit_label_after (gen_label_rtx (), scan);
17989 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17990 scan = emit_label_after (minipool_vector_label, scan);
17991
17992 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17993 {
17994 if (mp->refcount > 0)
17995 {
17996 if (dump_file)
17997 {
17998 fprintf (dump_file,
17999 ";; Offset %u, min %ld, max %ld ",
18000 (unsigned) mp->offset, (unsigned long) mp->min_address,
18001 (unsigned long) mp->max_address);
18002 arm_print_value (dump_file, mp->value);
18003 fputc ('\n', dump_file);
18004 }
18005
18006 rtx val = copy_rtx (mp->value);
18007
18008 switch (GET_MODE_SIZE (mp->mode))
18009 {
18010 #ifdef HAVE_consttable_1
18011 case 1:
18012 scan = emit_insn_after (gen_consttable_1 (val), scan);
18013 break;
18014
18015 #endif
18016 #ifdef HAVE_consttable_2
18017 case 2:
18018 scan = emit_insn_after (gen_consttable_2 (val), scan);
18019 break;
18020
18021 #endif
18022 #ifdef HAVE_consttable_4
18023 case 4:
18024 scan = emit_insn_after (gen_consttable_4 (val), scan);
18025 break;
18026
18027 #endif
18028 #ifdef HAVE_consttable_8
18029 case 8:
18030 scan = emit_insn_after (gen_consttable_8 (val), scan);
18031 break;
18032
18033 #endif
18034 #ifdef HAVE_consttable_16
18035 case 16:
18036 scan = emit_insn_after (gen_consttable_16 (val), scan);
18037 break;
18038
18039 #endif
18040 default:
18041 gcc_unreachable ();
18042 }
18043 }
18044
18045 nmp = mp->next;
18046 free (mp);
18047 }
18048
18049 minipool_vector_head = minipool_vector_tail = NULL;
18050 scan = emit_insn_after (gen_consttable_end (), scan);
18051 scan = emit_barrier_after (scan);
18052 }
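/* The assembly produced here looks roughly like this (illustrative values;
   the exact directives come from the align_* and consttable_* patterns in
   the machine description):

	.align	2		@ or 3 when align64 is set
   .Lpool:			@ minipool_vector_label
	.word	0x12345678	@ a 4-byte entry
	.word	0xdeadbeef	@ another 4-byte entry

   Entries whose refcount is zero are not emitted, but every node is freed
   and the pool is reset for the next barrier.  */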
18053
18054 /* Return the cost of forcibly inserting a barrier after INSN. */
18055 static int
18056 arm_barrier_cost (rtx_insn *insn)
18057 {
18058 /* Basing the location of the pool on the loop depth is preferable,
18059 but at the moment, the basic block information seems to be
18060 corrupted by this stage of the compilation. */
18061 int base_cost = 50;
18062 rtx_insn *next = next_nonnote_insn (insn);
18063
18064 if (next != NULL && LABEL_P (next))
18065 base_cost -= 20;
18066
18067 switch (GET_CODE (insn))
18068 {
18069 case CODE_LABEL:
18070 /* It will always be better to place the table before the label, rather
18071 than after it. */
18072 return 50;
18073
18074 case INSN:
18075 case CALL_INSN:
18076 return base_cost;
18077
18078 case JUMP_INSN:
18079 return base_cost - 10;
18080
18081 default:
18082 return base_cost + 10;
18083 }
18084 }
18085
18086 /* Find the best place in the insn stream in the range
18087 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18088 Create the barrier by inserting a jump and add a new fix entry for
18089 it. */
18090 static Mfix *
18091 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18092 {
18093 HOST_WIDE_INT count = 0;
18094 rtx_barrier *barrier;
18095 rtx_insn *from = fix->insn;
18096 /* The instruction after which we will insert the jump. */
18097 rtx_insn *selected = NULL;
18098 int selected_cost;
18099 /* The address at which the jump instruction will be placed. */
18100 HOST_WIDE_INT selected_address;
18101 Mfix * new_fix;
18102 HOST_WIDE_INT max_count = max_address - fix->address;
18103 rtx_code_label *label = gen_label_rtx ();
18104
18105 selected_cost = arm_barrier_cost (from);
18106 selected_address = fix->address;
18107
18108 while (from && count < max_count)
18109 {
18110 rtx_jump_table_data *tmp;
18111 int new_cost;
18112
18113 /* This code shouldn't have been called if there was a natural barrier
18114 within range. */
18115 gcc_assert (!BARRIER_P (from));
18116
18117 /* Count the length of this insn. This must stay in sync with the
18118 code that pushes minipool fixes. */
18119 if (LABEL_P (from))
18120 count += get_label_padding (from);
18121 else
18122 count += get_attr_length (from);
18123
18124 /* If there is a jump table, add its length. */
18125 if (tablejump_p (from, NULL, &tmp))
18126 {
18127 count += get_jump_table_size (tmp);
18128
18129 /* Jump tables aren't in a basic block, so base the cost on
18130 the dispatch insn. If we select this location, we will
18131 still put the pool after the table. */
18132 new_cost = arm_barrier_cost (from);
18133
18134 if (count < max_count
18135 && (!selected || new_cost <= selected_cost))
18136 {
18137 selected = tmp;
18138 selected_cost = new_cost;
18139 selected_address = fix->address + count;
18140 }
18141
18142 /* Continue after the dispatch table. */
18143 from = NEXT_INSN (tmp);
18144 continue;
18145 }
18146
18147 new_cost = arm_barrier_cost (from);
18148
18149 if (count < max_count
18150 && (!selected || new_cost <= selected_cost))
18151 {
18152 selected = from;
18153 selected_cost = new_cost;
18154 selected_address = fix->address + count;
18155 }
18156
18157 from = NEXT_INSN (from);
18158 }
18159
18160 /* Make sure that we found a place to insert the jump. */
18161 gcc_assert (selected);
18162
18163 /* Create a new JUMP_INSN that branches around a barrier. */
18164 from = emit_jump_insn_after (gen_jump (label), selected);
18165 JUMP_LABEL (from) = label;
18166 barrier = emit_barrier_after (from);
18167 emit_label_after (label, barrier);
18168
18169 /* Create a minipool barrier entry for the new barrier. */
18170 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18171 new_fix->insn = barrier;
18172 new_fix->address = selected_address;
18173 new_fix->next = fix->next;
18174 fix->next = new_fix;
18175
18176 return new_fix;
18177 }
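/* Schematically the insns emitted above are (illustrative label name):

	b	.Lskip		@ new jump around the future pool
	<barrier>		@ the minipool will be dumped after this barrier
   .Lskip:

   and the Mfix created for the barrier is linked into the fix list directly
   after FIX so that the main fixup loop in arm_reorg will see it.  */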
18178
18179 /* Record that there is a natural barrier in the insn stream at
18180 ADDRESS. */
18181 static void
18182 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18183 {
18184 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18185
18186 fix->insn = insn;
18187 fix->address = address;
18188
18189 fix->next = NULL;
18190 if (minipool_fix_head != NULL)
18191 minipool_fix_tail->next = fix;
18192 else
18193 minipool_fix_head = fix;
18194
18195 minipool_fix_tail = fix;
18196 }
18197
18198 /* Record INSN, which will need fixing up to load a value from the
18199 minipool. ADDRESS is the offset of the insn from the start of the
18200 function; LOC is a pointer to the part of the insn which requires
18201 fixing; VALUE is the constant that must be loaded, which is of type
18202 MODE. */
18203 static void
18204 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18205 machine_mode mode, rtx value)
18206 {
18207 gcc_assert (!arm_disable_literal_pool);
18208 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18209
18210 fix->insn = insn;
18211 fix->address = address;
18212 fix->loc = loc;
18213 fix->mode = mode;
18214 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18215 fix->value = value;
18216 fix->forwards = get_attr_pool_range (insn);
18217 fix->backwards = get_attr_neg_pool_range (insn);
18218 fix->minipool = NULL;
18219
18220 /* If an insn doesn't have a range defined for it, then it isn't
18221 expecting to be reworked by this code. Better to stop now than
18222 to generate duff assembly code. */
18223 gcc_assert (fix->forwards || fix->backwards);
18224
18225 /* If an entry requires 8-byte alignment then assume all constant pools
18226 require 4 bytes of padding. Trying to do this later on a per-pool
18227 basis is awkward because existing pool entries have to be modified. */
18228 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18229 minipool_pad = 4;
18230
18231 if (dump_file)
18232 {
18233 fprintf (dump_file,
18234 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18235 GET_MODE_NAME (mode),
18236 INSN_UID (insn), (unsigned long) address,
18237 -1 * (long)fix->backwards, (long)fix->forwards);
18238 arm_print_value (dump_file, fix->value);
18239 fprintf (dump_file, "\n");
18240 }
18241
18242 /* Add it to the chain of fixes. */
18243 fix->next = NULL;
18244
18245 if (minipool_fix_head != NULL)
18246 minipool_fix_tail->next = fix;
18247 else
18248 minipool_fix_head = fix;
18249
18250 minipool_fix_tail = fix;
18251 }
18252
18253 /* Return the maximum allowed cost of synthesizing a 64-bit constant
18254 inline, i.e. the largest number of insns we are prepared to use
18255 before falling back to the literal pool. */
18256 int
18257 arm_max_const_double_inline_cost ()
18258 {
18259 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18260 }
18261
18262 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18263 Returns the number of insns needed, or 99 if we don't know how to
18264 do it. */
18265 int
18266 arm_const_double_inline_cost (rtx val)
18267 {
18268 rtx lowpart, highpart;
18269 machine_mode mode;
18270
18271 mode = GET_MODE (val);
18272
18273 if (mode == VOIDmode)
18274 mode = DImode;
18275
18276 gcc_assert (GET_MODE_SIZE (mode) == 8);
18277
18278 lowpart = gen_lowpart (SImode, val);
18279 highpart = gen_highpart_mode (SImode, mode, val);
18280
18281 gcc_assert (CONST_INT_P (lowpart));
18282 gcc_assert (CONST_INT_P (highpart));
18283
18284 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18285 NULL_RTX, NULL_RTX, 0, 0)
18286 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18287 NULL_RTX, NULL_RTX, 0, 0));
18288 }
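/* A worked example (hypothetical constant): for the DImode value
   0x0000000100000500 the low word is 0x500 and the high word is 0x1, both
   valid ARM immediates, so the cost computed above is 1 + 1 = 2.  That is
   no more than arm_max_const_double_inline_cost (), so the move patterns
   will synthesize the constant inline instead of using the literal pool.  */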
18289
18290 /* Cost of loading a SImode constant. */
18291 static inline int
18292 arm_const_inline_cost (enum rtx_code code, rtx val)
18293 {
18294 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18295 NULL_RTX, NULL_RTX, 1, 0);
18296 }
18297
18298 /* Return true if it is worthwhile to split a 64-bit constant into two
18299 32-bit operations. This is the case if optimizing for size, or
18300 if we have load delay slots, or if one 32-bit part can be done with
18301 a single data operation. */
18302 bool
18303 arm_const_double_by_parts (rtx val)
18304 {
18305 machine_mode mode = GET_MODE (val);
18306 rtx part;
18307
18308 if (optimize_size || arm_ld_sched)
18309 return true;
18310
18311 if (mode == VOIDmode)
18312 mode = DImode;
18313
18314 part = gen_highpart_mode (SImode, mode, val);
18315
18316 gcc_assert (CONST_INT_P (part));
18317
18318 if (const_ok_for_arm (INTVAL (part))
18319 || const_ok_for_arm (~INTVAL (part)))
18320 return true;
18321
18322 part = gen_lowpart (SImode, val);
18323
18324 gcc_assert (CONST_INT_P (part));
18325
18326 if (const_ok_for_arm (INTVAL (part))
18327 || const_ok_for_arm (~INTVAL (part)))
18328 return true;
18329
18330 return false;
18331 }
18332
18333 /* Return true if it is possible to inline both the high and low parts
18334 of a 64-bit constant into 32-bit data processing instructions. */
18335 bool
18336 arm_const_double_by_immediates (rtx val)
18337 {
18338 machine_mode mode = GET_MODE (val);
18339 rtx part;
18340
18341 if (mode == VOIDmode)
18342 mode = DImode;
18343
18344 part = gen_highpart_mode (SImode, mode, val);
18345
18346 gcc_assert (CONST_INT_P (part));
18347
18348 if (!const_ok_for_arm (INTVAL (part)))
18349 return false;
18350
18351 part = gen_lowpart (SImode, val);
18352
18353 gcc_assert (CONST_INT_P (part));
18354
18355 if (!const_ok_for_arm (INTVAL (part)))
18356 return false;
18357
18358 return true;
18359 }
18360
18361 /* Scan INSN and note any of its operands that need fixing.
18362 If DO_PUSHES is false we do not actually push any of the fixups
18363 needed. */
18364 static void
18365 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18366 {
18367 int opno;
18368
18369 extract_constrain_insn (insn);
18370
18371 if (recog_data.n_alternatives == 0)
18372 return;
18373
18374 /* Fill in recog_op_alt with information about the constraints of
18375 this insn. */
18376 preprocess_constraints (insn);
18377
18378 const operand_alternative *op_alt = which_op_alt ();
18379 for (opno = 0; opno < recog_data.n_operands; opno++)
18380 {
18381 /* Things we need to fix can only occur in inputs. */
18382 if (recog_data.operand_type[opno] != OP_IN)
18383 continue;
18384
18385 /* If this alternative is a memory reference, then any mention
18386 of constants in this alternative is really to fool reload
18387 into allowing us to accept one there. We need to fix them up
18388 now so that we output the right code. */
18389 if (op_alt[opno].memory_ok)
18390 {
18391 rtx op = recog_data.operand[opno];
18392
18393 if (CONSTANT_P (op))
18394 {
18395 if (do_pushes)
18396 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18397 recog_data.operand_mode[opno], op);
18398 }
18399 else if (MEM_P (op)
18400 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18401 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18402 {
18403 if (do_pushes)
18404 {
18405 rtx cop = avoid_constant_pool_reference (op);
18406
18407 /* Casting the address of something to a mode narrower
18408 than a word can cause avoid_constant_pool_reference()
18409 to return the pool reference itself. That's no good to
18410 us here. Lets just hope that we can use the
18411 constant pool value directly. */
18412 if (op == cop)
18413 cop = get_pool_constant (XEXP (op, 0));
18414
18415 push_minipool_fix (insn, address,
18416 recog_data.operand_loc[opno],
18417 recog_data.operand_mode[opno], cop);
18418 }
18419
18420 }
18421 }
18422 }
18423
18424 return;
18425 }
18426
18427 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18428 and unions in the context of ARMv8-M Security Extensions. It is used as a
18429 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18430 functions. The PADDING_BITS_TO_CLEAR pointer can point to either one
18431 or four masks, depending on whether it is being computed for a
18432 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18433 respectively. The tree for the type of the argument or a field within an
18434 argument is passed in ARG_TYPE, the current register this argument or field
18435 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18436 argument or field starts at is passed in STARTING_BIT and the last used bit
18437 is kept in LAST_USED_BIT which is also updated accordingly. */
18438
18439 static unsigned HOST_WIDE_INT
18440 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18441 uint32_t * padding_bits_to_clear,
18442 unsigned starting_bit, int * last_used_bit)
18443
18444 {
18445 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18446
18447 if (TREE_CODE (arg_type) == RECORD_TYPE)
18448 {
18449 unsigned current_bit = starting_bit;
18450 tree field;
18451 long int offset, size;
18452
18453
18454 field = TYPE_FIELDS (arg_type);
18455 while (field)
18456 {
18457 /* The offset within a structure is always an offset from
18458 the start of that structure. Make sure we take that into account in
18459 the calculation of the register-based offset that we use here. */
18460 offset = starting_bit;
18461 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18462 offset %= 32;
18463
18464 /* This is the actual size of the field, for bitfields this is the
18465 bitfield width and not the container size. */
18466 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18467
18468 if (*last_used_bit != offset)
18469 {
18470 if (offset < *last_used_bit)
18471 {
18472 /* This field's offset is before the 'last_used_bit', which
18473 means this field goes in the next register. So we need to
18474 pad the rest of the current register and increase the
18475 register number. */
18476 uint32_t mask;
18477 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18478 mask++;
18479
18480 padding_bits_to_clear[*regno] |= mask;
18481 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18482 (*regno)++;
18483 }
18484 else
18485 {
18486 /* Otherwise we pad the bits between the last field's end and
18487 the start of the new field. */
18488 uint32_t mask;
18489
18490 mask = ((uint32_t)-1) >> (32 - offset);
18491 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18492 padding_bits_to_clear[*regno] |= mask;
18493 }
18494 current_bit = offset;
18495 }
18496
18497 /* Calculate further padding bits for inner structs/unions too. */
18498 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18499 {
18500 *last_used_bit = current_bit;
18501 not_to_clear_reg_mask
18502 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18503 padding_bits_to_clear, offset,
18504 last_used_bit);
18505 }
18506 else
18507 {
18508 /* Update 'current_bit' with this field's size. If the
18509 'current_bit' lies in a subsequent register, update 'regno' and
18510 reset 'current_bit' to point to the current bit in that new
18511 register. */
18512 current_bit += size;
18513 while (current_bit >= 32)
18514 {
18515 current_bit -= 32;
18516 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18517 (*regno)++;
18518 }
18519 *last_used_bit = current_bit;
18520 }
18521
18522 field = TREE_CHAIN (field);
18523 }
18524 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18525 }
18526 else if (TREE_CODE (arg_type) == UNION_TYPE)
18527 {
18528 tree field, field_t;
18529 int i, regno_t, field_size;
18530 int max_reg = -1;
18531 int max_bit = -1;
18532 uint32_t mask;
18533 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18534 = {-1, -1, -1, -1};
18535
18536 /* To compute the padding bits in a union we only consider bits as
18537 padding bits if they are always either a padding bit or fall outside a
18538 field's size for all fields in the union. */
18539 field = TYPE_FIELDS (arg_type);
18540 while (field)
18541 {
18542 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18543 = {0U, 0U, 0U, 0U};
18544 int last_used_bit_t = *last_used_bit;
18545 regno_t = *regno;
18546 field_t = TREE_TYPE (field);
18547
18548 /* If the field's type is either a record or a union make sure to
18549 compute their padding bits too. */
18550 if (RECORD_OR_UNION_TYPE_P (field_t))
18551 not_to_clear_reg_mask
18552 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18553 &padding_bits_to_clear_t[0],
18554 starting_bit, &last_used_bit_t);
18555 else
18556 {
18557 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18558 regno_t = (field_size / 32) + *regno;
18559 last_used_bit_t = (starting_bit + field_size) % 32;
18560 }
18561
18562 for (i = *regno; i < regno_t; i++)
18563 {
18564 /* For all but the last register used by this field only keep the
18565 padding bits that were padding bits in this field. */
18566 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18567 }
18568
18569 /* For the last register, keep all padding bits that were padding
18570 bits in this field and any padding bits that are still valid
18571 as padding bits but fall outside of this field's size. */
18572 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18573 padding_bits_to_clear_res[regno_t]
18574 &= padding_bits_to_clear_t[regno_t] | mask;
18575
18576 /* Update the maximum size of the fields in terms of registers used
18577 ('max_reg') and the 'last_used_bit' in said register. */
18578 if (max_reg < regno_t)
18579 {
18580 max_reg = regno_t;
18581 max_bit = last_used_bit_t;
18582 }
18583 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18584 max_bit = last_used_bit_t;
18585
18586 field = TREE_CHAIN (field);
18587 }
18588
18589 /* Update the current padding_bits_to_clear using the intersection of the
18590 padding bits of all the fields. */
18591 for (i = *regno; i < max_reg; i++)
18592 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18593
18594 /* Do not keep trailing padding bits; we do not know yet whether this
18595 is the end of the argument. */
18596 mask = ((uint32_t) 1 << max_bit) - 1;
18597 padding_bits_to_clear[max_reg]
18598 |= padding_bits_to_clear_res[max_reg] & mask;
18599
18600 *regno = max_reg;
18601 *last_used_bit = max_bit;
18602 }
18603 else
18604 /* This function should only be used for structs and unions. */
18605 gcc_unreachable ();
18606
18607 return not_to_clear_reg_mask;
18608 }
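/* A worked example (hypothetical argument type):

	struct s { char a; char b; int c; };

   passed in r0-r1: fields a and b occupy bits 0-15 of r0 while c, needing
   32-bit alignment, starts in r1, so bits 16-31 of r0 are padding.  The
   code above records padding_bits_to_clear[0] |= 0xffff0000 and returns a
   mask with the bits for both r0 and r1 set, since both registers carry
   data that must not be cleared wholesale.  */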
18609
18610 /* In the context of ARMv8-M Security Extensions, this function is used for both
18611 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18612 registers are used when returning or passing arguments, which is then
18613 returned as a mask. It will also compute a mask to indicate padding/unused
18614 bits for each of these registers, and passes this through the
18615 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18616 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18617 the starting register used to pass this argument or return value is passed
18618 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18619 for struct and union types. */
18620
18621 static unsigned HOST_WIDE_INT
18622 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18623 uint32_t * padding_bits_to_clear)
18624
18625 {
18626 int last_used_bit = 0;
18627 unsigned HOST_WIDE_INT not_to_clear_mask;
18628
18629 if (RECORD_OR_UNION_TYPE_P (arg_type))
18630 {
18631 not_to_clear_mask
18632 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18633 padding_bits_to_clear, 0,
18634 &last_used_bit);
18635
18636
18637 /* If the 'last_used_bit' is not zero, that means we are still using a
18638 part of the last 'regno'. In such cases we must clear the trailing
18639 bits. Otherwise we are not using regno and we should mark it as to
18640 clear. */
18641 if (last_used_bit != 0)
18642 padding_bits_to_clear[regno]
18643 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18644 else
18645 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18646 }
18647 else
18648 {
18649 not_to_clear_mask = 0;
18650 /* We are not dealing with structs or unions, so these arguments may be
18651 passed in floating point registers too. In some cases a BLKmode is
18652 used when returning or passing arguments in multiple VFP registers. */
18653 if (GET_MODE (arg_rtx) == BLKmode)
18654 {
18655 int i, arg_regs;
18656 rtx reg;
18657
18658 /* This should really only occur when dealing with the hard-float
18659 ABI. */
18660 gcc_assert (TARGET_HARD_FLOAT_ABI);
18661
18662 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18663 {
18664 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18665 gcc_assert (REG_P (reg));
18666
18667 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18668
18669 /* If we are dealing with DF mode, make sure we don't
18670 clear either of the registers it addresses. */
18671 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18672 if (arg_regs > 1)
18673 {
18674 unsigned HOST_WIDE_INT mask;
18675 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18676 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18677 not_to_clear_mask |= mask;
18678 }
18679 }
18680 }
18681 else
18682 {
18683 /* Otherwise we can rely on the MODE to determine how many registers
18684 are being used by this argument. */
18685 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18686 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18687 if (arg_regs > 1)
18688 {
18689 unsigned HOST_WIDE_INT
18690 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18691 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18692 not_to_clear_mask |= mask;
18693 }
18694 }
18695 }
18696
18697 return not_to_clear_mask;
18698 }
18699
18700 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or
18701 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18702 are to be fully cleared, using the value in register CLEARING_REG if more
18703 efficient. The PADDING_BITS_TO_CLEAR array, of PADDING_BITS_LEN entries, gives
18704 the bits that need to be cleared in caller-saved core registers, with
18705 SCRATCH_REG used as a scratch register for that clearing.
18706
18707 NOTE: one of three following assertions must hold:
18708 - SCRATCH_REG is a low register
18709 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18710 in TO_CLEAR_BITMAP)
18711 - CLEARING_REG is a low register. */
18712
18713 static void
18714 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18715 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18716 {
18717 bool saved_clearing = false;
18718 rtx saved_clearing_reg = NULL_RTX;
18719 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18720
18721 gcc_assert (arm_arch_cmse);
18722
18723 if (!bitmap_empty_p (to_clear_bitmap))
18724 {
18725 minregno = bitmap_first_set_bit (to_clear_bitmap);
18726 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18727 }
18728 clearing_regno = REGNO (clearing_reg);
18729
18730 /* Clear padding bits. */
18731 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18732 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18733 {
18734 uint64_t mask;
18735 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18736
18737 if (padding_bits_to_clear[i] == 0)
18738 continue;
18739
18740 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18741 CLEARING_REG as scratch. */
18742 if (TARGET_THUMB1
18743 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18744 {
18745 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18746 such that we can use clearing_reg to clear the unused bits in the
18747 arguments. */
18748 if ((clearing_regno > maxregno
18749 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18750 && !saved_clearing)
18751 {
18752 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18753 emit_move_insn (scratch_reg, clearing_reg);
18754 saved_clearing = true;
18755 saved_clearing_reg = scratch_reg;
18756 }
18757 scratch_reg = clearing_reg;
18758 }
18759
18760 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18761 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18762 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18763
18764 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18765 mask = (~padding_bits_to_clear[i]) >> 16;
18766 rtx16 = gen_int_mode (16, SImode);
18767 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18768 if (mask)
18769 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18770
18771 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18772 }
18773 if (saved_clearing)
18774 emit_move_insn (clearing_reg, saved_clearing_reg);
18775
18776
18777 /* Clear full registers. */
18778
18779 if (TARGET_HAVE_FPCXT_CMSE)
18780 {
18781 rtvec vunspec_vec;
18782 int i, j, k, nb_regs;
18783 rtx use_seq, par, reg, set, vunspec;
18784 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18785 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18786 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18787
18788 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18789 {
18790 /* Find next register to clear and exit if none. */
18791 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18792 if (i > maxregno)
18793 break;
18794
18795 /* Compute number of consecutive registers to clear. */
18796 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18797 j++);
18798 nb_regs = j - i;
18799
18800 /* Create VSCCLRM RTX pattern. */
18801 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18802 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18803 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18804 VUNSPEC_VSCCLRM_VPR);
18805 XVECEXP (par, 0, 0) = vunspec;
18806
18807 /* Insert VFP register clearing RTX in the pattern. */
18808 start_sequence ();
18809 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18810 {
18811 if (!bitmap_bit_p (to_clear_bitmap, j))
18812 continue;
18813
18814 reg = gen_rtx_REG (SFmode, j);
18815 set = gen_rtx_SET (reg, const0_rtx);
18816 XVECEXP (par, 0, k++) = set;
18817 emit_use (reg);
18818 }
18819 use_seq = get_insns ();
18820 end_sequence ();
18821
18822 emit_insn_after (use_seq, emit_insn (par));
18823 }
18824
18825 /* Get set of core registers to clear. */
18826 bitmap_clear (core_regs_bitmap);
18827 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18828 IP_REGNUM - R0_REGNUM + 1);
18829 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18830 core_regs_bitmap);
18831 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18832
18833 if (bitmap_empty_p (to_clear_core_bitmap))
18834 return;
18835
18836 /* Create clrm RTX pattern. */
18837 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18838 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18839
18840 /* Insert core register clearing RTX in the pattern. */
18841 start_sequence ();
18842 for (j = 0, i = minregno; j < nb_regs; i++)
18843 {
18844 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18845 continue;
18846
18847 reg = gen_rtx_REG (SImode, i);
18848 set = gen_rtx_SET (reg, const0_rtx);
18849 XVECEXP (par, 0, j++) = set;
18850 emit_use (reg);
18851 }
18852
18853 /* Insert APSR register clearing RTX in the pattern
18854 along with clobbering CC. */
18855 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18856 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18857 VUNSPEC_CLRM_APSR);
18858
18859 XVECEXP (par, 0, j++) = vunspec;
18860
18861 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18862 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18863 XVECEXP (par, 0, j) = clobber;
18864
18865 use_seq = get_insns ();
18866 end_sequence ();
18867
18868 emit_insn_after (use_seq, emit_insn (par));
18869 }
18870 else
18871 {
18872 /* If not marked for clearing, clearing_reg already does not contain
18873 any secret. */
18874 if (clearing_regno <= maxregno
18875 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18876 {
18877 emit_move_insn (clearing_reg, const0_rtx);
18878 emit_use (clearing_reg);
18879 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18880 }
18881
18882 for (regno = minregno; regno <= maxregno; regno++)
18883 {
18884 if (!bitmap_bit_p (to_clear_bitmap, regno))
18885 continue;
18886
18887 if (IS_VFP_REGNUM (regno))
18888 {
18889 /* If regno is an even vfp register and its successor is also to
18890 be cleared, use vmov. */
18891 if (TARGET_VFP_DOUBLE
18892 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18893 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18894 {
18895 emit_move_insn (gen_rtx_REG (DFmode, regno),
18896 CONST1_RTX (DFmode));
18897 emit_use (gen_rtx_REG (DFmode, regno));
18898 regno++;
18899 }
18900 else
18901 {
18902 emit_move_insn (gen_rtx_REG (SFmode, regno),
18903 CONST1_RTX (SFmode));
18904 emit_use (gen_rtx_REG (SFmode, regno));
18905 }
18906 }
18907 else
18908 {
18909 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18910 emit_use (gen_rtx_REG (SImode, regno));
18911 }
18912 }
18913 }
18914 }
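/* Schematically, on a target with the FPCXT extension the sequence built
   above is (illustrative register ranges):

	vscclrm	{s0-s15, vpr}	@ one block per run of consecutive VFP regs
	clrm	{r0-r3, ip, apsr}

   whereas on other Armv8-M targets each register is cleared individually:
   core registers by copying CLEARING_REG (zeroed first if it is itself in
   TO_CLEAR_BITMAP) and VFP registers with single or double vmov moves.  */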
18915
18916 /* Clear core and caller-saved VFP registers not used to pass arguments before
18917 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18918 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18919 libgcc/config/arm/cmse_nonsecure_call.S. */
18920
18921 static void
18922 cmse_nonsecure_call_inline_register_clear (void)
18923 {
18924 basic_block bb;
18925
18926 FOR_EACH_BB_FN (bb, cfun)
18927 {
18928 rtx_insn *insn;
18929
18930 FOR_BB_INSNS (bb, insn)
18931 {
18932 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18933 /* frame = VFP regs + FPSCR + VPR. */
18934 unsigned lazy_store_stack_frame_size
18935 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18936 unsigned long callee_saved_mask
18937 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18938 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18939 unsigned address_regnum, regno;
18940 unsigned max_int_regno
18941 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18942 unsigned max_fp_regno
18943 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18944 unsigned maxregno
18945 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18946 auto_sbitmap to_clear_bitmap (maxregno + 1);
18947 rtx_insn *seq;
18948 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18949 rtx address;
18950 CUMULATIVE_ARGS args_so_far_v;
18951 cumulative_args_t args_so_far;
18952 tree arg_type, fntype;
18953 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18954 function_args_iterator args_iter;
18955 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18956
18957 if (!NONDEBUG_INSN_P (insn))
18958 continue;
18959
18960 if (!CALL_P (insn))
18961 continue;
18962
18963 pat = PATTERN (insn);
18964 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18965 call = XVECEXP (pat, 0, 0);
18966
18967 /* Get the real call RTX if the insn sets a value, ie. returns. */
18968 if (GET_CODE (call) == SET)
18969 call = SET_SRC (call);
18970
18971 /* Check if it is a cmse_nonsecure_call. */
18972 unspec = XEXP (call, 0);
18973 if (GET_CODE (unspec) != UNSPEC
18974 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18975 continue;
18976
18977 /* Mark registers that need to be cleared. Those that hold a
18978 parameter are removed from the set further below. */
18979 bitmap_clear (to_clear_bitmap);
18980 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18981 max_int_regno - R0_REGNUM + 1);
18982
18983 /* Only look at the caller-saved floating point registers in case of
18984 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18985 lazy store and loads which clear both caller- and callee-saved
18986 registers. */
18987 if (!lazy_fpclear)
18988 {
18989 auto_sbitmap float_bitmap (maxregno + 1);
18990
18991 bitmap_clear (float_bitmap);
18992 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18993 max_fp_regno - FIRST_VFP_REGNUM + 1);
18994 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18995 }
18996
18997 /* Make sure the register used to hold the function address is not
18998 cleared. */
18999 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19000 gcc_assert (MEM_P (address));
19001 gcc_assert (REG_P (XEXP (address, 0)));
19002 address_regnum = REGNO (XEXP (address, 0));
19003 if (address_regnum <= max_int_regno)
19004 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19005
19006 /* Set basic block of call insn so that df rescan is performed on
19007 insns inserted here. */
19008 set_block_for_insn (insn, bb);
19009 df_set_flags (DF_DEFER_INSN_RESCAN);
19010 start_sequence ();
19011
19012 /* Make sure the scheduler doesn't schedule other insns beyond
19013 here. */
19014 emit_insn (gen_blockage ());
19015
19016 /* Walk through all arguments and clear registers appropriately.
19017 */
19018 fntype = TREE_TYPE (MEM_EXPR (address));
19019 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19020 NULL_TREE);
19021 args_so_far = pack_cumulative_args (&args_so_far_v);
19022 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19023 {
19024 rtx arg_rtx;
19025 uint64_t to_clear_args_mask;
19026
19027 if (VOID_TYPE_P (arg_type))
19028 continue;
19029
19030 function_arg_info arg (arg_type, /*named=*/true);
19031 if (!first_param)
19032 /* ??? We should advance after processing the argument and pass
19033 the argument we're advancing past. */
19034 arm_function_arg_advance (args_so_far, arg);
19035
19036 arg_rtx = arm_function_arg (args_so_far, arg);
19037 gcc_assert (REG_P (arg_rtx));
19038 to_clear_args_mask
19039 = compute_not_to_clear_mask (arg_type, arg_rtx,
19040 REGNO (arg_rtx),
19041 &padding_bits_to_clear[0]);
19042 if (to_clear_args_mask)
19043 {
19044 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19045 {
19046 if (to_clear_args_mask & (1ULL << regno))
19047 bitmap_clear_bit (to_clear_bitmap, regno);
19048 }
19049 }
19050
19051 first_param = false;
19052 }
19053
19054 /* We use right shift and left shift to clear the LSB of the address
19055 we jump to instead of using bic, to avoid having to use an extra
19056 register on Thumb-1. */
19057 clearing_reg = XEXP (address, 0);
19058 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19059 emit_insn (gen_rtx_SET (clearing_reg, shift));
19060 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19061 emit_insn (gen_rtx_SET (clearing_reg, shift));
19062
19063 if (clear_callee_saved)
19064 {
19065 rtx push_insn =
19066 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19067 /* Disable frame debug info in push because it needs to be
19068 disabled for pop (see below). */
19069 RTX_FRAME_RELATED_P (push_insn) = 0;
19070
19071 /* Lazy store multiple. */
19072 if (lazy_fpclear)
19073 {
19074 rtx imm;
19075 rtx_insn *add_insn;
19076
19077 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19078 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19079 stack_pointer_rtx, imm));
19080 /* If we have the frame pointer, then it will be the
19081 CFA reg. Otherwise, the stack pointer is the CFA
19082 reg, so we need to emit a CFA adjust. */
19083 if (!frame_pointer_needed)
19084 arm_add_cfa_adjust_cfa_note (add_insn,
19085 - lazy_store_stack_frame_size,
19086 stack_pointer_rtx,
19087 stack_pointer_rtx);
19088 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19089 }
19090 /* Save VFP callee-saved registers. */
19091 else
19092 {
19093 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19094 (max_fp_regno - D7_VFP_REGNUM) / 2);
19095 /* Disable frame debug info in push because it needs to be
19096 disabled for vpop (see below). */
19097 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19098 }
19099 }
19100
19101 /* Clear caller-saved registers that might leak secrets before doing a
19102 non-secure call. */
19103 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19104 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19105 NUM_ARG_REGS, ip_reg, clearing_reg);
19106
19107 seq = get_insns ();
19108 end_sequence ();
19109 emit_insn_before (seq, insn);
19110
19111 if (TARGET_HAVE_FPCXT_CMSE)
19112 {
19113 rtx_insn *last, *pop_insn, *after = insn;
19114
19115 start_sequence ();
19116
19117 /* Lazy load multiple done as part of libcall in Armv8-M. */
19118 if (lazy_fpclear)
19119 {
19120 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19121 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19122 rtx_insn *add_insn =
19123 emit_insn (gen_addsi3 (stack_pointer_rtx,
19124 stack_pointer_rtx, imm));
19125 if (!frame_pointer_needed)
19126 arm_add_cfa_adjust_cfa_note (add_insn,
19127 lazy_store_stack_frame_size,
19128 stack_pointer_rtx,
19129 stack_pointer_rtx);
19130 }
19131 /* Restore VFP callee-saved registers. */
19132 else
19133 {
19134 int nb_callee_saved_vfp_regs =
19135 (max_fp_regno - D7_VFP_REGNUM) / 2;
19136 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19137 nb_callee_saved_vfp_regs,
19138 stack_pointer_rtx);
19139 /* Disable frame debug info in vpop because the SP adjustment
19140 is made using a CFA adjustment note while the CFA used is
19141 sometimes R7. This then causes an assert failure in the
19142 CFI note creation code. */
19143 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19144 }
19145
19146 arm_emit_multi_reg_pop (callee_saved_mask);
19147 pop_insn = get_last_insn ();
19148
19149 /* Disable frame debug info in pop because the notes reset the state
19150 of popped registers to what it was at the beginning of the
19151 function, before the prologue. This leads to incorrect state
19152 when doing the pop after the nonsecure call for registers that
19153 are pushed both in prologue and before the nonsecure call.
19154
19155 It also occasionally triggers an assert failure in CFI note
19156 creation code when there are two codepaths to the epilogue,
19157 one of which does not go through the nonsecure call.
19158 Obviously this means that debugging between the push and pop is
19159 not reliable. */
19160 RTX_FRAME_RELATED_P (pop_insn) = 0;
19161
19162 seq = get_insns ();
19163 last = get_last_insn ();
19164 end_sequence ();
19165
19166 emit_insn_after (seq, after);
19167
19168 /* Skip the pop we have just inserted after the nonsecure call; we know
19169 it does not contain a nonsecure call. */
19170 insn = last;
19171 }
19172 }
19173 }
19174 }
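/* Putting it together, the code emitted around a cmse_nonsecure_call on an
   Armv8.1-M softfp target looks roughly like this (an illustrative sketch;
   rN stands for whichever register holds the function address):

	lsrs	rN, rN, #1	@ clear the address LSB without needing BIC
	lsls	rN, rN, #1
	push	{r4-r11}	@ callee_saved_mask, frame notes suppressed
	sub	sp, sp, #frame	@ frame = lazy_store_stack_frame_size
	vlstm	sp		@ lazy FP context save
	...			@ clear padding bits and unused registers
	...			@ the nonsecure call itself
	vlldm	sp		@ lazy FP context restore
	add	sp, sp, #frame
	pop	{r4-r11}	@ again with frame notes suppressed  */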
19175
19176 /* Rewrite move insn into subtract of 0 if the condition codes will
19177 be useful in next conditional jump insn. */
19178
19179 static void
19180 thumb1_reorg (void)
19181 {
19182 basic_block bb;
19183
19184 FOR_EACH_BB_FN (bb, cfun)
19185 {
19186 rtx dest, src;
19187 rtx cmp, op0, op1, set = NULL;
19188 rtx_insn *prev, *insn = BB_END (bb);
19189 bool insn_clobbered = false;
19190
19191 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19192 insn = PREV_INSN (insn);
19193
19194 /* Find the last cbranchsi4_insn in basic block BB. */
19195 if (insn == BB_HEAD (bb)
19196 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19197 continue;
19198
19199 /* Get the register with which we are comparing. */
19200 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19201 op0 = XEXP (cmp, 0);
19202 op1 = XEXP (cmp, 1);
19203
19204 /* Check that comparison is against ZERO. */
19205 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19206 continue;
19207
19208 /* Find the first flag setting insn before INSN in basic block BB. */
19209 gcc_assert (insn != BB_HEAD (bb));
19210 for (prev = PREV_INSN (insn);
19211 (!insn_clobbered
19212 && prev != BB_HEAD (bb)
19213 && (NOTE_P (prev)
19214 || DEBUG_INSN_P (prev)
19215 || ((set = single_set (prev)) != NULL
19216 && get_attr_conds (prev) == CONDS_NOCOND)));
19217 prev = PREV_INSN (prev))
19218 {
19219 if (reg_set_p (op0, prev))
19220 insn_clobbered = true;
19221 }
19222
19223 /* Skip if op0 is clobbered by an insn other than prev. */
19224 if (insn_clobbered)
19225 continue;
19226
19227 if (!set)
19228 continue;
19229
19230 dest = SET_DEST (set);
19231 src = SET_SRC (set);
19232 if (!low_register_operand (dest, SImode)
19233 || !low_register_operand (src, SImode))
19234 continue;
19235
19236 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19237 in INSN. Both src and dest of the move insn are checked. */
19238 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19239 {
19240 dest = copy_rtx (dest);
19241 src = copy_rtx (src);
19242 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19243 PATTERN (prev) = gen_rtx_SET (dest, src);
19244 INSN_CODE (prev) = -1;
19245 /* Set test register in INSN to dest. */
19246 XEXP (cmp, 0) = copy_rtx (dest);
19247 INSN_CODE (insn) = -1;
19248 }
19249 }
19250 }
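/* For example (illustrative register numbers), when a basic block contains

	movs	r3, r2
	...
	cmp	r2, #0		@ part of the final cbranchsi4_insn
	beq	.L1

   the move is rewritten as "subs r3, r2, #0" and the branch is changed to
   test r3, so that the Thumb-1 output machinery can omit the now redundant
   comparison, the flags having already been set by the subtract.  */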
19251
19252 /* Convert instructions to their cc-clobbering variant if possible, since
19253 that allows us to use smaller encodings. */
19254
19255 static void
19256 thumb2_reorg (void)
19257 {
19258 basic_block bb;
19259 regset_head live;
19260
19261 INIT_REG_SET (&live);
19262
19263 /* We are freeing block_for_insn in the toplev to keep compatibility
19264 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19265 compute_bb_for_insn ();
19266 df_analyze ();
19267
19268 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19269
19270 FOR_EACH_BB_FN (bb, cfun)
19271 {
19272 if ((current_tune->disparage_flag_setting_t16_encodings
19273 == tune_params::DISPARAGE_FLAGS_ALL)
19274 && optimize_bb_for_speed_p (bb))
19275 continue;
19276
19277 rtx_insn *insn;
19278 Convert_Action action = SKIP;
19279 Convert_Action action_for_partial_flag_setting
19280 = ((current_tune->disparage_flag_setting_t16_encodings
19281 != tune_params::DISPARAGE_FLAGS_NEITHER)
19282 && optimize_bb_for_speed_p (bb))
19283 ? SKIP : CONV;
19284
19285 COPY_REG_SET (&live, DF_LR_OUT (bb));
19286 df_simulate_initialize_backwards (bb, &live);
19287 FOR_BB_INSNS_REVERSE (bb, insn)
19288 {
19289 if (NONJUMP_INSN_P (insn)
19290 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19291 && GET_CODE (PATTERN (insn)) == SET)
19292 {
19293 action = SKIP;
19294 rtx pat = PATTERN (insn);
19295 rtx dst = XEXP (pat, 0);
19296 rtx src = XEXP (pat, 1);
19297 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19298
19299 if (UNARY_P (src) || BINARY_P (src))
19300 op0 = XEXP (src, 0);
19301
19302 if (BINARY_P (src))
19303 op1 = XEXP (src, 1);
19304
19305 if (low_register_operand (dst, SImode))
19306 {
19307 switch (GET_CODE (src))
19308 {
19309 case PLUS:
19310 /* Adding two registers and storing the result
19311 in the first source is already a 16-bit
19312 operation. */
19313 if (rtx_equal_p (dst, op0)
19314 && register_operand (op1, SImode))
19315 break;
19316
19317 if (low_register_operand (op0, SImode))
19318 {
19319 /* ADDS <Rd>,<Rn>,<Rm> */
19320 if (low_register_operand (op1, SImode))
19321 action = CONV;
19322 /* ADDS <Rdn>,#<imm8> */
19323 /* SUBS <Rdn>,#<imm8> */
19324 else if (rtx_equal_p (dst, op0)
19325 && CONST_INT_P (op1)
19326 && IN_RANGE (INTVAL (op1), -255, 255))
19327 action = CONV;
19328 /* ADDS <Rd>,<Rn>,#<imm3> */
19329 /* SUBS <Rd>,<Rn>,#<imm3> */
19330 else if (CONST_INT_P (op1)
19331 && IN_RANGE (INTVAL (op1), -7, 7))
19332 action = CONV;
19333 }
19334 /* ADCS <Rd>, <Rn> */
19335 else if (GET_CODE (XEXP (src, 0)) == PLUS
19336 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19337 && low_register_operand (XEXP (XEXP (src, 0), 1),
19338 SImode)
19339 && COMPARISON_P (op1)
19340 && cc_register (XEXP (op1, 0), VOIDmode)
19341 && maybe_get_arm_condition_code (op1) == ARM_CS
19342 && XEXP (op1, 1) == const0_rtx)
19343 action = CONV;
19344 break;
19345
19346 case MINUS:
19347 /* RSBS <Rd>,<Rn>,#0
19348 Not handled here: see NEG below. */
19349 /* SUBS <Rd>,<Rn>,#<imm3>
19350 SUBS <Rdn>,#<imm8>
19351 Not handled here: see PLUS above. */
19352 /* SUBS <Rd>,<Rn>,<Rm> */
19353 if (low_register_operand (op0, SImode)
19354 && low_register_operand (op1, SImode))
19355 action = CONV;
19356 break;
19357
19358 case MULT:
19359 /* MULS <Rdm>,<Rn>,<Rdm>
19360 As an exception to the rule, this is only used
19361 when optimizing for size since MULS is slow on all
19362 known implementations. We do not even want to use
19363 MULS in cold code, if optimizing for speed, so we
19364 test the global flag here. */
19365 if (!optimize_size)
19366 break;
19367 /* Fall through. */
19368 case AND:
19369 case IOR:
19370 case XOR:
19371 /* ANDS <Rdn>,<Rm> */
19372 if (rtx_equal_p (dst, op0)
19373 && low_register_operand (op1, SImode))
19374 action = action_for_partial_flag_setting;
19375 else if (rtx_equal_p (dst, op1)
19376 && low_register_operand (op0, SImode))
19377 action = action_for_partial_flag_setting == SKIP
19378 ? SKIP : SWAP_CONV;
19379 break;
19380
19381 case ASHIFTRT:
19382 case ASHIFT:
19383 case LSHIFTRT:
19384 /* ASRS <Rdn>,<Rm> */
19385 /* LSRS <Rdn>,<Rm> */
19386 /* LSLS <Rdn>,<Rm> */
19387 if (rtx_equal_p (dst, op0)
19388 && low_register_operand (op1, SImode))
19389 action = action_for_partial_flag_setting;
19390 /* ASRS <Rd>,<Rm>,#<imm5> */
19391 /* LSRS <Rd>,<Rm>,#<imm5> */
19392 /* LSLS <Rd>,<Rm>,#<imm5> */
19393 else if (low_register_operand (op0, SImode)
19394 && CONST_INT_P (op1)
19395 && IN_RANGE (INTVAL (op1), 0, 31))
19396 action = action_for_partial_flag_setting;
19397 break;
19398
19399 case ROTATERT:
19400 /* RORS <Rdn>,<Rm> */
19401 if (rtx_equal_p (dst, op0)
19402 && low_register_operand (op1, SImode))
19403 action = action_for_partial_flag_setting;
19404 break;
19405
19406 case NOT:
19407 /* MVNS <Rd>,<Rm> */
19408 if (low_register_operand (op0, SImode))
19409 action = action_for_partial_flag_setting;
19410 break;
19411
19412 case NEG:
19413 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19414 if (low_register_operand (op0, SImode))
19415 action = CONV;
19416 break;
19417
19418 case CONST_INT:
19419 /* MOVS <Rd>,#<imm8> */
19420 if (CONST_INT_P (src)
19421 && IN_RANGE (INTVAL (src), 0, 255))
19422 action = action_for_partial_flag_setting;
19423 break;
19424
19425 case REG:
19426 /* MOVS and MOV<c> with registers have different
19427 encodings, so are not relevant here. */
19428 break;
19429
19430 default:
19431 break;
19432 }
19433 }
19434
19435 if (action != SKIP)
19436 {
19437 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19438 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19439 rtvec vec;
19440
19441 if (action == SWAP_CONV)
19442 {
19443 src = copy_rtx (src);
19444 XEXP (src, 0) = op1;
19445 XEXP (src, 1) = op0;
19446 pat = gen_rtx_SET (dst, src);
19447 vec = gen_rtvec (2, pat, clobber);
19448 }
19449 else /* action == CONV */
19450 vec = gen_rtvec (2, pat, clobber);
19451
19452 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19453 INSN_CODE (insn) = -1;
19454 }
19455 }
19456
19457 if (NONDEBUG_INSN_P (insn))
19458 df_simulate_one_insn_backwards (bb, insn, &live);
19459 }
19460 }
19461
19462 CLEAR_REG_SET (&live);
19463 }
19464
19465 /* GCC puts the pool in the wrong place for ARM, since we can only
19466 load addresses a limited distance around the PC. We do some
19467 special munging to move the constant pool values to the correct
19468 point in the code. */
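/* A rough illustration only (labels and values invented): a constant that
   cannot be encoded as an immediate is loaded PC-relative from a minipool
   dumped after a barrier, e.g.

	ldr	r3, .LCP0	@ PC-relative load of the pooled constant
	...
	b	.Lskip		@ barrier so execution never reaches the pool
   .LCP0:
	.word	0x12345678	@ minipool entry
   .Lskip:
	...  */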
19469 static void
19470 arm_reorg (void)
19471 {
19472 rtx_insn *insn;
19473 HOST_WIDE_INT address = 0;
19474 Mfix * fix;
19475
19476 if (use_cmse)
19477 cmse_nonsecure_call_inline_register_clear ();
19478
19479 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19480 if (cfun->is_thunk)
19481 ;
19482 else if (TARGET_THUMB1)
19483 thumb1_reorg ();
19484 else if (TARGET_THUMB2)
19485 thumb2_reorg ();
19486
19487 /* Ensure all insns that must be split have been split at this point.
19488 Otherwise, the pool placement code below may compute incorrect
19489 insn lengths. Note that when optimizing, all insns have already
19490 been split at this point. */
19491 if (!optimize)
19492 split_all_insns_noflow ();
19493
19494 /* When literal pools are disabled it should no longer be necessary to
19495 create any, so make sure we do not even attempt to. */
19496 if (arm_disable_literal_pool)
19497 return;
19498
19499 minipool_fix_head = minipool_fix_tail = NULL;
19500
19501 /* The first insn must always be a note, or the code below won't
19502 scan it properly. */
19503 insn = get_insns ();
19504 gcc_assert (NOTE_P (insn));
19505 minipool_pad = 0;
19506
19507 /* Scan all the insns and record the operands that will need fixing. */
19508 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19509 {
19510 if (BARRIER_P (insn))
19511 push_minipool_barrier (insn, address);
19512 else if (INSN_P (insn))
19513 {
19514 rtx_jump_table_data *table;
19515
19516 note_invalid_constants (insn, address, true);
19517 address += get_attr_length (insn);
19518
19519 /* If the insn is a vector jump, add the size of the table
19520 and skip the table. */
19521 if (tablejump_p (insn, NULL, &table))
19522 {
19523 address += get_jump_table_size (table);
19524 insn = table;
19525 }
19526 }
19527 else if (LABEL_P (insn))
19528 /* Add the worst-case padding due to alignment. We don't add
19529 the _current_ padding because the minipool insertions
19530 themselves might change it. */
19531 address += get_label_padding (insn);
19532 }
19533
19534 fix = minipool_fix_head;
19535
19536 /* Now scan the fixups and perform the required changes. */
19537 while (fix)
19538 {
19539 Mfix * ftmp;
19540 Mfix * fdel;
19541 Mfix * last_added_fix;
19542 Mfix * last_barrier = NULL;
19543 Mfix * this_fix;
19544
19545 /* Skip any further barriers before the next fix. */
19546 while (fix && BARRIER_P (fix->insn))
19547 fix = fix->next;
19548
19549 /* No more fixes. */
19550 if (fix == NULL)
19551 break;
19552
19553 last_added_fix = NULL;
19554
19555 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19556 {
19557 if (BARRIER_P (ftmp->insn))
19558 {
19559 if (ftmp->address >= minipool_vector_head->max_address)
19560 break;
19561
19562 last_barrier = ftmp;
19563 }
19564 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19565 break;
19566
19567 last_added_fix = ftmp; /* Keep track of the last fix added. */
19568 }
19569
19570 /* If we found a barrier, drop back to that; any fixes that we
19571 could have reached but come after the barrier will now go in
19572 the next mini-pool. */
19573 if (last_barrier != NULL)
19574 {
19575 /* Reduce the refcount for those fixes that won't go into this
19576 pool after all. */
19577 for (fdel = last_barrier->next;
19578 fdel && fdel != ftmp;
19579 fdel = fdel->next)
19580 {
19581 fdel->minipool->refcount--;
19582 fdel->minipool = NULL;
19583 }
19584
19585 ftmp = last_barrier;
19586 }
19587 else
19588 {
19589 /* ftmp is the first fix that we can't fit into this pool and
19590 there are no natural barriers that we could use. Insert a
19591 new barrier in the code somewhere between the previous
19592 fix and this one, and arrange to jump around it. */
19593 HOST_WIDE_INT max_address;
19594
19595 /* The last item on the list of fixes must be a barrier, so
19596 we can never run off the end of the list of fixes without
19597 last_barrier being set. */
19598 gcc_assert (ftmp);
19599
19600 max_address = minipool_vector_head->max_address;
19601 /* Check that there isn't another fix that is in range that
19602 we couldn't fit into this pool because the pool was
19603 already too large: we need to put the pool before such an
19604 instruction. The pool itself may come just after the
19605 fix because create_fix_barrier also allows space for a
19606 jump instruction. */
19607 if (ftmp->address < max_address)
19608 max_address = ftmp->address + 1;
19609
19610 last_barrier = create_fix_barrier (last_added_fix, max_address);
19611 }
19612
19613 assign_minipool_offsets (last_barrier);
19614
19615 while (ftmp)
19616 {
19617 if (!BARRIER_P (ftmp->insn)
19618 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19619 == NULL))
19620 break;
19621
19622 ftmp = ftmp->next;
19623 }
19624
19625 /* Scan over the fixes we have identified for this pool, fixing them
19626 up and adding the constants to the pool itself. */
19627 for (this_fix = fix; this_fix && ftmp != this_fix;
19628 this_fix = this_fix->next)
19629 if (!BARRIER_P (this_fix->insn))
19630 {
19631 rtx addr
19632 = plus_constant (Pmode,
19633 gen_rtx_LABEL_REF (VOIDmode,
19634 minipool_vector_label),
19635 this_fix->minipool->offset);
19636 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19637 }
19638
19639 dump_minipool (last_barrier->insn);
19640 fix = ftmp;
19641 }
19642
19643 /* From now on we must synthesize any constants that we can't handle
19644 directly. This can happen if the RTL gets split during final
19645 instruction generation. */
19646 cfun->machine->after_arm_reorg = 1;
19647
19648 /* Free the minipool memory. */
19649 obstack_free (&minipool_obstack, minipool_startobj);
19650 }
19651 \f
19652 /* Routines to output assembly language. */
19653
19654 /* Return the string representation of the real value passed in. */
19655 static const char *
19656 fp_const_from_val (REAL_VALUE_TYPE *r)
19657 {
19658 if (!fp_consts_inited)
19659 init_fp_table ();
19660
19661 gcc_assert (real_equal (r, &value_fp0));
19662 return "0";
19663 }
19664
19665 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19666 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19667 insn is in the list, and UPDATE is true iff the list contains an
19668 explicit update of the base register. */
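/* For illustration (register numbers invented): with SP as the base register
   and an explicit update this emits e.g. "pop {r4, r5, pc}"; otherwise it
   falls back to "ldmfd sp, {...}", "ldmia r7!, {...}" or "ldm r7, {...}"
   depending on the base register and whether it is written back, with a "^"
   suffix appended when returning from an interrupt. */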
19669 void
19670 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19671 bool update)
19672 {
19673 int i;
19674 char pattern[100];
19675 int offset;
19676 const char *conditional;
19677 int num_saves = XVECLEN (operands[0], 0);
19678 unsigned int regno;
19679 unsigned int regno_base = REGNO (operands[1]);
19680 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19681
19682 offset = 0;
19683 offset += update ? 1 : 0;
19684 offset += return_pc ? 1 : 0;
19685
19686 /* Is the base register in the list? */
19687 for (i = offset; i < num_saves; i++)
19688 {
19689 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19690 /* If SP is in the list, then the base register must be SP. */
19691 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19692 /* If base register is in the list, there must be no explicit update. */
19693 if (regno == regno_base)
19694 gcc_assert (!update);
19695 }
19696
19697 conditional = reverse ? "%?%D0" : "%?%d0";
19698 /* Can't use POP if returning from an interrupt. */
19699 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19700 sprintf (pattern, "pop%s\t{", conditional);
19701 else
19702 {
19703 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19704 It's just a convention; their semantics are identical. */
19705 if (regno_base == SP_REGNUM)
19706 sprintf (pattern, "ldmfd%s\t", conditional);
19707 else if (update)
19708 sprintf (pattern, "ldmia%s\t", conditional);
19709 else
19710 sprintf (pattern, "ldm%s\t", conditional);
19711
19712 strcat (pattern, reg_names[regno_base]);
19713 if (update)
19714 strcat (pattern, "!, {");
19715 else
19716 strcat (pattern, ", {");
19717 }
19718
19719 /* Output the first destination register. */
19720 strcat (pattern,
19721 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19722
19723 /* Output the rest of the destination registers. */
19724 for (i = offset + 1; i < num_saves; i++)
19725 {
19726 strcat (pattern, ", ");
19727 strcat (pattern,
19728 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19729 }
19730
19731 strcat (pattern, "}");
19732
19733 if (interrupt_p && return_pc)
19734 strcat (pattern, "^");
19735
19736 output_asm_insn (pattern, &cond);
19737 }
19738
19739
19740 /* Output the assembly for a store multiple. */
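/* For example (operands invented): a push to the stack comes out as
   "vpush.64 {d8, d9, ...}", while a store through another base register
   comes out as "vstmdb.64 r3!, {d8, d9, ...}". */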
19741
19742 const char *
19743 vfp_output_vstmd (rtx * operands)
19744 {
19745 char pattern[100];
19746 int p;
19747 int base;
19748 int i;
19749 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19750 ? XEXP (operands[0], 0)
19751 : XEXP (XEXP (operands[0], 0), 0);
19752 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19753
19754 if (push_p)
19755 strcpy (pattern, "vpush%?.64\t{%P1");
19756 else
19757 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19758
19759 p = strlen (pattern);
19760
19761 gcc_assert (REG_P (operands[1]));
19762
19763 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19764 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19765 {
19766 p += sprintf (&pattern[p], ", d%d", base + i);
19767 }
19768 strcpy (&pattern[p], "}");
19769
19770 output_asm_insn (pattern, operands);
19771 return "";
19772 }
19773
19774
19775 /* Emit RTL to save a block of VFP register pairs to the stack. Return the
19776 number of bytes pushed. */
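/* Sketch of the generated RTL (simplified): a single PARALLEL containing a
   PRE_MODIFY store of the whole block plus USEs of the remaining D
   registers, with a REG_FRAME_RELATED_EXPR note attached that spells out the
   equivalent SP adjustment and the individual register stores for the
   unwinder. */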
19777
19778 static int
19779 vfp_emit_fstmd (int base_reg, int count)
19780 {
19781 rtx par;
19782 rtx dwarf;
19783 rtx tmp, reg;
19784 int i;
19785
19786 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19787 register pairs are stored by a store multiple insn. We avoid this
19788 by pushing an extra pair. */
19789 if (count == 2 && !arm_arch6)
19790 {
19791 if (base_reg == LAST_VFP_REGNUM - 3)
19792 base_reg -= 2;
19793 count++;
19794 }
19795
19796 /* FSTMD may not store more than 16 doubleword registers at once. Split
19797 larger stores into multiple parts (up to a maximum of two, in
19798 practice). */
19799 if (count > 16)
19800 {
19801 int saved;
19802 /* NOTE: base_reg is an internal register number, so each D register
19803 counts as 2. */
19804 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19805 saved += vfp_emit_fstmd (base_reg, 16);
19806 return saved;
19807 }
19808
19809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19810 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19811
19812 reg = gen_rtx_REG (DFmode, base_reg);
19813 base_reg += 2;
19814
19815 XVECEXP (par, 0, 0)
19816 = gen_rtx_SET (gen_frame_mem
19817 (BLKmode,
19818 gen_rtx_PRE_MODIFY (Pmode,
19819 stack_pointer_rtx,
19820 plus_constant
19821 (Pmode, stack_pointer_rtx,
19822 - (count * 8)))
19823 ),
19824 gen_rtx_UNSPEC (BLKmode,
19825 gen_rtvec (1, reg),
19826 UNSPEC_PUSH_MULT));
19827
19828 tmp = gen_rtx_SET (stack_pointer_rtx,
19829 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19830 RTX_FRAME_RELATED_P (tmp) = 1;
19831 XVECEXP (dwarf, 0, 0) = tmp;
19832
19833 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19834 RTX_FRAME_RELATED_P (tmp) = 1;
19835 XVECEXP (dwarf, 0, 1) = tmp;
19836
19837 for (i = 1; i < count; i++)
19838 {
19839 reg = gen_rtx_REG (DFmode, base_reg);
19840 base_reg += 2;
19841 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19842
19843 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19844 plus_constant (Pmode,
19845 stack_pointer_rtx,
19846 i * 8)),
19847 reg);
19848 RTX_FRAME_RELATED_P (tmp) = 1;
19849 XVECEXP (dwarf, 0, i + 1) = tmp;
19850 }
19851
19852 par = emit_insn (par);
19853 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19854 RTX_FRAME_RELATED_P (par) = 1;
19855
19856 return count * 8;
19857 }
19858
19859 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19860 has the cmse_nonsecure_call attribute; return false otherwise. */
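/* For instance (an illustrative declaration), calls made through a pointer
   of a type such as
     typedef void __attribute__ ((cmse_nonsecure_call)) nonsecure_fn (void);
   are detected here when compiling with -mcmse, since the attribute is
   looked up on the type of ADDR. */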
19861
19862 bool
19863 detect_cmse_nonsecure_call (tree addr)
19864 {
19865 if (!addr)
19866 return false;
19867
19868 tree fntype = TREE_TYPE (addr);
19869 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19870 TYPE_ATTRIBUTES (fntype)))
19871 return true;
19872 return false;
19873 }
19874
19875
19876 /* Emit a call instruction with pattern PAT. ADDR is the address of
19877 the call target. */
19878
19879 void
19880 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19881 {
19882 rtx insn;
19883
19884 insn = emit_call_insn (pat);
19885
19886 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19887 If the call might use such an entry, add a use of the PIC register
19888 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19889 if (TARGET_VXWORKS_RTP
19890 && flag_pic
19891 && !sibcall
19892 && SYMBOL_REF_P (addr)
19893 && (SYMBOL_REF_DECL (addr)
19894 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19895 : !SYMBOL_REF_LOCAL_P (addr)))
19896 {
19897 require_pic_register (NULL_RTX, false /*compute_now*/);
19898 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19899 }
19900
19901 if (TARGET_FDPIC)
19902 {
19903 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19904 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19905 }
19906
19907 if (TARGET_AAPCS_BASED)
19908 {
19909 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19910 linker. We need to add an IP clobber to allow setting
19911 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19912 is not needed since it's a fixed register. */
19913 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19914 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19915 }
19916 }
19917
19918 /* Output a 'call' insn. */
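/* A sketch of the output (register invented): the return address is set up
   by hand with "mov lr, pc" and the branch is then either "bx r2" (when
   interworking or ARMv4T bx is available) or "mov pc, r2"; a call through lr
   is first copied into ip. */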
19919 const char *
19920 output_call (rtx *operands)
19921 {
19922 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19923
19924 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
19925 if (REGNO (operands[0]) == LR_REGNUM)
19926 {
19927 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19928 output_asm_insn ("mov%?\t%0, %|lr", operands);
19929 }
19930
19931 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19932
19933 if (TARGET_INTERWORK || arm_arch4t)
19934 output_asm_insn ("bx%?\t%0", operands);
19935 else
19936 output_asm_insn ("mov%?\t%|pc, %0", operands);
19937
19938 return "";
19939 }
19940
19941 /* Output a move of a long double from ARM registers to ARM registers.
19942 OPERANDS[0] is the destination.
19943 OPERANDS[1] is the source. */
19944 const char *
19945 output_mov_long_double_arm_from_arm (rtx *operands)
19946 {
19947 /* We have to be careful here because the two might overlap. */
19948 int dest_start = REGNO (operands[0]);
19949 int src_start = REGNO (operands[1]);
19950 rtx ops[2];
19951 int i;
19952
19953 if (dest_start < src_start)
19954 {
19955 for (i = 0; i < 3; i++)
19956 {
19957 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19958 ops[1] = gen_rtx_REG (SImode, src_start + i);
19959 output_asm_insn ("mov%?\t%0, %1", ops);
19960 }
19961 }
19962 else
19963 {
19964 for (i = 2; i >= 0; i--)
19965 {
19966 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19967 ops[1] = gen_rtx_REG (SImode, src_start + i);
19968 output_asm_insn ("mov%?\t%0, %1", ops);
19969 }
19970 }
19971
19972 return "";
19973 }
19974
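/* Emit a constant or symbolic SRC into DEST as a pair of half-word sets.
   For a CONST_INT the low 16 bits are set first and the upper 16 bits, when
   nonzero, are inserted via a ZERO_EXTRACT (the movw/movt idiom); for other
   operands a HIGH/LO_SUM pair is used. A REG_EQUAL note records the full
   value. */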
19975 void
19976 arm_emit_movpair (rtx dest, rtx src)
19977 {
19978 /* If the src is an immediate, simplify it. */
19979 if (CONST_INT_P (src))
19980 {
19981 HOST_WIDE_INT val = INTVAL (src);
19982 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19983 if ((val >> 16) & 0x0000ffff)
19984 {
19985 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19986 GEN_INT (16)),
19987 GEN_INT ((val >> 16) & 0x0000ffff));
19988 rtx_insn *insn = get_last_insn ();
19989 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19990 }
19991 return;
19992 }
19993 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19994 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19995 rtx_insn *insn = get_last_insn ();
19996 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19997 }
19998
19999 /* Output a move between double words. It must be REG<-MEM
20000 or MEM<-REG. */
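/* For illustration (operands invented): a simple register-addressed load
   typically comes out as "ldrd r0, [r2]" when LDRD is usable for the
   register pair, and as "ldmia r2, {r0, r1}" otherwise; the cases below
   additionally handle auto-increment addresses, overlapping registers and
   out-of-range offsets. */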
20001 const char *
20002 output_move_double (rtx *operands, bool emit, int *count)
20003 {
20004 enum rtx_code code0 = GET_CODE (operands[0]);
20005 enum rtx_code code1 = GET_CODE (operands[1]);
20006 rtx otherops[3];
20007 if (count)
20008 *count = 1;
20009
20010 /* The only case when this might happen is when
20011 you are looking at the length of a DImode instruction
20012 that has an invalid constant in it. */
20013 if (code0 == REG && code1 != MEM)
20014 {
20015 gcc_assert (!emit);
20016 *count = 2;
20017 return "";
20018 }
20019
20020 if (code0 == REG)
20021 {
20022 unsigned int reg0 = REGNO (operands[0]);
20023 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20024
20025 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20026
20027 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20028
20029 switch (GET_CODE (XEXP (operands[1], 0)))
20030 {
20031 case REG:
20032
20033 if (emit)
20034 {
20035 if (can_ldrd
20036 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20037 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20038 else
20039 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20040 }
20041 break;
20042
20043 case PRE_INC:
20044 gcc_assert (can_ldrd);
20045 if (emit)
20046 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20047 break;
20048
20049 case PRE_DEC:
20050 if (emit)
20051 {
20052 if (can_ldrd)
20053 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20054 else
20055 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20056 }
20057 break;
20058
20059 case POST_INC:
20060 if (emit)
20061 {
20062 if (can_ldrd)
20063 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20064 else
20065 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20066 }
20067 break;
20068
20069 case POST_DEC:
20070 gcc_assert (can_ldrd);
20071 if (emit)
20072 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20073 break;
20074
20075 case PRE_MODIFY:
20076 case POST_MODIFY:
20077 /* Autoincrement addressing modes should never have overlapping
20078 base and destination registers, and overlapping index registers
20079 are already prohibited, so this doesn't need to worry about
20080 fix_cm3_ldrd. */
20081 otherops[0] = operands[0];
20082 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20083 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20084
20085 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20086 {
20087 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20088 {
20089 /* Registers overlap so split out the increment. */
20090 if (emit)
20091 {
20092 gcc_assert (can_ldrd);
20093 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20094 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20095 }
20096 if (count)
20097 *count = 2;
20098 }
20099 else
20100 {
20101 /* Use a single insn if we can.
20102 FIXME: IWMMXT allows offsets larger than ldrd can
20103 handle, fix these up with a pair of ldr. */
20104 if (can_ldrd
20105 && (TARGET_THUMB2
20106 || !CONST_INT_P (otherops[2])
20107 || (INTVAL (otherops[2]) > -256
20108 && INTVAL (otherops[2]) < 256)))
20109 {
20110 if (emit)
20111 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20112 }
20113 else
20114 {
20115 if (emit)
20116 {
20117 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20118 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20119 }
20120 if (count)
20121 *count = 2;
20122
20123 }
20124 }
20125 }
20126 else
20127 {
20128 /* Use a single insn if we can.
20129 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20130 fix these up with a pair of ldr. */
20131 if (can_ldrd
20132 && (TARGET_THUMB2
20133 || !CONST_INT_P (otherops[2])
20134 || (INTVAL (otherops[2]) > -256
20135 && INTVAL (otherops[2]) < 256)))
20136 {
20137 if (emit)
20138 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20139 }
20140 else
20141 {
20142 if (emit)
20143 {
20144 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20145 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20146 }
20147 if (count)
20148 *count = 2;
20149 }
20150 }
20151 break;
20152
20153 case LABEL_REF:
20154 case CONST:
20155 /* We might be able to use ldrd %0, %1 here. However, the range is
20156 different from that of ldr/adr, and it is broken on some ARMv7-M
20157 implementations. */
20158 /* Use the second register of the pair to avoid problematic
20159 overlap. */
20160 otherops[1] = operands[1];
20161 if (emit)
20162 output_asm_insn ("adr%?\t%0, %1", otherops);
20163 operands[1] = otherops[0];
20164 if (emit)
20165 {
20166 if (can_ldrd)
20167 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20168 else
20169 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20170 }
20171
20172 if (count)
20173 *count = 2;
20174 break;
20175
20176 /* ??? This needs checking for thumb2. */
20177 default:
20178 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20179 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20180 {
20181 otherops[0] = operands[0];
20182 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20183 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20184
20185 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20186 {
20187 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20188 {
20189 switch ((int) INTVAL (otherops[2]))
20190 {
20191 case -8:
20192 if (emit)
20193 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20194 return "";
20195 case -4:
20196 if (TARGET_THUMB2)
20197 break;
20198 if (emit)
20199 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20200 return "";
20201 case 4:
20202 if (TARGET_THUMB2)
20203 break;
20204 if (emit)
20205 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20206 return "";
20207 }
20208 }
20209 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
20210 operands[1] = otherops[0];
20211 if (can_ldrd
20212 && (REG_P (otherops[2])
20213 || TARGET_THUMB2
20214 || (CONST_INT_P (otherops[2])
20215 && INTVAL (otherops[2]) > -256
20216 && INTVAL (otherops[2]) < 256)))
20217 {
20218 if (reg_overlap_mentioned_p (operands[0],
20219 otherops[2]))
20220 {
20221 /* Swap base and index registers over to
20222 avoid a conflict. */
20223 std::swap (otherops[1], otherops[2]);
20224 }
20225 /* If both registers conflict, it will usually
20226 have been fixed by a splitter. */
20227 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20228 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20229 {
20230 if (emit)
20231 {
20232 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20233 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20234 }
20235 if (count)
20236 *count = 2;
20237 }
20238 else
20239 {
20240 otherops[0] = operands[0];
20241 if (emit)
20242 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20243 }
20244 return "";
20245 }
20246
20247 if (CONST_INT_P (otherops[2]))
20248 {
20249 if (emit)
20250 {
20251 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20252 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20253 else
20254 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20255 }
20256 }
20257 else
20258 {
20259 if (emit)
20260 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20261 }
20262 }
20263 else
20264 {
20265 if (emit)
20266 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20267 }
20268
20269 if (count)
20270 *count = 2;
20271
20272 if (can_ldrd)
20273 return "ldrd%?\t%0, [%1]";
20274
20275 return "ldmia%?\t%1, %M0";
20276 }
20277 else
20278 {
20279 otherops[1] = adjust_address (operands[1], SImode, 4);
20280 /* Take care of overlapping base/data reg. */
20281 if (reg_mentioned_p (operands[0], operands[1]))
20282 {
20283 if (emit)
20284 {
20285 output_asm_insn ("ldr%?\t%0, %1", otherops);
20286 output_asm_insn ("ldr%?\t%0, %1", operands);
20287 }
20288 if (count)
20289 *count = 2;
20290
20291 }
20292 else
20293 {
20294 if (emit)
20295 {
20296 output_asm_insn ("ldr%?\t%0, %1", operands);
20297 output_asm_insn ("ldr%?\t%0, %1", otherops);
20298 }
20299 if (count)
20300 *count = 2;
20301 }
20302 }
20303 }
20304 }
20305 else
20306 {
20307 /* Constraints should ensure this. */
20308 gcc_assert (code0 == MEM && code1 == REG);
20309 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20310 || (TARGET_ARM && TARGET_LDRD));
20311
20312 /* For TARGET_ARM the first source register of an STRD
20313 must be even. This is usually the case for double-word
20314 values but user assembly constraints can force an odd
20315 starting register. */
20316 bool allow_strd = TARGET_LDRD
20317 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20318 switch (GET_CODE (XEXP (operands[0], 0)))
20319 {
20320 case REG:
20321 if (emit)
20322 {
20323 if (allow_strd)
20324 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20325 else
20326 output_asm_insn ("stm%?\t%m0, %M1", operands);
20327 }
20328 break;
20329
20330 case PRE_INC:
20331 gcc_assert (allow_strd);
20332 if (emit)
20333 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20334 break;
20335
20336 case PRE_DEC:
20337 if (emit)
20338 {
20339 if (allow_strd)
20340 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20341 else
20342 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20343 }
20344 break;
20345
20346 case POST_INC:
20347 if (emit)
20348 {
20349 if (allow_strd)
20350 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20351 else
20352 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20353 }
20354 break;
20355
20356 case POST_DEC:
20357 gcc_assert (allow_strd);
20358 if (emit)
20359 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20360 break;
20361
20362 case PRE_MODIFY:
20363 case POST_MODIFY:
20364 otherops[0] = operands[1];
20365 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20366 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20367
20368 /* IWMMXT allows offsets larger than strd can handle,
20369 fix these up with a pair of str. */
20370 if (!TARGET_THUMB2
20371 && CONST_INT_P (otherops[2])
20372 && (INTVAL(otherops[2]) <= -256
20373 || INTVAL(otherops[2]) >= 256))
20374 {
20375 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20376 {
20377 if (emit)
20378 {
20379 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20380 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20381 }
20382 if (count)
20383 *count = 2;
20384 }
20385 else
20386 {
20387 if (emit)
20388 {
20389 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20390 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20391 }
20392 if (count)
20393 *count = 2;
20394 }
20395 }
20396 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20397 {
20398 if (emit)
20399 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20400 }
20401 else
20402 {
20403 if (emit)
20404 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20405 }
20406 break;
20407
20408 case PLUS:
20409 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20410 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20411 {
20412 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20413 {
20414 case -8:
20415 if (emit)
20416 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20417 return "";
20418
20419 case -4:
20420 if (TARGET_THUMB2)
20421 break;
20422 if (emit)
20423 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20424 return "";
20425
20426 case 4:
20427 if (TARGET_THUMB2)
20428 break;
20429 if (emit)
20430 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20431 return "";
20432 }
20433 }
20434 if (allow_strd
20435 && (REG_P (otherops[2])
20436 || TARGET_THUMB2
20437 || (CONST_INT_P (otherops[2])
20438 && INTVAL (otherops[2]) > -256
20439 && INTVAL (otherops[2]) < 256)))
20440 {
20441 otherops[0] = operands[1];
20442 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20443 if (emit)
20444 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20445 return "";
20446 }
20447 /* Fall through */
20448
20449 default:
20450 otherops[0] = adjust_address (operands[0], SImode, 4);
20451 otherops[1] = operands[1];
20452 if (emit)
20453 {
20454 output_asm_insn ("str%?\t%1, %0", operands);
20455 output_asm_insn ("str%?\t%H1, %0", otherops);
20456 }
20457 if (count)
20458 *count = 2;
20459 }
20460 }
20461
20462 return "";
20463 }
20464
20465 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20466 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20467
20468 const char *
20469 output_move_quad (rtx *operands)
20470 {
20471 if (REG_P (operands[0]))
20472 {
20473 /* Load, or reg->reg move. */
20474
20475 if (MEM_P (operands[1]))
20476 {
20477 switch (GET_CODE (XEXP (operands[1], 0)))
20478 {
20479 case REG:
20480 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20481 break;
20482
20483 case LABEL_REF:
20484 case CONST:
20485 output_asm_insn ("adr%?\t%0, %1", operands);
20486 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20487 break;
20488
20489 default:
20490 gcc_unreachable ();
20491 }
20492 }
20493 else
20494 {
20495 rtx ops[2];
20496 int dest, src, i;
20497
20498 gcc_assert (REG_P (operands[1]));
20499
20500 dest = REGNO (operands[0]);
20501 src = REGNO (operands[1]);
20502
20503 /* This seems pretty dumb, but hopefully GCC won't try to do it
20504 very often. */
20505 if (dest < src)
20506 for (i = 0; i < 4; i++)
20507 {
20508 ops[0] = gen_rtx_REG (SImode, dest + i);
20509 ops[1] = gen_rtx_REG (SImode, src + i);
20510 output_asm_insn ("mov%?\t%0, %1", ops);
20511 }
20512 else
20513 for (i = 3; i >= 0; i--)
20514 {
20515 ops[0] = gen_rtx_REG (SImode, dest + i);
20516 ops[1] = gen_rtx_REG (SImode, src + i);
20517 output_asm_insn ("mov%?\t%0, %1", ops);
20518 }
20519 }
20520 }
20521 else
20522 {
20523 gcc_assert (MEM_P (operands[0]));
20524 gcc_assert (REG_P (operands[1]));
20525 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20526
20527 switch (GET_CODE (XEXP (operands[0], 0)))
20528 {
20529 case REG:
20530 output_asm_insn ("stm%?\t%m0, %M1", operands);
20531 break;
20532
20533 default:
20534 gcc_unreachable ();
20535 }
20536 }
20537
20538 return "";
20539 }
20540
20541 /* Output a VFP load or store instruction. */
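/* Typical outputs (operands invented): "vldr.64 d8, [sp]" or
   "vstr.32 s0, [r3]" for plain addresses, and forms such as
   "vldmia.64 r0!, {d8}" / "vstmdb.64 r0!, {d8}" for post-increment and
   pre-decrement addresses. */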
20542
20543 const char *
20544 output_move_vfp (rtx *operands)
20545 {
20546 rtx reg, mem, addr, ops[2];
20547 int load = REG_P (operands[0]);
20548 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20549 int sp = (!TARGET_VFP_FP16INST
20550 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20551 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20552 const char *templ;
20553 char buff[50];
20554 machine_mode mode;
20555
20556 reg = operands[!load];
20557 mem = operands[load];
20558
20559 mode = GET_MODE (reg);
20560
20561 gcc_assert (REG_P (reg));
20562 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20563 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20564 || mode == SFmode
20565 || mode == DFmode
20566 || mode == HImode
20567 || mode == SImode
20568 || mode == DImode
20569 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20570 gcc_assert (MEM_P (mem));
20571
20572 addr = XEXP (mem, 0);
20573
20574 switch (GET_CODE (addr))
20575 {
20576 case PRE_DEC:
20577 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20578 ops[0] = XEXP (addr, 0);
20579 ops[1] = reg;
20580 break;
20581
20582 case POST_INC:
20583 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20584 ops[0] = XEXP (addr, 0);
20585 ops[1] = reg;
20586 break;
20587
20588 default:
20589 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20590 ops[0] = reg;
20591 ops[1] = mem;
20592 break;
20593 }
20594
20595 sprintf (buff, templ,
20596 load ? "ld" : "st",
20597 dp ? "64" : sp ? "32" : "16",
20598 dp ? "P" : "",
20599 integer_p ? "\t%@ int" : "");
20600 output_asm_insn (buff, ops);
20601
20602 return "";
20603 }
20604
20605 /* Output a Neon double-word or quad-word load or store, or a load
20606 or store for larger structure modes.
20607
20608 WARNING: The ordering of elements is unusual in big-endian mode,
20609 because the EABI requires that vectors stored in memory appear
20610 as though they were stored by a VSTM.
20611 GCC RTL defines element ordering based on in-memory order.
20612 This can be different from the architectural ordering of elements
20613 within a NEON register. The intrinsics defined in arm_neon.h use the
20614 NEON register element ordering, not the GCC RTL element ordering.
20615
20616 For example, the in-memory ordering of a big-endian quadword
20617 vector with 16-bit elements when stored from register pair {d0,d1}
20618 will be (lowest address first, d0[N] is NEON register element N):
20619
20620 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20621
20622 When necessary, quadword registers (dN, dN+1) are moved to ARM
20623 registers from rN in the order:
20624
20625 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20626
20627 So that STM/LDM can be used on vectors in ARM registers, and the
20628 same memory layout will result as if VSTM/VLDM were used.
20629
20630 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20631 possible, which allows use of appropriate alignment tags.
20632 Note that the choice of "64" is independent of the actual vector
20633 element size; this size simply ensures that the behavior is
20634 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20635
20636 Due to limitations of those instructions, use of VST1.64/VLD1.64
20637 is not possible if:
20638 - the address contains PRE_DEC, or
20639 - the mode refers to more than 4 double-word registers
20640
20641 In those cases, it would be possible to replace VSTM/VLDM by a
20642 sequence of instructions; this is not currently implemented since
20643 this is not certain to actually improve performance. */
20644
20645 const char *
20646 output_move_neon (rtx *operands)
20647 {
20648 rtx reg, mem, addr, ops[2];
20649 int regno, nregs, load = REG_P (operands[0]);
20650 const char *templ;
20651 char buff[50];
20652 machine_mode mode;
20653
20654 reg = operands[!load];
20655 mem = operands[load];
20656
20657 mode = GET_MODE (reg);
20658
20659 gcc_assert (REG_P (reg));
20660 regno = REGNO (reg);
20661 nregs = REG_NREGS (reg) / 2;
20662 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20663 || NEON_REGNO_OK_FOR_QUAD (regno));
20664 gcc_assert (VALID_NEON_DREG_MODE (mode)
20665 || VALID_NEON_QREG_MODE (mode)
20666 || VALID_NEON_STRUCT_MODE (mode));
20667 gcc_assert (MEM_P (mem));
20668
20669 addr = XEXP (mem, 0);
20670
20671 /* Strip off const from addresses like (const (plus (...))). */
20672 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20673 addr = XEXP (addr, 0);
20674
20675 switch (GET_CODE (addr))
20676 {
20677 case POST_INC:
20678 /* We have to use vldm / vstm for too-large modes. */
20679 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20680 {
20681 templ = "v%smia%%?\t%%0!, %%h1";
20682 ops[0] = XEXP (addr, 0);
20683 }
20684 else
20685 {
20686 templ = "v%s1.64\t%%h1, %%A0";
20687 ops[0] = mem;
20688 }
20689 ops[1] = reg;
20690 break;
20691
20692 case PRE_DEC:
20693 /* We have to use vldm / vstm in this case, since there is no
20694 pre-decrement form of the vld1 / vst1 instructions. */
20695 templ = "v%smdb%%?\t%%0!, %%h1";
20696 ops[0] = XEXP (addr, 0);
20697 ops[1] = reg;
20698 break;
20699
20700 case POST_MODIFY:
20701 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20702 gcc_unreachable ();
20703
20704 case REG:
20705 /* We have to use vldm / vstm for too-large modes. */
20706 if (nregs > 1)
20707 {
20708 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20709 templ = "v%smia%%?\t%%m0, %%h1";
20710 else
20711 templ = "v%s1.64\t%%h1, %%A0";
20712
20713 ops[0] = mem;
20714 ops[1] = reg;
20715 break;
20716 }
20717 /* Fall through. */
20718 case PLUS:
20719 if (GET_CODE (addr) == PLUS)
20720 addr = XEXP (addr, 0);
20721 /* Fall through. */
20722 case LABEL_REF:
20723 {
20724 int i;
20725 int overlap = -1;
20726 for (i = 0; i < nregs; i++)
20727 {
20728 /* We're only using DImode here because it's a convenient
20729 size. */
20730 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20731 ops[1] = adjust_address (mem, DImode, 8 * i);
20732 if (reg_overlap_mentioned_p (ops[0], mem))
20733 {
20734 gcc_assert (overlap == -1);
20735 overlap = i;
20736 }
20737 else
20738 {
20739 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20740 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20741 else
20742 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20743 output_asm_insn (buff, ops);
20744 }
20745 }
20746 if (overlap != -1)
20747 {
20748 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20749 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20750 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20751 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20752 else
20753 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20754 output_asm_insn (buff, ops);
20755 }
20756
20757 return "";
20758 }
20759
20760 default:
20761 gcc_unreachable ();
20762 }
20763
20764 sprintf (buff, templ, load ? "ld" : "st");
20765 output_asm_insn (buff, ops);
20766
20767 return "";
20768 }
20769
20770 /* Compute and return the length of neon_mov<mode>, where <mode> is
20771 one of VSTRUCT modes: EI, OI, CI or XI. */
20772 int
20773 arm_attr_length_move_neon (rtx_insn *insn)
20774 {
20775 rtx reg, mem, addr;
20776 int load;
20777 machine_mode mode;
20778
20779 extract_insn_cached (insn);
20780
20781 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20782 {
20783 mode = GET_MODE (recog_data.operand[0]);
20784 switch (mode)
20785 {
20786 case E_EImode:
20787 case E_OImode:
20788 return 8;
20789 case E_CImode:
20790 return 12;
20791 case E_XImode:
20792 return 16;
20793 default:
20794 gcc_unreachable ();
20795 }
20796 }
20797
20798 load = REG_P (recog_data.operand[0]);
20799 reg = recog_data.operand[!load];
20800 mem = recog_data.operand[load];
20801
20802 gcc_assert (MEM_P (mem));
20803
20804 addr = XEXP (mem, 0);
20805
20806 /* Strip off const from addresses like (const (plus (...))). */
20807 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20808 addr = XEXP (addr, 0);
20809
20810 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20811 {
20812 int insns = REG_NREGS (reg) / 2;
20813 return insns * 4;
20814 }
20815 else
20816 return 4;
20817 }
20818
20819 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20820 return zero. */
20821
20822 int
20823 arm_address_offset_is_imm (rtx_insn *insn)
20824 {
20825 rtx mem, addr;
20826
20827 extract_insn_cached (insn);
20828
20829 if (REG_P (recog_data.operand[0]))
20830 return 0;
20831
20832 mem = recog_data.operand[0];
20833
20834 gcc_assert (MEM_P (mem));
20835
20836 addr = XEXP (mem, 0);
20837
20838 if (REG_P (addr)
20839 || (GET_CODE (addr) == PLUS
20840 && REG_P (XEXP (addr, 0))
20841 && CONST_INT_P (XEXP (addr, 1))))
20842 return 1;
20843 else
20844 return 0;
20845 }
20846
20847 /* Output an ADD r, s, #n where n may be too big for one instruction.
20848 If N is zero and the destination is the same as the source, output nothing. */
20849 const char *
20850 output_add_immediate (rtx *operands)
20851 {
20852 HOST_WIDE_INT n = INTVAL (operands[2]);
20853
20854 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20855 {
20856 if (n < 0)
20857 output_multi_immediate (operands,
20858 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20859 -n);
20860 else
20861 output_multi_immediate (operands,
20862 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20863 n);
20864 }
20865
20866 return "";
20867 }
20868
20869 /* Output a multiple immediate operation.
20870 OPERANDS is the vector of operands referred to in the output patterns.
20871 INSTR1 is the output pattern to use for the first constant.
20872 INSTR2 is the output pattern to use for subsequent constants.
20873 IMMED_OP is the index of the constant slot in OPERANDS.
20874 N is the constant value. */
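/* Worked example (values invented): for N = 0x4005 with the add patterns
   this emits "add rd, rn, #5" followed by "add rd, rd, #16384"; the constant
   is peeled off eight bits at a time, each chunk being an 8-bit value at an
   even bit position and hence a valid ARM immediate. */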
20875 static const char *
20876 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20877 int immed_op, HOST_WIDE_INT n)
20878 {
20879 #if HOST_BITS_PER_WIDE_INT > 32
20880 n &= 0xffffffff;
20881 #endif
20882
20883 if (n == 0)
20884 {
20885 /* Quick and easy output. */
20886 operands[immed_op] = const0_rtx;
20887 output_asm_insn (instr1, operands);
20888 }
20889 else
20890 {
20891 int i;
20892 const char * instr = instr1;
20893
20894 /* Note that n is never zero here (which would give no output). */
20895 for (i = 0; i < 32; i += 2)
20896 {
20897 if (n & (3 << i))
20898 {
20899 operands[immed_op] = GEN_INT (n & (255 << i));
20900 output_asm_insn (instr, operands);
20901 instr = instr2;
20902 i += 6;
20903 }
20904 }
20905 }
20906
20907 return "";
20908 }
20909
20910 /* Return the name of a shifter operation. */
20911 static const char *
20912 arm_shift_nmem (enum rtx_code code)
20913 {
20914 switch (code)
20915 {
20916 case ASHIFT:
20917 return ARM_LSL_NAME;
20918
20919 case ASHIFTRT:
20920 return "asr";
20921
20922 case LSHIFTRT:
20923 return "lsr";
20924
20925 case ROTATERT:
20926 return "ror";
20927
20928 default:
20929 abort();
20930 }
20931 }
20932
20933 /* Return the appropriate ARM instruction for the operation code.
20934 The returned result should not be overwritten. OP is the rtx of the
20935 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20936 was shifted. */
20937 const char *
20938 arithmetic_instr (rtx op, int shift_first_arg)
20939 {
20940 switch (GET_CODE (op))
20941 {
20942 case PLUS:
20943 return "add";
20944
20945 case MINUS:
20946 return shift_first_arg ? "rsb" : "sub";
20947
20948 case IOR:
20949 return "orr";
20950
20951 case XOR:
20952 return "eor";
20953
20954 case AND:
20955 return "and";
20956
20957 case ASHIFT:
20958 case ASHIFTRT:
20959 case LSHIFTRT:
20960 case ROTATERT:
20961 return arm_shift_nmem (GET_CODE (op));
20962
20963 default:
20964 gcc_unreachable ();
20965 }
20966 }
20967
20968 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20969 for the operation code. The returned result should not be overwritten.
20970 OP is the rtx of the shift.
20971 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20972 constant shift amount otherwise. */
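/* Examples (RTL invented for illustration): (ashiftrt (reg) (const_int 3))
   yields "asr" with *AMOUNTP = 3; (mult (reg) (const_int 8)) is treated as a
   left shift by 3; a shift by a register returns the mnemonic with *AMOUNTP
   set to -1. */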
20973 static const char *
20974 shift_op (rtx op, HOST_WIDE_INT *amountp)
20975 {
20976 const char * mnem;
20977 enum rtx_code code = GET_CODE (op);
20978
20979 switch (code)
20980 {
20981 case ROTATE:
20982 if (!CONST_INT_P (XEXP (op, 1)))
20983 {
20984 output_operand_lossage ("invalid shift operand");
20985 return NULL;
20986 }
20987
20988 code = ROTATERT;
20989 *amountp = 32 - INTVAL (XEXP (op, 1));
20990 mnem = "ror";
20991 break;
20992
20993 case ASHIFT:
20994 case ASHIFTRT:
20995 case LSHIFTRT:
20996 case ROTATERT:
20997 mnem = arm_shift_nmem (code);
20998 if (CONST_INT_P (XEXP (op, 1)))
20999 {
21000 *amountp = INTVAL (XEXP (op, 1));
21001 }
21002 else if (REG_P (XEXP (op, 1)))
21003 {
21004 *amountp = -1;
21005 return mnem;
21006 }
21007 else
21008 {
21009 output_operand_lossage ("invalid shift operand");
21010 return NULL;
21011 }
21012 break;
21013
21014 case MULT:
21015 /* We never have to worry about the amount being other than a
21016 power of 2, since this case can never be reloaded from a reg. */
21017 if (!CONST_INT_P (XEXP (op, 1)))
21018 {
21019 output_operand_lossage ("invalid shift operand");
21020 return NULL;
21021 }
21022
21023 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21024
21025 /* Amount must be a power of two. */
21026 if (*amountp & (*amountp - 1))
21027 {
21028 output_operand_lossage ("invalid shift operand");
21029 return NULL;
21030 }
21031
21032 *amountp = exact_log2 (*amountp);
21033 gcc_assert (IN_RANGE (*amountp, 0, 31));
21034 return ARM_LSL_NAME;
21035
21036 default:
21037 output_operand_lossage ("invalid shift operand");
21038 return NULL;
21039 }
21040
21041 /* This is not 100% correct, but follows from the desire to merge
21042 multiplication by a power of 2 with the recognizer for a
21043 shift. >=32 is not a valid shift for "lsl", so we must try to
21044 output a shift that produces the correct arithmetic result.
21045 Using lsr #32 is identical except for the fact that the carry bit
21046 is not set correctly if we set the flags; but we never use the
21047 carry bit from such an operation, so we can ignore that. */
21048 if (code == ROTATERT)
21049 /* Rotate is just modulo 32. */
21050 *amountp &= 31;
21051 else if (*amountp != (*amountp & 31))
21052 {
21053 if (code == ASHIFT)
21054 mnem = "lsr";
21055 *amountp = 32;
21056 }
21057
21058 /* Shifts of 0 are no-ops. */
21059 if (*amountp == 0)
21060 return NULL;
21061
21062 return mnem;
21063 }
21064
21065 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21066 because /bin/as is horribly restrictive. The judgement about
21067 whether or not each character is 'printable' (and can be output as
21068 is) or not (and must be printed with an octal escape) must be made
21069 with reference to the *host* character set -- the situation is
21070 similar to that discussed in the comments above pp_c_char in
21071 c-pretty-print.cc. */
21072
21073 #define MAX_ASCII_LEN 51
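/* For example (input bytes invented): the bytes 'a', '"', 0x07 come out as
	.ascii	"a\"\007"
   with a fresh .ascii directive started whenever MAX_ASCII_LEN characters
   have already been emitted on the current line. */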
21074
21075 void
21076 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21077 {
21078 int i;
21079 int len_so_far = 0;
21080
21081 fputs ("\t.ascii\t\"", stream);
21082
21083 for (i = 0; i < len; i++)
21084 {
21085 int c = p[i];
21086
21087 if (len_so_far >= MAX_ASCII_LEN)
21088 {
21089 fputs ("\"\n\t.ascii\t\"", stream);
21090 len_so_far = 0;
21091 }
21092
21093 if (ISPRINT (c))
21094 {
21095 if (c == '\\' || c == '\"')
21096 {
21097 putc ('\\', stream);
21098 len_so_far++;
21099 }
21100 putc (c, stream);
21101 len_so_far++;
21102 }
21103 else
21104 {
21105 fprintf (stream, "\\%03o", c);
21106 len_so_far += 4;
21107 }
21108 }
21109
21110 fputs ("\"\n", stream);
21111 }
21112 \f
21113
21114 /* Compute the register save mask for registers 0 through 12
21115 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21116
21117 static unsigned long
21118 arm_compute_save_reg0_reg12_mask (void)
21119 {
21120 unsigned long func_type = arm_current_func_type ();
21121 unsigned long save_reg_mask = 0;
21122 unsigned int reg;
21123
21124 if (IS_INTERRUPT (func_type))
21125 {
21126 unsigned int max_reg;
21127 /* Interrupt functions must not corrupt any registers,
21128 even call clobbered ones. If this is a leaf function
21129 we can just examine the registers used by the RTL, but
21130 otherwise we have to assume that whatever function is
21131 called might clobber anything, and so we have to save
21132 all the call-clobbered registers as well. */
21133 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21134 /* FIQ handlers have registers r8 - r12 banked, so
21135 we only need to check r0 - r7. Normal ISRs only
21136 bank r14 and r15, so we must check up to r12.
21137 r13 is the stack pointer, which is always preserved,
21138 so we do not need to consider it here. */
21139 max_reg = 7;
21140 else
21141 max_reg = 12;
21142
21143 for (reg = 0; reg <= max_reg; reg++)
21144 if (reg_needs_saving_p (reg))
21145 save_reg_mask |= (1 << reg);
21146
21147 /* Also save the pic base register if necessary. */
21148 if (PIC_REGISTER_MAY_NEED_SAVING
21149 && crtl->uses_pic_offset_table)
21150 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21151 }
21152 else if (IS_VOLATILE(func_type))
21153 {
21154 /* For noreturn functions we historically omitted register saves
21155 altogether. However, this really messes up debugging. As a
21156 compromise, save just the frame pointers. Combined with the link
21157 register saved elsewhere, this should be sufficient to get
21158 a backtrace. */
21159 if (frame_pointer_needed)
21160 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21161 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21162 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21163 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21164 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21165 }
21166 else
21167 {
21168 /* In the normal case we only need to save those registers
21169 which are call saved and which are used by this function. */
21170 for (reg = 0; reg <= 11; reg++)
21171 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21172 save_reg_mask |= (1 << reg);
21173
21174 /* Handle the frame pointer as a special case. */
21175 if (frame_pointer_needed)
21176 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21177
21178 /* If we aren't loading the PIC register,
21179 don't stack it even though it may be live. */
21180 if (PIC_REGISTER_MAY_NEED_SAVING
21181 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21182 || crtl->uses_pic_offset_table))
21183 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21184
21185 /* The prologue will copy SP into R0, so save it. */
21186 if (IS_STACKALIGN (func_type))
21187 save_reg_mask |= 1;
21188 }
21189
21190 /* Save registers so the exception handler can modify them. */
21191 if (crtl->calls_eh_return)
21192 {
21193 unsigned int i;
21194
21195 for (i = 0; ; i++)
21196 {
21197 reg = EH_RETURN_DATA_REGNO (i);
21198 if (reg == INVALID_REGNUM)
21199 break;
21200 save_reg_mask |= 1 << reg;
21201 }
21202 }
21203
21204 return save_reg_mask;
21205 }
21206
21207 /* Return true if r3 is live at the start of the function. */
21208
21209 static bool
21210 arm_r3_live_at_start_p (void)
21211 {
21212 /* Just look at cfg info, which is still close enough to correct at this
21213 point. This gives false positives for broken functions that might use
21214 uninitialized data that happens to be allocated in r3, but who cares? */
21215 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21216 }
21217
21218 /* Compute the number of bytes used to store the static chain register on the
21219 stack, above the stack frame. We need to know this accurately to get the
21220 alignment of the rest of the stack frame correct. */
21221
21222 static int
21223 arm_compute_static_chain_stack_bytes (void)
21224 {
21225 /* Once the value is updated from the init value of -1, do not
21226 re-compute. */
21227 if (cfun->machine->static_chain_stack_bytes != -1)
21228 return cfun->machine->static_chain_stack_bytes;
21229
21230 /* See the defining assertion in arm_expand_prologue. */
21231 if (IS_NESTED (arm_current_func_type ())
21232 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21233 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21234 || flag_stack_clash_protection)
21235 && !df_regs_ever_live_p (LR_REGNUM)))
21236 && arm_r3_live_at_start_p ()
21237 && crtl->args.pretend_args_size == 0)
21238 return 4;
21239
21240 return 0;
21241 }
21242
21243 /* Compute a bit mask of which core registers need to be
21244 saved on the stack for the current function.
21245 This is used by arm_compute_frame_layout, which may add extra registers. */
21246
21247 static unsigned long
21248 arm_compute_save_core_reg_mask (void)
21249 {
21250 unsigned int save_reg_mask = 0;
21251 unsigned long func_type = arm_current_func_type ();
21252 unsigned int reg;
21253
21254 if (IS_NAKED (func_type))
21255 /* This should never really happen. */
21256 return 0;
21257
21258 /* If we are creating a stack frame, then we must save the frame pointer,
21259 IP (which will hold the old stack pointer), LR and the PC. */
21260 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21261 save_reg_mask |=
21262 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21263 | (1 << IP_REGNUM)
21264 | (1 << LR_REGNUM)
21265 | (1 << PC_REGNUM);
21266
21267 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21268
21269 if (arm_current_function_pac_enabled_p ())
21270 save_reg_mask |= 1 << IP_REGNUM;
21271
21272 /* Decide if we need to save the link register.
21273 Interrupt routines have their own banked link register,
21274 so they never need to save it.
21275 Otherwise if we do not use the link register we do not need to save
21276 it. If we are pushing other registers onto the stack however, we
21277 can save an instruction in the epilogue by pushing the link register
21278 now and then popping it back into the PC. This incurs extra memory
21279 accesses though, so we only do it when optimizing for size, and only
21280 if we know that we will not need a fancy return sequence. */
21281 if (df_regs_ever_live_p (LR_REGNUM)
21282 || (save_reg_mask
21283 && optimize_size
21284 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21285 && !crtl->tail_call_emit
21286 && !crtl->calls_eh_return))
21287 save_reg_mask |= 1 << LR_REGNUM;
21288
21289 if (cfun->machine->lr_save_eliminated)
21290 save_reg_mask &= ~ (1 << LR_REGNUM);
21291
21292 if (TARGET_REALLY_IWMMXT
21293 && ((bit_count (save_reg_mask)
21294 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21295 arm_compute_static_chain_stack_bytes())
21296 ) % 2) != 0)
21297 {
21298 /* The total number of registers that are going to be pushed
21299 onto the stack is odd. We need to ensure that the stack
21300 is 64-bit aligned before we start to save iWMMXt registers,
21301 and also before we start to create locals. (A local variable
21302 might be a double or long long, which we will load/store using
21303 an iWMMXt instruction.) Therefore we need to push another
21304 ARM register, so that the stack will be 64-bit aligned. We
21305 try to avoid using the arg registers (r0 - r3) as they might be
21306 used to pass values in a tail call. */
21307 for (reg = 4; reg <= 12; reg++)
21308 if ((save_reg_mask & (1 << reg)) == 0)
21309 break;
21310
21311 if (reg <= 12)
21312 save_reg_mask |= (1 << reg);
21313 else
21314 {
21315 cfun->machine->sibcall_blocked = 1;
21316 save_reg_mask |= (1 << 3);
21317 }
21318 }
21319
21320 /* We may need to push an additional register for use initializing the
21321 PIC base register. */
21322 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21323 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21324 {
21325 reg = thumb_find_work_register (1 << 4);
21326 if (!call_used_or_fixed_reg_p (reg))
21327 save_reg_mask |= (1 << reg);
21328 }
21329
21330 return save_reg_mask;
21331 }
21332
21333 /* Compute a bit mask of which core registers need to be
21334 saved on the stack for the current function. */
21335 static unsigned long
21336 thumb1_compute_save_core_reg_mask (void)
21337 {
21338 unsigned long mask;
21339 unsigned reg;
21340
21341 mask = 0;
21342 for (reg = 0; reg < 12; reg ++)
21343 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21344 mask |= 1 << reg;
21345
21346 /* Handle the frame pointer as a special case. */
21347 if (frame_pointer_needed)
21348 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21349
21350 if (flag_pic
21351 && !TARGET_SINGLE_PIC_BASE
21352 && arm_pic_register != INVALID_REGNUM
21353 && crtl->uses_pic_offset_table)
21354 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21355
21356 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21357 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21358 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21359
21360 /* LR will also be pushed if any lo regs are pushed. */
21361 if (mask & 0xff || thumb_force_lr_save ())
21362 mask |= (1 << LR_REGNUM);
21363
21364 bool call_clobbered_scratch
21365 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21366 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21367
21368 /* Make sure we have a low work register if we need one. We will
21369 need one if we are going to push a high register, but we are not
21370 currently intending to push a low register. However if both the
21371 prologue and epilogue have a spare call-clobbered low register,
21372 then we won't need to find an additional work register. It does
21373 not need to be the same register in the prologue and
21374 epilogue. */
21375 if ((mask & 0xff) == 0
21376 && !call_clobbered_scratch
21377 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21378 {
21379 /* Use thumb_find_work_register to choose which register
21380 we will use. If the register is live then we will
21381 have to push it. Use LAST_LO_REGNUM as our fallback
21382 choice for the register to select. */
21383 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21384 /* Make sure the register returned by thumb_find_work_register is
21385 not part of the return value. */
21386 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21387 reg = LAST_LO_REGNUM;
21388
21389 if (callee_saved_reg_p (reg))
21390 mask |= 1 << reg;
21391 }
21392
21393 /* The 504 below is 8 bytes less than 512 because there are two possible
21394 alignment words. We can't tell here if they will be present or not so we
21395 have to play it safe and assume that they are. */
21396 if ((CALLER_INTERWORKING_SLOT_SIZE +
21397 ROUND_UP_WORD (get_frame_size ()) +
21398 crtl->outgoing_args_size) >= 504)
21399 {
21400 /* This is the same as the code in thumb1_expand_prologue() which
21401 determines which register to use for stack decrement. */
21402 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21403 if (mask & (1 << reg))
21404 break;
21405
21406 if (reg > LAST_LO_REGNUM)
21407 {
21408 /* Make sure we have a register available for stack decrement. */
21409 mask |= 1 << LAST_LO_REGNUM;
21410 }
21411 }
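  /* Rough arithmetic sketch (the sizes are hypothetical): 512 minus the two
     possible 4-byte alignment words gives the 504 threshold used above.  A
     500-byte frame plus 8 bytes of outgoing arguments (508 >= 504) would
     trigger it, and if none of r4-r7 is already in the mask, r7
     (LAST_LO_REGNUM) is reserved so a low register is available for the
     stack decrement.  */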
21412
21413 return mask;
21414 }
21415
21416 /* Return the number of bytes required to save VFP registers. */
21417 static int
21418 arm_get_vfp_saved_size (void)
21419 {
21420 unsigned int regno;
21421 int count;
21422 int saved;
21423
21424 saved = 0;
21425 /* Space for saved VFP registers. */
21426 if (TARGET_VFP_BASE)
21427 {
21428 count = 0;
21429 for (regno = FIRST_VFP_REGNUM;
21430 regno < LAST_VFP_REGNUM;
21431 regno += 2)
21432 {
21433 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21434 {
21435 if (count > 0)
21436 {
21437 /* Workaround ARM10 VFPr1 bug. */
21438 if (count == 2 && !arm_arch6)
21439 count++;
21440 saved += count * 8;
21441 }
21442 count = 0;
21443 }
21444 else
21445 count++;
21446 }
21447 if (count > 0)
21448 {
21449 if (count == 2 && !arm_arch6)
21450 count++;
21451 saved += count * 8;
21452 }
21453 }
21454 return saved;
21455 }
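/* As a rough illustration of the computation above (which registers actually
   need saving is target- and function-specific): if d8-d15 all need saving,
   the loop counts one contiguous block of 8 D registers, i.e. 8 * 8 = 64
   bytes.  With the ARM10 VFPr1 workaround (!arm_arch6), a block of exactly
   two D registers is padded to three, so 24 bytes rather than 16.  */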
21456
21457
21458 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21459 everything bar the final return instruction. If simple_return is true,
21460 then do not output the epilogue, because it has already been emitted in RTL.
21461
21462 Note: do not forget to update length attribute of corresponding insn pattern
21463 when changing assembly output (e.g. the length attribute of
21464 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21465 register clearing sequences). */
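/* As a rough illustration only (the register sets are hypothetical and the
   exact mnemonics depend on the architecture and function type handled
   below): a normal function that saved {r4, r5, lr} typically returns with a
   single "pop {r4, r5, pc}", whereas an interrupt handler uses the
   exception-return form, e.g. "ldmfd sp!, {r4, r5, pc}^", so that the CPSR
   is restored as well.  */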
21466 const char *
21467 output_return_instruction (rtx operand, bool really_return, bool reverse,
21468 bool simple_return)
21469 {
21470 char conditional[10];
21471 char instr[100];
21472 unsigned reg;
21473 unsigned long live_regs_mask;
21474 unsigned long func_type;
21475 arm_stack_offsets *offsets;
21476
21477 func_type = arm_current_func_type ();
21478
21479 if (IS_NAKED (func_type))
21480 return "";
21481
21482 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21483 {
21484 /* If this function was declared non-returning, and we have
21485 found a tail call, then we have to trust that the called
21486 function won't return. */
21487 if (really_return)
21488 {
21489 rtx ops[2];
21490
21491 /* Otherwise, trap an attempted return by aborting. */
21492 ops[0] = operand;
21493 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21494 : "abort");
21495 assemble_external_libcall (ops[1]);
21496 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21497 }
21498
21499 return "";
21500 }
21501
21502 gcc_assert (!cfun->calls_alloca || really_return);
21503
21504 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21505
21506 cfun->machine->return_used_this_function = 1;
21507
21508 offsets = arm_get_frame_offsets ();
21509 live_regs_mask = offsets->saved_regs_mask;
21510
21511 if (!simple_return && live_regs_mask)
21512 {
21513 const char * return_reg;
21514
21515 /* If we do not have any special requirements for function exit
21516 (e.g. interworking) then we can load the return address
21517 directly into the PC. Otherwise we must load it into LR. */
21518 if (really_return
21519 && !IS_CMSE_ENTRY (func_type)
21520 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21521 return_reg = reg_names[PC_REGNUM];
21522 else
21523 return_reg = reg_names[LR_REGNUM];
21524
21525 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21526 {
21527 /* There are three possible reasons for the IP register
21528 being saved: 1) a stack frame was created, in which case
21529 IP contains the old stack pointer, or 2) an ISR routine
21530 corrupted it, or 3) it was saved to align the stack on
21531 iWMMXt. In case 1, restore IP into SP, otherwise just
21532 restore IP. */
21533 if (frame_pointer_needed)
21534 {
21535 live_regs_mask &= ~ (1 << IP_REGNUM);
21536 live_regs_mask |= (1 << SP_REGNUM);
21537 }
21538 else
21539 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21540 }
21541
21542 /* On some ARM architectures it is faster to use LDR rather than
21543 LDM to load a single register. On other architectures, the
21544 cost is the same. In 26 bit mode, or for exception handlers,
21545 we have to use LDM to load the PC so that the CPSR is also
21546 restored. */
21547 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21548 if (live_regs_mask == (1U << reg))
21549 break;
21550
21551 if (reg <= LAST_ARM_REGNUM
21552 && (reg != LR_REGNUM
21553 || ! really_return
21554 || ! IS_INTERRUPT (func_type)))
21555 {
21556 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21557 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21558 }
21559 else
21560 {
21561 char *p;
21562 int first = 1;
21563
21564 /* Generate the load multiple instruction to restore the
21565 registers. Note we can get here, even if
21566 frame_pointer_needed is true, but only if sp already
21567 points to the base of the saved core registers. */
21568 if (live_regs_mask & (1 << SP_REGNUM))
21569 {
21570 unsigned HOST_WIDE_INT stack_adjust;
21571
21572 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21573 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21574
21575 if (stack_adjust && arm_arch5t && TARGET_ARM)
21576 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21577 else
21578 {
21579 /* If we can't use ldmib (SA110 bug),
21580 then try to pop r3 instead. */
21581 if (stack_adjust)
21582 live_regs_mask |= 1 << 3;
21583
21584 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21585 }
21586 }
21587 /* For interrupt returns we have to use an LDM rather than
21588 a POP so that we can use the exception return variant. */
21589 else if (IS_INTERRUPT (func_type))
21590 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21591 else
21592 sprintf (instr, "pop%s\t{", conditional);
21593
21594 p = instr + strlen (instr);
21595
21596 for (reg = 0; reg <= SP_REGNUM; reg++)
21597 if (live_regs_mask & (1 << reg))
21598 {
21599 int l = strlen (reg_names[reg]);
21600
21601 if (first)
21602 first = 0;
21603 else
21604 {
21605 memcpy (p, ", ", 2);
21606 p += 2;
21607 }
21608
21609 memcpy (p, "%|", 2);
21610 memcpy (p + 2, reg_names[reg], l);
21611 p += l + 2;
21612 }
21613
21614 if (live_regs_mask & (1 << LR_REGNUM))
21615 {
21616 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21617 /* If returning from an interrupt, restore the CPSR. */
21618 if (IS_INTERRUPT (func_type))
21619 strcat (p, "^");
21620 }
21621 else
21622 strcpy (p, "}");
21623 }
21624
21625 output_asm_insn (instr, & operand);
21626
21627 /* See if we need to generate an extra instruction to
21628 perform the actual function return. */
21629 if (really_return
21630 && func_type != ARM_FT_INTERWORKED
21631 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21632 {
21633 /* The return has already been handled
21634 by loading the LR into the PC. */
21635 return "";
21636 }
21637 }
21638
21639 if (really_return)
21640 {
21641 switch ((int) ARM_FUNC_TYPE (func_type))
21642 {
21643 case ARM_FT_ISR:
21644 case ARM_FT_FIQ:
21645 /* ??? This is wrong for unified assembly syntax. */
21646 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21647 break;
21648
21649 case ARM_FT_INTERWORKED:
21650 gcc_assert (arm_arch5t || arm_arch4t);
21651 sprintf (instr, "bx%s\t%%|lr", conditional);
21652 break;
21653
21654 case ARM_FT_EXCEPTION:
21655 /* ??? This is wrong for unified assembly syntax. */
21656 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21657 break;
21658
21659 default:
21660 if (IS_CMSE_ENTRY (func_type))
21661 {
21662 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21663 emitted by cmse_nonsecure_entry_clear_before_return () and the
21664 VSTR/VLDR instructions in the prologue and epilogue. */
21665 if (!TARGET_HAVE_FPCXT_CMSE)
21666 {
21667 /* Check if we have to clear the 'GE bits', which are only used if
21668 parallel addition and subtraction instructions are available. */
21669 if (TARGET_INT_SIMD)
21670 snprintf (instr, sizeof (instr),
21671 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21672 else
21673 snprintf (instr, sizeof (instr),
21674 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21675
21676 output_asm_insn (instr, & operand);
21677 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21678 care of it. */
21679 if (TARGET_HARD_FLOAT)
21680 {
21681 /* Clear the cumulative exception-status bits (0-4,7) and
21682 the condition code bits (28-31) of the FPSCR. We need
21683 to remember to clear the first scratch register used
21684 (IP) and save and restore the second (r4).
21685
21686 Important note: the length of the
21687 thumb2_cmse_entry_return insn pattern must account for
21688 the size of the below instructions. */
21689 output_asm_insn ("push\t{%|r4}", & operand);
21690 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21691 output_asm_insn ("movw\t%|r4, #65376", & operand);
21692 output_asm_insn ("movt\t%|r4, #4095", & operand);
21693 output_asm_insn ("and\t%|ip, %|r4", & operand);
21694 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21695 output_asm_insn ("pop\t{%|r4}", & operand);
21696 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21697 }
21698 }
21699 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21700 }
21701 /* Use bx if it's available. */
21702 else if (arm_arch5t || arm_arch4t)
21703 sprintf (instr, "bx%s\t%%|lr", conditional);
21704 else
21705 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21706 break;
21707 }
21708
21709 output_asm_insn (instr, & operand);
21710 }
21711
21712 return "";
21713 }
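/* Purely as an illustration of the CMSE-entry path above (hard-float,
   without TARGET_HAVE_FPCXT_CMSE; the register choices mirror the code, not
   any particular compiled function), the emitted return sequence is roughly:

        msr     APSR_nzcvq, lr          @ APSR_nzcvqg if TARGET_INT_SIMD
        push    {r4}
        vmrs    ip, fpscr
        movw    r4, #65376
        movt    r4, #4095
        and     ip, r4
        vmsr    fpscr, ip
        pop     {r4}
        mov     ip, lr
        bxns    lr

   i.e. flags and FPSCR status bits are scrubbed before branching back to
   non-secure state.  */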
21714
21715 /* Output in FILE asm statements needed to declare the NAME of the function
21716 defined by its DECL node. */
21717
21718 void
21719 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21720 {
21721 size_t cmse_name_len;
21722 char *cmse_name = 0;
21723 char cmse_prefix[] = "__acle_se_";
21724
21725 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21726 extra function label for each function with the 'cmse_nonsecure_entry'
21727 attribute. This extra function label should be prepended with
21728 '__acle_se_', telling the linker that it needs to create secure gateway
21729 veneers for this function. */
21730 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21731 DECL_ATTRIBUTES (decl)))
21732 {
21733 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21734 cmse_name = XALLOCAVEC (char, cmse_name_len);
21735 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21736 targetm.asm_out.globalize_label (file, cmse_name);
21737
21738 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21739 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21740 }
21741
21742 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21743 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21744 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21745 ASM_OUTPUT_LABEL (file, name);
21746
21747 if (cmse_name)
21748 ASM_OUTPUT_LABEL (file, cmse_name);
21749
21750 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21751 }
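/* As a sketch (the name "foo" is hypothetical and the usual unwinding and
   mode directives are omitted), a cmse_nonsecure_entry function results in
   output along the lines of:

        .global __acle_se_foo
        .type   __acle_se_foo, %function
        .type   foo, %function
   foo:
   __acle_se_foo:

   giving the linker the extra label it needs to build the secure gateway
   veneer.  */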
21752
21753 /* Write the function name into the code section, directly preceding
21754 the function prologue.
21755
21756 Code will be output similar to this:
21757 t0
21758 .ascii "arm_poke_function_name", 0
21759 .align
21760 t1
21761 .word 0xff000000 + (t1 - t0)
21762 arm_poke_function_name
21763 mov ip, sp
21764 stmfd sp!, {fp, ip, lr, pc}
21765 sub fp, ip, #4
21766
21767 When performing a stack backtrace, code can inspect the value
21768 of 'pc' stored at 'fp' + 0. If the trace function then looks
21769 at location pc - 12 and the top 8 bits are set, then we know
21770 that there is a function name embedded immediately preceding this
21771 location, and that its length is ((pc[-3]) & ~0xff000000).
21772
21773 We assume that pc is declared as a pointer to an unsigned long.
21774
21775 It is of no benefit to output the function name if we are assembling
21776 a leaf function. These function types will not contain a stack
21777 backtrace structure, therefore it is not possible to determine the
21778 function name. */
21779 void
21780 arm_poke_function_name (FILE *stream, const char *name)
21781 {
21782 unsigned long alignlength;
21783 unsigned long length;
21784 rtx x;
21785
21786 length = strlen (name) + 1;
21787 alignlength = ROUND_UP_WORD (length);
21788
21789 ASM_OUTPUT_ASCII (stream, name, length);
21790 ASM_OUTPUT_ALIGN (stream, 2);
21791 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21792 assemble_aligned_integer (UNITS_PER_WORD, x);
21793 }
21794
21795 /* Place some comments into the assembler stream
21796 describing the current function. */
21797 static void
21798 arm_output_function_prologue (FILE *f)
21799 {
21800 unsigned long func_type;
21801
21802 /* Sanity check. */
21803 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21804
21805 func_type = arm_current_func_type ();
21806
21807 switch ((int) ARM_FUNC_TYPE (func_type))
21808 {
21809 default:
21810 case ARM_FT_NORMAL:
21811 break;
21812 case ARM_FT_INTERWORKED:
21813 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21814 break;
21815 case ARM_FT_ISR:
21816 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21817 break;
21818 case ARM_FT_FIQ:
21819 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21820 break;
21821 case ARM_FT_EXCEPTION:
21822 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21823 break;
21824 }
21825
21826 if (IS_NAKED (func_type))
21827 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21828
21829 if (IS_VOLATILE (func_type))
21830 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21831
21832 if (IS_NESTED (func_type))
21833 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21834 if (IS_STACKALIGN (func_type))
21835 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21836 if (IS_CMSE_ENTRY (func_type))
21837 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21838
21839 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21840 (HOST_WIDE_INT) crtl->args.size,
21841 crtl->args.pretend_args_size,
21842 (HOST_WIDE_INT) get_frame_size ());
21843
21844 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21845 frame_pointer_needed,
21846 cfun->machine->uses_anonymous_args);
21847
21848 if (cfun->machine->lr_save_eliminated)
21849 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21850
21851 if (crtl->calls_eh_return)
21852 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21853
21854 }
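/* For illustration only (the numbers are hypothetical), the comment block
   produced above for a small leaf function might read:

        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
*/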
21855
21856 static void
21857 arm_output_function_epilogue (FILE *)
21858 {
21859 arm_stack_offsets *offsets;
21860
21861 if (TARGET_THUMB1)
21862 {
21863 int regno;
21864
21865 /* Emit any call-via-reg trampolines that are needed for v4t support
21866 of call_reg and call_value_reg type insns. */
21867 for (regno = 0; regno < LR_REGNUM; regno++)
21868 {
21869 rtx label = cfun->machine->call_via[regno];
21870
21871 if (label != NULL)
21872 {
21873 switch_to_section (function_section (current_function_decl));
21874 targetm.asm_out.internal_label (asm_out_file, "L",
21875 CODE_LABEL_NUMBER (label));
21876 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21877 }
21878 }
21879
21880 /* ??? Probably not safe to set this here, since it assumes that a
21881 function will be emitted as assembly immediately after we generate
21882 RTL for it. This does not happen for inline functions. */
21883 cfun->machine->return_used_this_function = 0;
21884 }
21885 else /* TARGET_32BIT */
21886 {
21887 /* We need to take into account any stack-frame rounding. */
21888 offsets = arm_get_frame_offsets ();
21889
21890 gcc_assert (!use_return_insn (FALSE, NULL)
21891 || (cfun->machine->return_used_this_function != 0)
21892 || offsets->saved_regs == offsets->outgoing_args
21893 || frame_pointer_needed);
21894 }
21895 }
21896
21897 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21898 STR and STRD. If an even number of registers are being pushed, one
21899 or more STRD patterns are created for each register pair. If an
21900 odd number of registers are pushed, emit an initial STR followed by
21901 as many STRD instructions as are needed. This works best when the
21902 stack is initially 64-bit aligned (the normal case), since it
21903 ensures that each STRD is also 64-bit aligned. */
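/* A rough sketch of the strategy just described (the register sets are
   hypothetical): pushing {r4, r5, r6, r7} would come out as approximately

        strd    r4, r5, [sp, #-16]!
        strd    r6, r7, [sp, #8]

   while the odd-sized set {r4, r5, r6} becomes approximately

        str     r4, [sp, #-12]!
        strd    r5, r6, [sp, #4]

   so every STRD stays doubleword aligned.  */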
21904 static void
21905 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21906 {
21907 int num_regs = 0;
21908 int i;
21909 int regno;
21910 rtx par = NULL_RTX;
21911 rtx dwarf = NULL_RTX;
21912 rtx tmp;
21913 bool first = true;
21914
21915 num_regs = bit_count (saved_regs_mask);
21916
21917 /* Must be at least one register to save, and can't save SP or PC. */
21918 gcc_assert (num_regs > 0 && num_regs <= 14);
21919 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21920 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21921
21922 /* Create sequence for DWARF info. All the frame-related data for
21923 debugging is held in this wrapper. */
21924 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21925
21926 /* Describe the stack adjustment. */
21927 tmp = gen_rtx_SET (stack_pointer_rtx,
21928 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21929 RTX_FRAME_RELATED_P (tmp) = 1;
21930 XVECEXP (dwarf, 0, 0) = tmp;
21931
21932 /* Find the first register. */
21933 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21934 ;
21935
21936 i = 0;
21937
21938 /* If there's an odd number of registers to push, start off by
21939 pushing a single register. This ensures that subsequent strd
21940 operations are dword aligned (assuming that SP was originally
21941 64-bit aligned). */
21942 if ((num_regs & 1) != 0)
21943 {
21944 rtx reg, mem, insn;
21945
21946 reg = gen_rtx_REG (SImode, regno);
21947 if (num_regs == 1)
21948 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21949 stack_pointer_rtx));
21950 else
21951 mem = gen_frame_mem (Pmode,
21952 gen_rtx_PRE_MODIFY
21953 (Pmode, stack_pointer_rtx,
21954 plus_constant (Pmode, stack_pointer_rtx,
21955 -4 * num_regs)));
21956
21957 tmp = gen_rtx_SET (mem, reg);
21958 RTX_FRAME_RELATED_P (tmp) = 1;
21959 insn = emit_insn (tmp);
21960 RTX_FRAME_RELATED_P (insn) = 1;
21961 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21962 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21963 RTX_FRAME_RELATED_P (tmp) = 1;
21964 i++;
21965 regno++;
21966 XVECEXP (dwarf, 0, i) = tmp;
21967 first = false;
21968 }
21969
21970 while (i < num_regs)
21971 if (saved_regs_mask & (1 << regno))
21972 {
21973 rtx reg1, reg2, mem1, mem2;
21974 rtx tmp0, tmp1, tmp2;
21975 int regno2;
21976
21977 /* Find the register to pair with this one. */
21978 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21979 regno2++)
21980 ;
21981
21982 reg1 = gen_rtx_REG (SImode, regno);
21983 reg2 = gen_rtx_REG (SImode, regno2);
21984
21985 if (first)
21986 {
21987 rtx insn;
21988
21989 first = false;
21990 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21991 stack_pointer_rtx,
21992 -4 * num_regs));
21993 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21994 stack_pointer_rtx,
21995 -4 * (num_regs - 1)));
21996 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21997 plus_constant (Pmode, stack_pointer_rtx,
21998 -4 * (num_regs)));
21999 tmp1 = gen_rtx_SET (mem1, reg1);
22000 tmp2 = gen_rtx_SET (mem2, reg2);
22001 RTX_FRAME_RELATED_P (tmp0) = 1;
22002 RTX_FRAME_RELATED_P (tmp1) = 1;
22003 RTX_FRAME_RELATED_P (tmp2) = 1;
22004 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22005 XVECEXP (par, 0, 0) = tmp0;
22006 XVECEXP (par, 0, 1) = tmp1;
22007 XVECEXP (par, 0, 2) = tmp2;
22008 insn = emit_insn (par);
22009 RTX_FRAME_RELATED_P (insn) = 1;
22010 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22011 }
22012 else
22013 {
22014 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22015 stack_pointer_rtx,
22016 4 * i));
22017 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22018 stack_pointer_rtx,
22019 4 * (i + 1)));
22020 tmp1 = gen_rtx_SET (mem1, reg1);
22021 tmp2 = gen_rtx_SET (mem2, reg2);
22022 RTX_FRAME_RELATED_P (tmp1) = 1;
22023 RTX_FRAME_RELATED_P (tmp2) = 1;
22024 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22025 XVECEXP (par, 0, 0) = tmp1;
22026 XVECEXP (par, 0, 1) = tmp2;
22027 emit_insn (par);
22028 }
22029
22030 /* Create unwind information. This is an approximation. */
22031 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22032 plus_constant (Pmode,
22033 stack_pointer_rtx,
22034 4 * i)),
22035 reg1);
22036 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22037 plus_constant (Pmode,
22038 stack_pointer_rtx,
22039 4 * (i + 1))),
22040 reg2);
22041
22042 RTX_FRAME_RELATED_P (tmp1) = 1;
22043 RTX_FRAME_RELATED_P (tmp2) = 1;
22044 XVECEXP (dwarf, 0, i + 1) = tmp1;
22045 XVECEXP (dwarf, 0, i + 2) = tmp2;
22046 i += 2;
22047 regno = regno2 + 1;
22048 }
22049 else
22050 regno++;
22051
22052 return;
22053 }
22054
22055 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22056 whenever possible, otherwise it emits single-word stores. The first store
22057 also allocates stack space for all saved registers, using pre-indexed
22058 addressing with writeback. All other stores use offset addressing. If no STRD
22059 can be emitted, this function emits a sequence of single-word stores,
22060 and not an STM as before, because single-word stores provide more
22061 scheduling freedom and can be turned into an STM by peephole optimizations. */
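/* A rough sketch of the scheme just described (the register set is
   hypothetical): for {r4, r5, r7}, where only r4/r5 form a consecutive
   even/odd pair, the stores come out as approximately

        strd    r4, r5, [sp, #-12]!
        str     r7, [sp, #8]

   with the first store performing the whole stack allocation.  */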
22062 static void
22063 arm_emit_strd_push (unsigned long saved_regs_mask)
22064 {
22065 int num_regs = 0;
22066 int i, j, dwarf_index = 0;
22067 int offset = 0;
22068 rtx dwarf = NULL_RTX;
22069 rtx insn = NULL_RTX;
22070 rtx tmp, mem;
22071
22072 /* TODO: More efficient code could be emitted by changing the
22073 layout, e.g., by first pushing all pairs that can use STRD to keep the
22074 stack aligned, and then pushing all other registers. */
22075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22076 if (saved_regs_mask & (1 << i))
22077 num_regs++;
22078
22079 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22080 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22081 gcc_assert (num_regs > 0);
22082
22083 /* Create sequence for DWARF info. */
22084 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22085
22086 /* For dwarf info, we generate explicit stack update. */
22087 tmp = gen_rtx_SET (stack_pointer_rtx,
22088 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22089 RTX_FRAME_RELATED_P (tmp) = 1;
22090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22091
22092 /* Save registers. */
22093 offset = - 4 * num_regs;
22094 j = 0;
22095 while (j <= LAST_ARM_REGNUM)
22096 if (saved_regs_mask & (1 << j))
22097 {
22098 if ((j % 2 == 0)
22099 && (saved_regs_mask & (1 << (j + 1))))
22100 {
22101 /* Current register and previous register form register pair for
22102 which STRD can be generated. */
22103 if (offset < 0)
22104 {
22105 /* Allocate stack space for all saved registers. */
22106 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22107 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22108 mem = gen_frame_mem (DImode, tmp);
22109 offset = 0;
22110 }
22111 else if (offset > 0)
22112 mem = gen_frame_mem (DImode,
22113 plus_constant (Pmode,
22114 stack_pointer_rtx,
22115 offset));
22116 else
22117 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22118
22119 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22120 RTX_FRAME_RELATED_P (tmp) = 1;
22121 tmp = emit_insn (tmp);
22122
22123 /* Record the first store insn. */
22124 if (dwarf_index == 1)
22125 insn = tmp;
22126
22127 /* Generate dwarf info. */
22128 mem = gen_frame_mem (SImode,
22129 plus_constant (Pmode,
22130 stack_pointer_rtx,
22131 offset));
22132 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22133 RTX_FRAME_RELATED_P (tmp) = 1;
22134 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22135
22136 mem = gen_frame_mem (SImode,
22137 plus_constant (Pmode,
22138 stack_pointer_rtx,
22139 offset + 4));
22140 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22141 RTX_FRAME_RELATED_P (tmp) = 1;
22142 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22143
22144 offset += 8;
22145 j += 2;
22146 }
22147 else
22148 {
22149 /* Emit a single word store. */
22150 if (offset < 0)
22151 {
22152 /* Allocate stack space for all saved registers. */
22153 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22154 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22155 mem = gen_frame_mem (SImode, tmp);
22156 offset = 0;
22157 }
22158 else if (offset > 0)
22159 mem = gen_frame_mem (SImode,
22160 plus_constant (Pmode,
22161 stack_pointer_rtx,
22162 offset));
22163 else
22164 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22165
22166 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22167 RTX_FRAME_RELATED_P (tmp) = 1;
22168 tmp = emit_insn (tmp);
22169
22170 /* Record the first store insn. */
22171 if (dwarf_index == 1)
22172 insn = tmp;
22173
22174 /* Generate dwarf info. */
22175 mem = gen_frame_mem (SImode,
22176 plus_constant(Pmode,
22177 stack_pointer_rtx,
22178 offset));
22179 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22180 RTX_FRAME_RELATED_P (tmp) = 1;
22181 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22182
22183 offset += 4;
22184 j += 1;
22185 }
22186 }
22187 else
22188 j++;
22189
22190 /* Attach dwarf info to the first insn we generate. */
22191 gcc_assert (insn != NULL_RTX);
22192 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22193 RTX_FRAME_RELATED_P (insn) = 1;
22194 }
22195
22196 /* Generate and emit an insn that we will recognize as a push_multi.
22197 Unfortunately, since this insn does not reflect very well the actual
22198 semantics of the operation, we need to annotate the insn for the benefit
22199 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22200 MASK for registers that should be annotated for DWARF2 frame unwind
22201 information. */
22202 static rtx
22203 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22204 {
22205 int num_regs = 0;
22206 int num_dwarf_regs = 0;
22207 int i, j;
22208 rtx par;
22209 rtx dwarf;
22210 int dwarf_par_index;
22211 rtx tmp, reg;
22212
22213 /* We don't record the PC in the dwarf frame information. */
22214 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22215
22216 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22217 {
22218 if (mask & (1 << i))
22219 num_regs++;
22220 if (dwarf_regs_mask & (1 << i))
22221 num_dwarf_regs++;
22222 }
22223
22224 gcc_assert (num_regs && num_regs <= 16);
22225 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22226
22227 /* For the body of the insn we are going to generate an UNSPEC in
22228 parallel with several USEs. This allows the insn to be recognized
22229 by the push_multi pattern in the arm.md file.
22230
22231 The body of the insn looks something like this:
22232
22233 (parallel [
22234 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22235 (const_int:SI <num>)))
22236 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22237 (use (reg:SI XX))
22238 (use (reg:SI YY))
22239 ...
22240 ])
22241
22242 For the frame note however, we try to be more explicit and actually
22243 show each register being stored into the stack frame, plus a (single)
22244 decrement of the stack pointer. We do it this way in order to be
22245 friendly to the stack unwinding code, which only wants to see a single
22246 stack decrement per instruction. The RTL we generate for the note looks
22247 something like this:
22248
22249 (sequence [
22250 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22251 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22252 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22253 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22254 ...
22255 ])
22256
22257 FIXME: In an ideal world the PRE_MODIFY would not exist and
22258 instead we'd have a parallel expression detailing all
22259 the stores to the various memory addresses so that debug
22260 information is more up-to-date. Remember however while writing
22261 this to take care of the constraints with the push instruction.
22262
22263 Note also that this has to be taken care of for the VFP registers.
22264
22265 For more see PR43399. */
22266
22267 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22268 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22269 dwarf_par_index = 1;
22270
22271 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22272 {
22273 if (mask & (1 << i))
22274 {
22275 reg = gen_rtx_REG (SImode, i);
22276
22277 XVECEXP (par, 0, 0)
22278 = gen_rtx_SET (gen_frame_mem
22279 (BLKmode,
22280 gen_rtx_PRE_MODIFY (Pmode,
22281 stack_pointer_rtx,
22282 plus_constant
22283 (Pmode, stack_pointer_rtx,
22284 -4 * num_regs))
22285 ),
22286 gen_rtx_UNSPEC (BLKmode,
22287 gen_rtvec (1, reg),
22288 UNSPEC_PUSH_MULT));
22289
22290 if (dwarf_regs_mask & (1 << i))
22291 {
22292 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22293 reg);
22294 RTX_FRAME_RELATED_P (tmp) = 1;
22295 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22296 }
22297
22298 break;
22299 }
22300 }
22301
22302 for (j = 1, i++; j < num_regs; i++)
22303 {
22304 if (mask & (1 << i))
22305 {
22306 reg = gen_rtx_REG (SImode, i);
22307
22308 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22309
22310 if (dwarf_regs_mask & (1 << i))
22311 {
22312 tmp
22313 = gen_rtx_SET (gen_frame_mem
22314 (SImode,
22315 plus_constant (Pmode, stack_pointer_rtx,
22316 4 * j)),
22317 reg);
22318 RTX_FRAME_RELATED_P (tmp) = 1;
22319 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22320 }
22321
22322 j++;
22323 }
22324 }
22325
22326 par = emit_insn (par);
22327
22328 tmp = gen_rtx_SET (stack_pointer_rtx,
22329 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22330 RTX_FRAME_RELATED_P (tmp) = 1;
22331 XVECEXP (dwarf, 0, 0) = tmp;
22332
22333 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22334
22335 return par;
22336 }
22337
22338 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22339 SIZE is the offset to be adjusted.
22340 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22341 static void
22342 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22343 {
22344 rtx dwarf;
22345
22346 RTX_FRAME_RELATED_P (insn) = 1;
22347 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22348 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22349 }
22350
22351 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22352 SAVED_REGS_MASK shows which registers need to be restored.
22353
22354 Unfortunately, since this insn does not reflect very well the actual
22355 semantics of the operation, we need to annotate the insn for the benefit
22356 of DWARF2 frame unwind information. */
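/* For illustration only (hypothetical register sets): popping {r4, r5, r6}
   without PC in the mask assembles to roughly "pop {r4, r5, r6}" plus a
   REG_CFA_ADJUST_CFA note for the 12-byte adjustment, while a mask that
   includes PC is emitted as a jump insn, roughly "pop {r4, r5, pc}", which
   also performs the return.  */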
22357 static void
22358 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22359 {
22360 int num_regs = 0;
22361 int i, j;
22362 rtx par;
22363 rtx dwarf = NULL_RTX;
22364 rtx tmp, reg;
22365 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22366 int offset_adj;
22367 int emit_update;
22368
22369 offset_adj = return_in_pc ? 1 : 0;
22370 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22371 if (saved_regs_mask & (1 << i))
22372 num_regs++;
22373
22374 gcc_assert (num_regs && num_regs <= 16);
22375
22376 /* If SP is in the register list, then we don't emit the SP update insn. */
22377 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22378
22379 /* The parallel needs to hold num_regs SETs
22380 and one SET for the stack update. */
22381 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22382
22383 if (return_in_pc)
22384 XVECEXP (par, 0, 0) = ret_rtx;
22385
22386 if (emit_update)
22387 {
22388 /* Increment the stack pointer, based on there being
22389 num_regs 4-byte registers to restore. */
22390 tmp = gen_rtx_SET (stack_pointer_rtx,
22391 plus_constant (Pmode,
22392 stack_pointer_rtx,
22393 4 * num_regs));
22394 RTX_FRAME_RELATED_P (tmp) = 1;
22395 XVECEXP (par, 0, offset_adj) = tmp;
22396 }
22397
22398 /* Now restore every reg, which may include PC. */
22399 for (j = 0, i = 0; j < num_regs; i++)
22400 if (saved_regs_mask & (1 << i))
22401 {
22402 reg = gen_rtx_REG (SImode, i);
22403 if ((num_regs == 1) && emit_update && !return_in_pc)
22404 {
22405 /* Emit single load with writeback. */
22406 tmp = gen_frame_mem (SImode,
22407 gen_rtx_POST_INC (Pmode,
22408 stack_pointer_rtx));
22409 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22410 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22411 return;
22412 }
22413
22414 tmp = gen_rtx_SET (reg,
22415 gen_frame_mem
22416 (SImode,
22417 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22418 RTX_FRAME_RELATED_P (tmp) = 1;
22419 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22420
22421 /* We need to maintain a sequence for DWARF info too. As the dwarf
22422 info should not include the PC, skip the PC. */
22423 if (i != PC_REGNUM)
22424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22425
22426 j++;
22427 }
22428
22429 if (return_in_pc)
22430 par = emit_jump_insn (par);
22431 else
22432 par = emit_insn (par);
22433
22434 REG_NOTES (par) = dwarf;
22435 if (!return_in_pc)
22436 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22437 stack_pointer_rtx, stack_pointer_rtx);
22438 }
22439
22440 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22441 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22442
22443 Unfortunately, since this insn does not reflect very well the actual
22444 semantics of the operation, we need to annotate the insn for the benefit
22445 of DWARF2 frame unwind information. */
22446 static void
22447 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22448 {
22449 int i, j;
22450 rtx par;
22451 rtx dwarf = NULL_RTX;
22452 rtx tmp, reg;
22453
22454 gcc_assert (num_regs && num_regs <= 32);
22455
22456 /* Workaround ARM10 VFPr1 bug. */
22457 if (num_regs == 2 && !arm_arch6)
22458 {
22459 if (first_reg == 15)
22460 first_reg--;
22461
22462 num_regs++;
22463 }
22464
22465 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22466 there could be up to 32 D-registers to restore.
22467 If there are more than 16 D-registers, make two recursive calls,
22468 each of which emits one pop_multi instruction. */
22469 if (num_regs > 16)
22470 {
22471 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22472 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22473 return;
22474 }
22475
22476 /* The parallel needs to hold num_regs SETs
22477 and one SET for the stack update. */
22478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22479
22480 /* Increment the stack pointer, based on there being
22481 num_regs 8-byte registers to restore. */
22482 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22483 RTX_FRAME_RELATED_P (tmp) = 1;
22484 XVECEXP (par, 0, 0) = tmp;
22485
22486 /* Now show every reg that will be restored, using a SET for each. */
22487 for (j = 0, i=first_reg; j < num_regs; i += 2)
22488 {
22489 reg = gen_rtx_REG (DFmode, i);
22490
22491 tmp = gen_rtx_SET (reg,
22492 gen_frame_mem
22493 (DFmode,
22494 plus_constant (Pmode, base_reg, 8 * j)));
22495 RTX_FRAME_RELATED_P (tmp) = 1;
22496 XVECEXP (par, 0, j + 1) = tmp;
22497
22498 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22499
22500 j++;
22501 }
22502
22503 par = emit_insn (par);
22504 REG_NOTES (par) = dwarf;
22505
22506 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
22507 if (REGNO (base_reg) == IP_REGNUM)
22508 {
22509 RTX_FRAME_RELATED_P (par) = 1;
22510 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22511 }
22512 else
22513 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22514 base_reg, base_reg);
22515 }
22516
22517 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22518 even number of registers is being popped, multiple LDRD patterns are created for
22519 all register pairs. If an odd number of registers is popped, the last register
22520 is loaded using an LDR pattern. */
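/* A rough sketch (the register set is hypothetical): popping {r4, r5, r6}
   comes out as approximately

        ldrd    r4, r5, [sp]
        add     sp, sp, #8
        ldr     r6, [sp], #4

   i.e. pairs via LDRD, one explicit stack update, then a single
   post-incrementing LDR for the leftover register.  */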
22521 static void
22522 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22523 {
22524 int num_regs = 0;
22525 int i, j;
22526 rtx par = NULL_RTX;
22527 rtx dwarf = NULL_RTX;
22528 rtx tmp, reg, tmp1;
22529 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22530
22531 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22532 if (saved_regs_mask & (1 << i))
22533 num_regs++;
22534
22535 gcc_assert (num_regs && num_regs <= 16);
22536
22537 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22538 to be popped. So, if num_regs was even, it now becomes odd and we
22539 can generate a pop with PC; if num_regs was odd, it is now even and
22540 an ldr with return can be generated for PC. */
22541 if (return_in_pc)
22542 num_regs--;
22543
22544 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22545
22546 /* Var j iterates over all the registers in saved_regs_mask, while
22547 var i gives the index of each saved register in the stack frame.
22548 A PARALLEL RTX of a register pair is created here so that the LDRD
22549 pattern can be matched. As PC is always the last register to be
22550 popped, and we have already decremented num_regs if PC is present,
22551 we don't have to worry about PC in this loop. */
22552 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22553 if (saved_regs_mask & (1 << j))
22554 {
22555 /* Create RTX for memory load. */
22556 reg = gen_rtx_REG (SImode, j);
22557 tmp = gen_rtx_SET (reg,
22558 gen_frame_mem (SImode,
22559 plus_constant (Pmode,
22560 stack_pointer_rtx, 4 * i)));
22561 RTX_FRAME_RELATED_P (tmp) = 1;
22562
22563 if (i % 2 == 0)
22564 {
22565 /* When saved-register index (i) is even, the RTX to be emitted is
22566 yet to be created. Hence create it first. The LDRD pattern we
22567 are generating is :
22568 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22569 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22570 where target registers need not be consecutive. */
22571 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22572 dwarf = NULL_RTX;
22573 }
22574
22575 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22576 added as 0th element and if i is odd, reg_i is added as 1st element
22577 of LDRD pattern shown above. */
22578 XVECEXP (par, 0, (i % 2)) = tmp;
22579 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22580
22581 if ((i % 2) == 1)
22582 {
22583 /* When saved-register index (i) is odd, RTXs for both the registers
22584 to be loaded are generated in above given LDRD pattern, and the
22585 pattern can be emitted now. */
22586 par = emit_insn (par);
22587 REG_NOTES (par) = dwarf;
22588 RTX_FRAME_RELATED_P (par) = 1;
22589 }
22590
22591 i++;
22592 }
22593
22594 /* If the number of registers popped is odd and return_in_pc is false, or
22595 the number of registers is even and return_in_pc is true, the last
22596 register is popped using LDR (it can be PC as well). Hence, adjust the
22597 stack first and then use LDR with post increment. */
22598
22599 /* Increment the stack pointer, based on there being
22600 num_regs 4-byte registers to restore. */
22601 tmp = gen_rtx_SET (stack_pointer_rtx,
22602 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22603 RTX_FRAME_RELATED_P (tmp) = 1;
22604 tmp = emit_insn (tmp);
22605 if (!return_in_pc)
22606 {
22607 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22608 stack_pointer_rtx, stack_pointer_rtx);
22609 }
22610
22611 dwarf = NULL_RTX;
22612
22613 if (((num_regs % 2) == 1 && !return_in_pc)
22614 || ((num_regs % 2) == 0 && return_in_pc))
22615 {
22616 /* Scan for the single register to be popped. Skip until the saved
22617 register is found. */
22618 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22619
22620 /* Gen LDR with post increment here. */
22621 tmp1 = gen_rtx_MEM (SImode,
22622 gen_rtx_POST_INC (SImode,
22623 stack_pointer_rtx));
22624 set_mem_alias_set (tmp1, get_frame_alias_set ());
22625
22626 reg = gen_rtx_REG (SImode, j);
22627 tmp = gen_rtx_SET (reg, tmp1);
22628 RTX_FRAME_RELATED_P (tmp) = 1;
22629 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22630
22631 if (return_in_pc)
22632 {
22633 /* If return_in_pc, j must be PC_REGNUM. */
22634 gcc_assert (j == PC_REGNUM);
22635 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22636 XVECEXP (par, 0, 0) = ret_rtx;
22637 XVECEXP (par, 0, 1) = tmp;
22638 par = emit_jump_insn (par);
22639 }
22640 else
22641 {
22642 par = emit_insn (tmp);
22643 REG_NOTES (par) = dwarf;
22644 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22645 stack_pointer_rtx, stack_pointer_rtx);
22646 }
22647
22648 }
22649 else if ((num_regs % 2) == 1 && return_in_pc)
22650 {
22651 /* There are 2 registers to be popped. So, generate the pattern
22652 pop_multiple_with_stack_update_and_return to pop them and return via PC. */
22653 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22654 }
22655
22656 return;
22657 }
22658
22659 /* LDRD in ARM mode needs consecutive registers as operands. This function
22660 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22661 offset addressing and then generates one separate stack update. This provides
22662 more scheduling freedom, compared to writeback on every load. However,
22663 if the function returns using load into PC directly
22664 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22665 before the last load. TODO: Add a peephole optimization to recognize
22666 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22667 peephole optimization to merge the load at stack-offset zero
22668 with the stack update instruction using load with writeback
22669 in post-index addressing mode. */
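/* A rough sketch (the register set is hypothetical): popping {r4, r5, r6, pc}
   comes out as approximately

        ldrd    r4, r5, [sp]
        ldr     r6, [sp, #8]
        add     sp, sp, #12
        ldr     pc, [sp], #4

   i.e. offset-addressed loads, one stack update, and a final load into PC
   that performs the return.  */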
22670 static void
22671 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22672 {
22673 int j = 0;
22674 int offset = 0;
22675 rtx par = NULL_RTX;
22676 rtx dwarf = NULL_RTX;
22677 rtx tmp, mem;
22678
22679 /* Restore saved registers. */
22680 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22681 j = 0;
22682 while (j <= LAST_ARM_REGNUM)
22683 if (saved_regs_mask & (1 << j))
22684 {
22685 if ((j % 2) == 0
22686 && (saved_regs_mask & (1 << (j + 1)))
22687 && (j + 1) != PC_REGNUM)
22688 {
22689 /* Current register and next register form register pair for which
22690 LDRD can be generated. PC is always the last register popped, and
22691 we handle it separately. */
22692 if (offset > 0)
22693 mem = gen_frame_mem (DImode,
22694 plus_constant (Pmode,
22695 stack_pointer_rtx,
22696 offset));
22697 else
22698 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22699
22700 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22701 tmp = emit_insn (tmp);
22702 RTX_FRAME_RELATED_P (tmp) = 1;
22703
22704 /* Generate dwarf info. */
22705
22706 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22707 gen_rtx_REG (SImode, j),
22708 NULL_RTX);
22709 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22710 gen_rtx_REG (SImode, j + 1),
22711 dwarf);
22712
22713 REG_NOTES (tmp) = dwarf;
22714
22715 offset += 8;
22716 j += 2;
22717 }
22718 else if (j != PC_REGNUM)
22719 {
22720 /* Emit a single word load. */
22721 if (offset > 0)
22722 mem = gen_frame_mem (SImode,
22723 plus_constant (Pmode,
22724 stack_pointer_rtx,
22725 offset));
22726 else
22727 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22728
22729 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22730 tmp = emit_insn (tmp);
22731 RTX_FRAME_RELATED_P (tmp) = 1;
22732
22733 /* Generate dwarf info. */
22734 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22735 gen_rtx_REG (SImode, j),
22736 NULL_RTX);
22737
22738 offset += 4;
22739 j += 1;
22740 }
22741 else /* j == PC_REGNUM */
22742 j++;
22743 }
22744 else
22745 j++;
22746
22747 /* Update the stack. */
22748 if (offset > 0)
22749 {
22750 tmp = gen_rtx_SET (stack_pointer_rtx,
22751 plus_constant (Pmode,
22752 stack_pointer_rtx,
22753 offset));
22754 tmp = emit_insn (tmp);
22755 arm_add_cfa_adjust_cfa_note (tmp, offset,
22756 stack_pointer_rtx, stack_pointer_rtx);
22757 offset = 0;
22758 }
22759
22760 if (saved_regs_mask & (1 << PC_REGNUM))
22761 {
22762 /* Only PC is to be popped. */
22763 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22764 XVECEXP (par, 0, 0) = ret_rtx;
22765 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22766 gen_frame_mem (SImode,
22767 gen_rtx_POST_INC (SImode,
22768 stack_pointer_rtx)));
22769 RTX_FRAME_RELATED_P (tmp) = 1;
22770 XVECEXP (par, 0, 1) = tmp;
22771 par = emit_jump_insn (par);
22772
22773 /* Generate dwarf info. */
22774 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22775 gen_rtx_REG (SImode, PC_REGNUM),
22776 NULL_RTX);
22777 REG_NOTES (par) = dwarf;
22778 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22779 stack_pointer_rtx, stack_pointer_rtx);
22780 }
22781 }
22782
22783 /* Calculate the size of the return value that is passed in registers. */
22784 static unsigned
22785 arm_size_return_regs (void)
22786 {
22787 machine_mode mode;
22788
22789 if (crtl->return_rtx != 0)
22790 mode = GET_MODE (crtl->return_rtx);
22791 else
22792 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22793
22794 return GET_MODE_SIZE (mode);
22795 }
22796
22797 /* Return true if the current function needs to save/restore LR. */
22798 static bool
22799 thumb_force_lr_save (void)
22800 {
22801 return !cfun->machine->lr_save_eliminated
22802 && (!crtl->is_leaf
22803 || thumb_far_jump_used_p ()
22804 || df_regs_ever_live_p (LR_REGNUM));
22805 }
22806
22807 /* We do not know if r3 will be available, because we have an
22808 indirect tail call happening in this
22809 particular case. */
22810 static bool
22811 is_indirect_tailcall_p (rtx call)
22812 {
22813 rtx pat = PATTERN (call);
22814
22815 /* Indirect tail call. */
22816 pat = XVECEXP (pat, 0, 0);
22817 if (GET_CODE (pat) == SET)
22818 pat = SET_SRC (pat);
22819
22820 pat = XEXP (XEXP (pat, 0), 0);
22821 return REG_P (pat);
22822 }
22823
22824 /* Return true if r3 is used by any of the tail call insns in the
22825 current function. */
22826 static bool
22827 any_sibcall_could_use_r3 (void)
22828 {
22829 edge_iterator ei;
22830 edge e;
22831
22832 if (!crtl->tail_call_emit)
22833 return false;
22834 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22835 if (e->flags & EDGE_SIBCALL)
22836 {
22837 rtx_insn *call = BB_END (e->src);
22838 if (!CALL_P (call))
22839 call = prev_nonnote_nondebug_insn (call);
22840 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22841 if (find_regno_fusage (call, USE, 3)
22842 || is_indirect_tailcall_p (call))
22843 return true;
22844 }
22845 return false;
22846 }
22847
22848
22849 /* Compute the distance from register FROM to register TO.
22850 These can be the arg pointer (26), the soft frame pointer (25),
22851 the stack pointer (13) or the hard frame pointer (11).
22852 In thumb mode r7 is used as the soft frame pointer, if needed.
22853 Typical stack layout looks like this:
22854
22855        old stack pointer -> |    |
22856                              ----
22857                             |    | \
22858                             |    |   saved arguments for
22859                             |    |   vararg functions
22860                             |    | /
22861                              --
22862    hard FP & arg pointer -> |    | \
22863                             |    |   stack
22864                             |    |   frame
22865                             |    | /
22866                              --
22867                             |    | \
22868                             |    |   call saved
22869                             |    |   registers
22870       soft frame pointer -> |    | /
22871                              --
22872                             |    | \
22873                             |    |   local
22874                             |    |   variables
22875      locals base pointer -> |    | /
22876                              --
22877                             |    | \
22878                             |    |   outgoing
22879                             |    |   arguments
22880    current stack pointer -> |    | /
22881                              --
22882
22883 For a given function some or all of these stack components
22884 may not be needed, giving rise to the possibility of
22885 eliminating some of the registers.
22886
22887 The values returned by this function must reflect the behavior
22888 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22889
22890 The sign of the number returned reflects the direction of stack
22891 growth, so the values are positive for all eliminations except
22892 from the soft frame pointer to the hard frame pointer.
22893
22894 SFP may point just inside the local variables block to ensure correct
22895 alignment. */
22896
22897
22898 /* Return cached stack offsets. */
22899
22900 static arm_stack_offsets *
22901 arm_get_frame_offsets (void)
22902 {
22903 struct arm_stack_offsets *offsets;
22904
22905 offsets = &cfun->machine->stack_offsets;
22906
22907 return offsets;
22908 }
22909
22910
22911 /* Calculate stack offsets. These are used to calculate register elimination
22912 offsets and in prologue/epilogue code. Also calculates which registers
22913 should be saved. */
22914
22915 static void
22916 arm_compute_frame_layout (void)
22917 {
22918 struct arm_stack_offsets *offsets;
22919 unsigned long func_type;
22920 int saved;
22921 int core_saved;
22922 HOST_WIDE_INT frame_size;
22923 int i;
22924
22925 offsets = &cfun->machine->stack_offsets;
22926
22927 /* Initially this is the size of the local variables. It will be translated
22928 into an offset once we have determined the size of preceding data. */
22929 frame_size = ROUND_UP_WORD (get_frame_size ());
22930
22931 /* Space for variadic functions. */
22932 offsets->saved_args = crtl->args.pretend_args_size;
22933
22934 /* In Thumb mode this is incorrect, but never used. */
22935 offsets->frame
22936 = (offsets->saved_args
22937 + arm_compute_static_chain_stack_bytes ()
22938 + (frame_pointer_needed ? 4 : 0));
22939
22940 if (TARGET_32BIT)
22941 {
22942 unsigned int regno;
22943
22944 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22945 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22946 saved = core_saved;
22947
22948 /* We know that SP will be doubleword aligned on entry, and we must
22949 preserve that condition at any subroutine call. We also require the
22950 soft frame pointer to be doubleword aligned. */
22951
22952 if (TARGET_REALLY_IWMMXT)
22953 {
22954 /* Check for the call-saved iWMMXt registers. */
22955 for (regno = FIRST_IWMMXT_REGNUM;
22956 regno <= LAST_IWMMXT_REGNUM;
22957 regno++)
22958 if (reg_needs_saving_p (regno))
22959 saved += 8;
22960 }
22961
22962 func_type = arm_current_func_type ();
22963 /* Space for saved VFP registers. */
22964 if (! IS_VOLATILE (func_type)
22965 && TARGET_VFP_BASE)
22966 saved += arm_get_vfp_saved_size ();
22967
22968 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22969 nonsecure entry functions with VSTR/VLDR. */
22970 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22971 saved += 4;
22972 }
22973 else /* TARGET_THUMB1 */
22974 {
22975 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22976 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22977 saved = core_saved;
22978 if (TARGET_BACKTRACE)
22979 saved += 16;
22980 }
22981
22982 /* Saved registers include the stack frame. */
22983 offsets->saved_regs
22984 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22985 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22986
22987 /* A leaf function does not need any stack alignment if it has nothing
22988 on the stack. */
22989 if (crtl->is_leaf && frame_size == 0
22990 /* However if it calls alloca(), we have a dynamically allocated
22991 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22992 && ! cfun->calls_alloca)
22993 {
22994 offsets->outgoing_args = offsets->soft_frame;
22995 offsets->locals_base = offsets->soft_frame;
22996 return;
22997 }
22998
22999 /* Ensure SFP has the correct alignment. */
23000 if (ARM_DOUBLEWORD_ALIGN
23001 && (offsets->soft_frame & 7))
23002 {
23003 offsets->soft_frame += 4;
23004 /* Try to align stack by pushing an extra reg. Don't bother doing this
23005 when there is a stack frame as the alignment will be rolled into
23006 the normal stack adjustment. */
23007 if (frame_size + crtl->outgoing_args_size == 0)
23008 {
23009 int reg = -1;
23010
23011 /* Register r3 is caller-saved. Normally it does not need to be
23012 saved on entry by the prologue. However if we choose to save
23013 it for padding then we may confuse the compiler into thinking
23014 a prologue sequence is required when in fact it is not. This
23015 will occur when shrink-wrapping if r3 is used as a scratch
23016 register and there are no other callee-saved writes.
23017
23018 This situation can be avoided, when other callee-saved registers are
23019 available and r3 is not strictly needed, by choosing a callee-saved
23020 register for the padding instead. */
23021 bool prefer_callee_reg_p = false;
23022
23023 /* If it is safe to use r3, then do so. This sometimes
23024 generates better code on Thumb-2 by avoiding the need to
23025 use 32-bit push/pop instructions. */
23026 if (! any_sibcall_could_use_r3 ()
23027 && arm_size_return_regs () <= 12
23028 && (offsets->saved_regs_mask & (1 << 3)) == 0
23029 && (TARGET_THUMB2
23030 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23031 {
23032 reg = 3;
23033 if (!TARGET_THUMB2)
23034 prefer_callee_reg_p = true;
23035 }
23036 if (reg == -1
23037 || prefer_callee_reg_p)
23038 {
23039 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23040 {
23041 /* Avoid fixed registers; they may be changed at
23042 arbitrary times so it's unsafe to restore them
23043 during the epilogue. */
23044 if (!fixed_regs[i]
23045 && (offsets->saved_regs_mask & (1 << i)) == 0)
23046 {
23047 reg = i;
23048 break;
23049 }
23050 }
23051 }
23052
23053 if (reg != -1)
23054 {
23055 offsets->saved_regs += 4;
23056 offsets->saved_regs_mask |= (1 << reg);
23057 }
23058 }
23059 }
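  /* Purely illustrative (the sizes and registers are hypothetical): a
     function saving only {r4, r5, lr} (12 bytes) with no locals and no
     outgoing arguments leaves the soft frame misaligned, so one more
     register is added here; on Thumb-2 that is normally r3 (giving
     push {r3, r4, r5, lr}), falling back to the first free callee-saved
     register, e.g. r6, when a sibling call might need r3.  */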
23060
23061 offsets->locals_base = offsets->soft_frame + frame_size;
23062 offsets->outgoing_args = (offsets->locals_base
23063 + crtl->outgoing_args_size);
23064
23065 if (ARM_DOUBLEWORD_ALIGN)
23066 {
23067 /* Ensure SP remains doubleword aligned. */
23068 if (offsets->outgoing_args & 7)
23069 offsets->outgoing_args += 4;
23070 gcc_assert (!(offsets->outgoing_args & 7));
23071 }
23072 }
23073
23074
23075 /* Calculate the relative offsets for the different stack pointers. Positive
23076 offsets are in the direction of stack growth. */
23077
23078 HOST_WIDE_INT
23079 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23080 {
23081 arm_stack_offsets *offsets;
23082
23083 offsets = arm_get_frame_offsets ();
23084
23085 /* OK, now we have enough information to compute the distances.
23086 There must be an entry in these switch tables for each pair
23087 of registers in ELIMINABLE_REGS, even if some of the entries
23088 seem to be redundant or useless. */
23089 switch (from)
23090 {
23091 case ARG_POINTER_REGNUM:
23092 switch (to)
23093 {
23094 case THUMB_HARD_FRAME_POINTER_REGNUM:
23095 return 0;
23096
23097 case FRAME_POINTER_REGNUM:
23098 /* This is the reverse of the soft frame pointer
23099 to hard frame pointer elimination below. */
23100 return offsets->soft_frame - offsets->saved_args;
23101
23102 case ARM_HARD_FRAME_POINTER_REGNUM:
23103 /* This is only non-zero in the case where the static chain register
23104 is stored above the frame. */
23105 return offsets->frame - offsets->saved_args - 4;
23106
23107 case STACK_POINTER_REGNUM:
23108 /* If nothing has been pushed on the stack at all
23109 then this will return -4. This *is* correct! */
23110 return offsets->outgoing_args - (offsets->saved_args + 4);
23111
23112 default:
23113 gcc_unreachable ();
23114 }
23115 gcc_unreachable ();
23116
23117 case FRAME_POINTER_REGNUM:
23118 switch (to)
23119 {
23120 case THUMB_HARD_FRAME_POINTER_REGNUM:
23121 return 0;
23122
23123 case ARM_HARD_FRAME_POINTER_REGNUM:
23124 /* The hard frame pointer points to the top entry in the
23125 stack frame. The soft frame pointer to the bottom entry
23126 in the stack frame. If there is no stack frame at all,
23127 then they are identical. */
23128
23129 return offsets->frame - offsets->soft_frame;
23130
23131 case STACK_POINTER_REGNUM:
23132 return offsets->outgoing_args - offsets->soft_frame;
23133
23134 default:
23135 gcc_unreachable ();
23136 }
23137 gcc_unreachable ();
23138
23139 default:
23140 /* You cannot eliminate from the stack pointer.
23141 In theory you could eliminate from the hard frame
23142 pointer to the stack pointer, but this will never
23143 happen, since if a stack frame is not needed the
23144 hard frame pointer will never be used. */
23145 gcc_unreachable ();
23146 }
23147 }
23148
23149 /* Given FROM and TO register numbers, say whether this elimination is
23150 allowed. Frame pointer elimination is automatically handled.
23151
23152 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23153 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23154 pointer, we must eliminate FRAME_POINTER_REGNUM into
23155 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23156 ARG_POINTER_REGNUM. */
23157
23158 bool
23159 arm_can_eliminate (const int from, const int to)
23160 {
23161 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23162 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23163 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23164 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23165 true);
23166 }
23167
23168 /* Emit RTL to save coprocessor registers on function entry. Returns the
23169 number of bytes pushed. */
23170
23171 static int
23172 arm_save_coproc_regs(void)
23173 {
23174 int saved_size = 0;
23175 unsigned reg;
23176 unsigned start_reg;
23177 rtx insn;
23178
23179 if (TARGET_REALLY_IWMMXT)
23180 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23181 if (reg_needs_saving_p (reg))
23182 {
23183 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23184 insn = gen_rtx_MEM (V2SImode, insn);
23185 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23186 RTX_FRAME_RELATED_P (insn) = 1;
23187 saved_size += 8;
23188 }
23189
23190 if (TARGET_VFP_BASE)
23191 {
23192 start_reg = FIRST_VFP_REGNUM;
23193
23194 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23195 {
23196 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23197 {
23198 if (start_reg != reg)
23199 saved_size += vfp_emit_fstmd (start_reg,
23200 (reg - start_reg) / 2);
23201 start_reg = reg + 2;
23202 }
23203 }
23204 if (start_reg != reg)
23205 saved_size += vfp_emit_fstmd (start_reg,
23206 (reg - start_reg) / 2);
23207 }
23208 return saved_size;
23209 }
23210
23211
23212 /* Set the Thumb frame pointer from the stack pointer. */
23213
23214 static void
23215 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23216 {
23217 HOST_WIDE_INT amount;
23218 rtx insn, dwarf;
23219
23220 amount = offsets->outgoing_args - offsets->locals_base;
23221 if (amount < 1024)
23222 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23223 stack_pointer_rtx, GEN_INT (amount)));
23224 else
23225 {
23226 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23227 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23228 expects the first two operands to be the same. */
23229 if (TARGET_THUMB2)
23230 {
23231 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23232 stack_pointer_rtx,
23233 hard_frame_pointer_rtx));
23234 }
23235 else
23236 {
23237 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23238 hard_frame_pointer_rtx,
23239 stack_pointer_rtx));
23240 }
23241 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23242 plus_constant (Pmode, stack_pointer_rtx, amount));
23243 RTX_FRAME_RELATED_P (dwarf) = 1;
23244 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23245 }
23246
23247 RTX_FRAME_RELATED_P (insn) = 1;
23248 }
23249
23250 struct scratch_reg {
23251 rtx reg;
23252 bool saved;
23253 };
23254
23255 /* Return a short-lived scratch register for use as a 2nd scratch register on
23256 function entry after the registers are saved in the prologue. This register
23257 must be released by means of release_scratch_register_on_entry. IP is not
23258 considered since it is always used as the 1st scratch register if available.
23259
23260 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23261 mask of live registers. */
23262
23263 static void
23264 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23265 unsigned long live_regs)
23266 {
23267 int regno = -1;
23268
23269 sr->saved = false;
23270
23271 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23272 regno = LR_REGNUM;
23273 else
23274 {
23275 unsigned int i;
23276
23277 for (i = 4; i < 11; i++)
23278 if (regno1 != i && (live_regs & (1 << i)) != 0)
23279 {
23280 regno = i;
23281 break;
23282 }
23283
23284 if (regno < 0)
23285 {
23286 /* If IP is used as the 1st scratch register for a nested function,
23287 then either r3 wasn't available or is used to preserve IP. */
23288 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23289 regno1 = 3;
23290 regno = (regno1 == 3 ? 2 : 3);
23291 sr->saved
23292 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23293 regno);
23294 }
23295 }
23296
23297 sr->reg = gen_rtx_REG (SImode, regno);
23298 if (sr->saved)
23299 {
23300 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23301 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23302 rtx x = gen_rtx_SET (stack_pointer_rtx,
23303 plus_constant (Pmode, stack_pointer_rtx, -4));
23304 RTX_FRAME_RELATED_P (insn) = 1;
23305 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23306 }
23307 }
23308
23309 /* Release a scratch register obtained from the preceding function. */
23310
23311 static void
23312 release_scratch_register_on_entry (struct scratch_reg *sr)
23313 {
23314 if (sr->saved)
23315 {
23316 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23317 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23318 rtx x = gen_rtx_SET (stack_pointer_rtx,
23319 plus_constant (Pmode, stack_pointer_rtx, 4));
23320 RTX_FRAME_RELATED_P (insn) = 1;
23321 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23322 }
23323 }
23324
23325 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23326
23327 #if PROBE_INTERVAL > 4096
23328 #error Cannot use indexed addressing mode for stack probing
23329 #endif
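/* Illustrative note (not in the original sources): with the default
   STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL is 4096 bytes,
   which keeps every probe offset within the immediate range of the
   "str" instruction used for probing below.  */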
23330
23331 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23332 inclusive. These are offsets from the current stack pointer. REGNO1
23333 is the index number of the 1st scratch register and LIVE_REGS is the
23334 mask of live registers. */
23335
23336 static void
23337 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23338 unsigned int regno1, unsigned long live_regs)
23339 {
23340 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23341
23342 /* See if we have a constant small number of probes to generate. If so,
23343 that's the easy case. */
23344 if (size <= PROBE_INTERVAL)
23345 {
23346 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23347 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23348 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23349 }
23350
23351 /* The run-time loop is made up of 10 insns in the generic case while the
23352 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23353 else if (size <= 5 * PROBE_INTERVAL)
23354 {
23355 HOST_WIDE_INT i, rem;
23356
23357 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23358 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23359 emit_stack_probe (reg1);
23360
23361 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23362 it exceeds SIZE. If only two probes are needed, this will not
23363 generate any code. Then probe at FIRST + SIZE. */
23364 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23365 {
23366 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23367 emit_stack_probe (reg1);
23368 }
23369
23370 rem = size - (i - PROBE_INTERVAL);
23371 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23372 {
23373 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23374 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23375 }
23376 else
23377 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23378 }
23379
23380 /* Otherwise, do the same as above, but in a loop. Note that we must be
23381 extra careful with variables wrapping around because we might be at
23382 the very top (or the very bottom) of the address space and we have
23383 to be able to handle this case properly; in particular, we use an
23384 equality test for the loop condition. */
23385 else
23386 {
23387 HOST_WIDE_INT rounded_size;
23388 struct scratch_reg sr;
23389
23390 get_scratch_register_on_entry (&sr, regno1, live_regs);
23391
23392 emit_move_insn (reg1, GEN_INT (first));
23393
23394
23395 /* Step 1: round SIZE to the previous multiple of the interval. */
23396
23397 rounded_size = size & -PROBE_INTERVAL;
23398 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23399
23400
23401 /* Step 2: compute initial and final value of the loop counter. */
23402
23403 /* TEST_ADDR = SP + FIRST. */
23404 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23405
23406 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23407 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23408
23409
23410 /* Step 3: the loop
23411
23412 do
23413 {
23414 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23415 probe at TEST_ADDR
23416 }
23417 while (TEST_ADDR != LAST_ADDR)
23418
23419 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23420 until it is equal to ROUNDED_SIZE. */
23421
23422 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23423
23424
23425 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23426 that SIZE is equal to ROUNDED_SIZE. */
23427
23428 if (size != rounded_size)
23429 {
23430 HOST_WIDE_INT rem = size - rounded_size;
23431
23432 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23433 {
23434 emit_set_insn (sr.reg,
23435 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23436 emit_stack_probe (plus_constant (Pmode, sr.reg,
23437 PROBE_INTERVAL - rem));
23438 }
23439 else
23440 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23441 }
23442
23443 release_scratch_register_on_entry (&sr);
23444 }
23445
23446 /* Make sure nothing is scheduled before we are done. */
23447 emit_insn (gen_blockage ());
23448 }
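/* Worked example (illustrative only, assuming PROBE_INTERVAL == 4096):
   for FIRST == 4096 and SIZE == 3 * 4096 + 100 the second branch above
   emits probes at SP - 8192, SP - 12288 and SP - 16384, followed by a
   final probe at SP - 16484, i.e. at FIRST + SIZE below the incoming
   stack pointer.  */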
23449
23450 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23451 absolute addresses. */
23452
23453 const char *
23454 output_probe_stack_range (rtx reg1, rtx reg2)
23455 {
23456 static int labelno = 0;
23457 char loop_lab[32];
23458 rtx xops[2];
23459
23460 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23461
23462 /* Loop. */
23463 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23464
23465 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23466 xops[0] = reg1;
23467 xops[1] = GEN_INT (PROBE_INTERVAL);
23468 output_asm_insn ("sub\t%0, %0, %1", xops);
23469
23470 /* Probe at TEST_ADDR. */
23471 output_asm_insn ("str\tr0, [%0, #0]", xops);
23472
23473 /* Test if TEST_ADDR == LAST_ADDR. */
23474 xops[1] = reg2;
23475 output_asm_insn ("cmp\t%0, %1", xops);
23476
23477 /* Branch. */
23478 fputs ("\tbne\t", asm_out_file);
23479 assemble_name_raw (asm_out_file, loop_lab);
23480 fputc ('\n', asm_out_file);
23481
23482 return "";
23483 }
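/* For reference (illustrative, assuming PROBE_INTERVAL == 4096 and
   r4/r5 as the two scratch registers), the loop emitted above is:

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
*/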
23484
23485 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23486 function. */
23487 void
23488 arm_expand_prologue (void)
23489 {
23490 rtx amount;
23491 rtx insn;
23492 rtx ip_rtx;
23493 unsigned long live_regs_mask;
23494 unsigned long func_type;
23495 int fp_offset = 0;
23496 int saved_pretend_args = 0;
23497 int saved_regs = 0;
23498 unsigned HOST_WIDE_INT args_to_push;
23499 HOST_WIDE_INT size;
23500 arm_stack_offsets *offsets;
23501 bool clobber_ip;
23502
23503 func_type = arm_current_func_type ();
23504
23505 /* Naked functions don't have prologues. */
23506 if (IS_NAKED (func_type))
23507 {
23508 if (flag_stack_usage_info)
23509 current_function_static_stack_size = 0;
23510 return;
23511 }
23512
23513 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23514 args_to_push = crtl->args.pretend_args_size;
23515
23516 /* Compute which registers we will have to save onto the stack. */
23517 offsets = arm_get_frame_offsets ();
23518 live_regs_mask = offsets->saved_regs_mask;
23519
23520 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23521
23522 if (IS_STACKALIGN (func_type))
23523 {
23524 rtx r0, r1;
23525
23526 /* Handle a word-aligned stack pointer. We generate the following:
23527
23528 mov r0, sp
23529 bic r1, r0, #7
23530 mov sp, r1
23531 <save and restore r0 in normal prologue/epilogue>
23532 mov sp, r0
23533 bx lr
23534
23535 The unwinder doesn't need to know about the stack realignment.
23536 Just tell it we saved SP in r0. */
23537 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23538
23539 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23540 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23541
23542 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23543 RTX_FRAME_RELATED_P (insn) = 1;
23544 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23545
23546 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23547
23548 /* ??? The CFA changes here, which may cause GDB to conclude that it
23549 has entered a different function. That said, the unwind info is
23550 correct, individually, before and after this instruction because
23551 we've described the save of SP, which will override the default
23552 handling of SP as restoring from the CFA. */
23553 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23554 }
23555
23556 /* Let's compute the static_chain_stack_bytes required and store it. Right
23557 now the value must be -1 as stored by arm_init_machine_status (). */
23558 cfun->machine->static_chain_stack_bytes
23559 = arm_compute_static_chain_stack_bytes ();
23560
23561 /* The static chain register is the same as the IP register. If it is
23562 clobbered when creating the frame, we need to save and restore it. */
23563 clobber_ip = (IS_NESTED (func_type)
23564 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23565 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23566 || flag_stack_clash_protection)
23567 && !df_regs_ever_live_p (LR_REGNUM)
23568 && arm_r3_live_at_start_p ()))
23569 || arm_current_function_pac_enabled_p ()));
23570
23571 /* Find somewhere to store IP whilst the frame is being created.
23572 We try the following places in order:
23573
23574 1. The last argument register r3 if it is available.
23575 2. A slot on the stack above the frame if there are no
23576 arguments to push onto the stack.
23577 3. Register r3 again, after pushing the argument registers
23578 onto the stack, if this is a varargs function.
23579 4. The last slot on the stack created for the arguments to
23580 push, if this isn't a varargs function.
23581
23582 Note - we only need to tell the dwarf2 backend about the SP
23583 adjustment in the second variant; the static chain register
23584 doesn't need to be unwound, as it doesn't contain a value
23585 inherited from the caller. */
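/* Illustrative example (not part of the original comment): a nested
   APCS-frame function with r3 live at entry and no pretend args falls
   into case 2 above: IP is saved with a pre-decrement store, fp_offset
   becomes 4, and IP is reloaded from [hard-fp, #4] by the "Recover the
   static chain register" code further down.  */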
23586 if (clobber_ip)
23587 {
23588 if (!arm_r3_live_at_start_p ())
23589 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23590 else if (args_to_push == 0)
23591 {
23592 rtx addr, dwarf;
23593
23594 saved_regs += 4;
23595
23596 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23597 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23598 fp_offset = 4;
23599
23600 /* Just tell the dwarf backend that we adjusted SP. */
23601 dwarf = gen_rtx_SET (stack_pointer_rtx,
23602 plus_constant (Pmode, stack_pointer_rtx,
23603 -fp_offset));
23604 RTX_FRAME_RELATED_P (insn) = 1;
23605 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23606 }
23607 else
23608 {
23609 /* Store the args on the stack. */
23610 if (cfun->machine->uses_anonymous_args)
23611 {
23612 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23613 (0xf0 >> (args_to_push / 4)) & 0xf);
23614 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23615 saved_pretend_args = 1;
23616 }
23617 else
23618 {
23619 rtx addr, dwarf;
23620
23621 if (args_to_push == 4)
23622 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23623 else
23624 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23625 plus_constant (Pmode,
23626 stack_pointer_rtx,
23627 -args_to_push));
23628
23629 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23630
23631 /* Just tell the dwarf backend that we adjusted SP. */
23632 dwarf = gen_rtx_SET (stack_pointer_rtx,
23633 plus_constant (Pmode, stack_pointer_rtx,
23634 -args_to_push));
23635 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23636 }
23637
23638 RTX_FRAME_RELATED_P (insn) = 1;
23639 fp_offset = args_to_push;
23640 args_to_push = 0;
23641 }
23642 }
23643
23644 if (arm_current_function_pac_enabled_p ())
23645 {
23646 /* If IP was clobbered we only emit a PAC instruction as the BTI
23647 one will be added before the push of the clobbered IP (if
23648 necessary) by the bti pass. */
23649 if (aarch_bti_enabled () && !clobber_ip)
23650 emit_insn (gen_pacbti_nop ());
23651 else
23652 emit_insn (gen_pac_nop ());
23653 }
23654
23655 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23656 {
23657 if (IS_INTERRUPT (func_type))
23658 {
23659 /* Interrupt functions must not corrupt any registers.
23660 Creating a frame pointer, however, corrupts the IP
23661 register, so we must push it first. */
23662 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23663
23664 /* Do not set RTX_FRAME_RELATED_P on this insn.
23665 The dwarf stack unwinding code only wants to see one
23666 stack decrement per function, and this is not it. If
23667 this instruction is labeled as being part of the frame
23668 creation sequence then dwarf2out_frame_debug_expr will
23669 die when it encounters the assignment of IP to FP
23670 later on, since the use of SP here establishes SP as
23671 the CFA register and not IP.
23672
23673 Anyway this instruction is not really part of the stack
23674 frame creation although it is part of the prologue. */
23675 }
23676
23677 insn = emit_set_insn (ip_rtx,
23678 plus_constant (Pmode, stack_pointer_rtx,
23679 fp_offset));
23680 RTX_FRAME_RELATED_P (insn) = 1;
23681 }
23682
23683 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23684 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23685 {
23686 saved_regs += 4;
23687 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23688 GEN_INT (FPCXTNS_ENUM)));
23689 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23690 plus_constant (Pmode, stack_pointer_rtx, -4));
23691 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23692 RTX_FRAME_RELATED_P (insn) = 1;
23693 }
23694
23695 if (args_to_push)
23696 {
23697 /* Push the argument registers, or reserve space for them. */
23698 if (cfun->machine->uses_anonymous_args)
23699 insn = emit_multi_reg_push
23700 ((0xf0 >> (args_to_push / 4)) & 0xf,
23701 (0xf0 >> (args_to_push / 4)) & 0xf);
23702 else
23703 insn = emit_insn
23704 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23705 GEN_INT (- args_to_push)));
23706 RTX_FRAME_RELATED_P (insn) = 1;
23707 }
23708
23709 /* If this is an interrupt service routine, and the link register
23710 is going to be pushed, and we're not generating the extra
23711 push of IP (needed when a frame pointer is needed and the APCS
23712 frame layout is used), then subtracting four from LR now means that the function return
23713 can be done with a single instruction. */
23714 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23715 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23716 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23717 && TARGET_ARM)
23718 {
23719 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23720
23721 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23722 }
23723
23724 if (live_regs_mask)
23725 {
23726 unsigned long dwarf_regs_mask = live_regs_mask;
23727
23728 saved_regs += bit_count (live_regs_mask) * 4;
23729 if (optimize_size && !frame_pointer_needed
23730 && saved_regs == offsets->saved_regs - offsets->saved_args)
23731 {
23732 /* If no coprocessor registers are being pushed and we don't have
23733 to worry about a frame pointer then push extra registers to
23734 create the stack frame. This is done in a way that does not
23735 alter the frame layout, so is independent of the epilogue. */
23736 int n;
23737 int frame;
23738 n = 0;
23739 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23740 n++;
23741 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23742 if (frame && n * 4 >= frame)
23743 {
23744 n = frame / 4;
23745 live_regs_mask |= (1 << n) - 1;
23746 saved_regs += frame;
23747 }
23748 }
23749
23750 if (TARGET_LDRD
23751 && current_tune->prefer_ldrd_strd
23752 && !optimize_function_for_size_p (cfun))
23753 {
23754 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23755 if (TARGET_THUMB2)
23756 thumb2_emit_strd_push (live_regs_mask);
23757 else if (TARGET_ARM
23758 && !TARGET_APCS_FRAME
23759 && !IS_INTERRUPT (func_type))
23760 arm_emit_strd_push (live_regs_mask);
23761 else
23762 {
23763 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23764 RTX_FRAME_RELATED_P (insn) = 1;
23765 }
23766 }
23767 else
23768 {
23769 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23770 RTX_FRAME_RELATED_P (insn) = 1;
23771 }
23772 }
23773
23774 if (! IS_VOLATILE (func_type))
23775 saved_regs += arm_save_coproc_regs ();
23776
23777 if (frame_pointer_needed && TARGET_ARM)
23778 {
23779 /* Create the new frame pointer. */
23780 if (TARGET_APCS_FRAME)
23781 {
23782 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23783 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23784 RTX_FRAME_RELATED_P (insn) = 1;
23785 }
23786 else
23787 {
23788 insn = GEN_INT (saved_regs - (4 + fp_offset));
23789 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23790 stack_pointer_rtx, insn));
23791 RTX_FRAME_RELATED_P (insn) = 1;
23792 }
23793 }
23794
23795 size = offsets->outgoing_args - offsets->saved_args;
23796 if (flag_stack_usage_info)
23797 current_function_static_stack_size = size;
23798
23799 /* If this isn't an interrupt service routine and we have a frame, then do
23800 stack checking. We use IP as the first scratch register, except for the
23801 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23802 if (!IS_INTERRUPT (func_type)
23803 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23804 || flag_stack_clash_protection))
23805 {
23806 unsigned int regno;
23807
23808 if (!IS_NESTED (func_type) || clobber_ip)
23809 regno = IP_REGNUM;
23810 else if (df_regs_ever_live_p (LR_REGNUM))
23811 regno = LR_REGNUM;
23812 else
23813 regno = 3;
23814
23815 if (crtl->is_leaf && !cfun->calls_alloca)
23816 {
23817 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23818 arm_emit_probe_stack_range (get_stack_check_protect (),
23819 size - get_stack_check_protect (),
23820 regno, live_regs_mask);
23821 }
23822 else if (size > 0)
23823 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23824 regno, live_regs_mask);
23825 }
23826
23827 /* Recover the static chain register. */
23828 if (clobber_ip)
23829 {
23830 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23831 insn = gen_rtx_REG (SImode, 3);
23832 else
23833 {
23834 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23835 insn = gen_frame_mem (SImode, insn);
23836 }
23837 emit_set_insn (ip_rtx, insn);
23838 emit_insn (gen_force_register_use (ip_rtx));
23839 }
23840
23841 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23842 {
23843 /* This add can produce multiple insns for a large constant, so we
23844 need to get tricky. */
23845 rtx_insn *last = get_last_insn ();
23846
23847 amount = GEN_INT (offsets->saved_args + saved_regs
23848 - offsets->outgoing_args);
23849
23850 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23851 amount));
23852 do
23853 {
23854 last = last ? NEXT_INSN (last) : get_insns ();
23855 RTX_FRAME_RELATED_P (last) = 1;
23856 }
23857 while (last != insn);
23858
23859 /* If the frame pointer is needed, emit a special barrier that
23860 will prevent the scheduler from moving stores to the frame
23861 before the stack adjustment. */
23862 if (frame_pointer_needed)
23863 emit_insn (gen_stack_tie (stack_pointer_rtx,
23864 hard_frame_pointer_rtx));
23865 }
23866
23867
23868 if (frame_pointer_needed && TARGET_THUMB2)
23869 thumb_set_frame_pointer (offsets);
23870
23871 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23872 {
23873 unsigned long mask;
23874
23875 mask = live_regs_mask;
23876 mask &= THUMB2_WORK_REGS;
23877 if (!IS_NESTED (func_type))
23878 mask |= (1 << IP_REGNUM);
23879 arm_load_pic_register (mask, NULL_RTX);
23880 }
23881
23882 /* If we are profiling, make sure no instructions are scheduled before
23883 the call to mcount. Similarly if the user has requested no
23884 scheduling in the prolog. Similarly if we want non-call exceptions
23885 using the EABI unwinder, to prevent faulting instructions from being
23886 swapped with a stack adjustment. */
23887 if (crtl->profile || !TARGET_SCHED_PROLOG
23888 || (arm_except_unwind_info (&global_options) == UI_TARGET
23889 && cfun->can_throw_non_call_exceptions))
23890 emit_insn (gen_blockage ());
23891
23892 /* If the link register is being kept alive, with the return address in it,
23893 then make sure that it does not get reused by the ce2 pass. */
23894 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23895 cfun->machine->lr_save_eliminated = 1;
23896 }
23897 \f
23898 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23899 static void
23900 arm_print_condition (FILE *stream)
23901 {
23902 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23903 {
23904 /* Branch conversion is not implemented for Thumb-2. */
23905 if (TARGET_THUMB)
23906 {
23907 output_operand_lossage ("predicated Thumb instruction");
23908 return;
23909 }
23910 if (current_insn_predicate != NULL)
23911 {
23912 output_operand_lossage
23913 ("predicated instruction in conditional sequence");
23914 return;
23915 }
23916
23917 fputs (arm_condition_codes[arm_current_cc], stream);
23918 }
23919 else if (current_insn_predicate)
23920 {
23921 enum arm_cond_code code;
23922
23923 if (TARGET_THUMB1)
23924 {
23925 output_operand_lossage ("predicated Thumb instruction");
23926 return;
23927 }
23928
23929 code = get_arm_condition_code (current_insn_predicate);
23930 fputs (arm_condition_codes[code], stream);
23931 }
23932 }
23933
23934
23935 /* Globally reserved letters: acln
23936 Punctuation letters currently used: @_|?().!#
23937 Lower case letters currently used: bcdefhimpqtvwxyz
23938 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23939 Letters previously used, but now deprecated/obsolete: sWXYZ.
23940
23941 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23942
23943 If CODE is 'd', then the X is a condition operand and the instruction
23944 should only be executed if the condition is true.
23945 If CODE is 'D', then the X is a condition operand and the instruction
23946 should only be executed if the condition is false: however, if the mode
23947 of the comparison is CCFPEmode, then always execute the instruction -- we
23948 do this because in these circumstances !GE does not necessarily imply LT;
23949 in these cases the instruction pattern will take care to make sure that
23950 an instruction containing %d will follow, thereby undoing the effects of
23951 doing this instruction unconditionally.
23952 If CODE is 'N' then X is a floating point operand that must be negated
23953 before output.
23954 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23955 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23956 If CODE is 'V', then the operand must be a CONST_INT representing
23957 the bits to preserve in the modified register (Rd) of a BFI or BFC
23958 instruction: print out both the width and lsb (shift) fields. */
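/* Two illustrative examples (not part of the original comment): for the
   CONST_INT 5, "%B" prints -6 (the sign-extended bitwise inverse), and
   for the CONST_INT 8, "%b" prints #3 (its log2).  */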
23959 static void
23960 arm_print_operand (FILE *stream, rtx x, int code)
23961 {
23962 switch (code)
23963 {
23964 case '@':
23965 fputs (ASM_COMMENT_START, stream);
23966 return;
23967
23968 case '_':
23969 fputs (user_label_prefix, stream);
23970 return;
23971
23972 case '|':
23973 fputs (REGISTER_PREFIX, stream);
23974 return;
23975
23976 case '?':
23977 arm_print_condition (stream);
23978 return;
23979
23980 case '.':
23981 /* The current condition code for a condition code setting instruction.
23982 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23983 fputc('s', stream);
23984 arm_print_condition (stream);
23985 return;
23986
23987 case '!':
23988 /* If the instruction is conditionally executed then print
23989 the current condition code, otherwise print 's'. */
23990 gcc_assert (TARGET_THUMB2);
23991 if (current_insn_predicate)
23992 arm_print_condition (stream);
23993 else
23994 fputc('s', stream);
23995 break;
23996
23997 /* %# is a "break" sequence. It doesn't output anything, but is used to
23998 separate e.g. operand numbers from following text, if that text consists
23999 of further digits which we don't want to be part of the operand
24000 number. */
24001 case '#':
24002 return;
24003
24004 case 'N':
24005 {
24006 REAL_VALUE_TYPE r;
24007 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24008 fprintf (stream, "%s", fp_const_from_val (&r));
24009 }
24010 return;
24011
24012 /* An integer or symbol address without a preceding # sign. */
24013 case 'c':
24014 switch (GET_CODE (x))
24015 {
24016 case CONST_INT:
24017 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24018 break;
24019
24020 case SYMBOL_REF:
24021 output_addr_const (stream, x);
24022 break;
24023
24024 case CONST:
24025 if (GET_CODE (XEXP (x, 0)) == PLUS
24026 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24027 {
24028 output_addr_const (stream, x);
24029 break;
24030 }
24031 /* Fall through. */
24032
24033 default:
24034 output_operand_lossage ("Unsupported operand for code '%c'", code);
24035 }
24036 return;
24037
24038 /* An integer that we want to print in HEX. */
24039 case 'x':
24040 switch (GET_CODE (x))
24041 {
24042 case CONST_INT:
24043 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24044 break;
24045
24046 default:
24047 output_operand_lossage ("Unsupported operand for code '%c'", code);
24048 }
24049 return;
24050
24051 case 'B':
24052 if (CONST_INT_P (x))
24053 {
24054 HOST_WIDE_INT val;
24055 val = ARM_SIGN_EXTEND (~INTVAL (x));
24056 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24057 }
24058 else
24059 {
24060 putc ('~', stream);
24061 output_addr_const (stream, x);
24062 }
24063 return;
24064
24065 case 'b':
24066 /* Print the log2 of a CONST_INT. */
24067 {
24068 HOST_WIDE_INT val;
24069
24070 if (!CONST_INT_P (x)
24071 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24072 output_operand_lossage ("Unsupported operand for code '%c'", code);
24073 else
24074 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24075 }
24076 return;
24077
24078 case 'L':
24079 /* The low 16 bits of an immediate constant. */
24080 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24081 return;
24082
24083 case 'i':
24084 fprintf (stream, "%s", arithmetic_instr (x, 1));
24085 return;
24086
24087 case 'I':
24088 fprintf (stream, "%s", arithmetic_instr (x, 0));
24089 return;
24090
24091 case 'S':
24092 {
24093 HOST_WIDE_INT val;
24094 const char *shift;
24095
24096 shift = shift_op (x, &val);
24097
24098 if (shift)
24099 {
24100 fprintf (stream, ", %s ", shift);
24101 if (val == -1)
24102 arm_print_operand (stream, XEXP (x, 1), 0);
24103 else
24104 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24105 }
24106 }
24107 return;
24108
24109 /* An explanation of the 'Q', 'R' and 'H' register operands:
24110
24111 In a pair of registers containing a DI or DF value the 'Q'
24112 operand returns the register number of the register containing
24113 the least significant part of the value. The 'R' operand returns
24114 the register number of the register containing the most
24115 significant part of the value.
24116
24117 The 'H' operand returns the higher of the two register numbers.
24118 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24119 same as the 'Q' operand, since the most significant part of the
24120 value is held in the lower number register. The reverse is true
24121 on systems where WORDS_BIG_ENDIAN is false.
24122
24123 The purpose of these operands is to distinguish between cases
24124 where the endian-ness of the values is important (for example
24125 when they are added together), and cases where the endian-ness
24126 is irrelevant, but the order of register operations is important.
24127 For example when loading a value from memory into a register
24128 pair, the endian-ness does not matter. Provided that the value
24129 from the lower memory address is put into the lower numbered
24130 register, and the value from the higher address is put into the
24131 higher numbered register, the load will work regardless of whether
24132 the value being loaded is big-wordian or little-wordian. The
24133 order of the two register loads can matter however, if the address
24134 of the memory location is actually held in one of the registers
24135 being overwritten by the load.
24136
24137 The 'Q' and 'R' constraints are also available for 64-bit
24138 constants. */
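/* For example (illustrative): with a DImode value held in the register
   pair {r4, r5}, %Q is r4, %R is r5 and %H is r5 when WORDS_BIG_ENDIAN
   is false; when it is true, %Q is r5, %R is r4 and %H is still r5.  */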
24139 case 'Q':
24140 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24141 {
24142 rtx part = gen_lowpart (SImode, x);
24143 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24144 return;
24145 }
24146
24147 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24148 {
24149 output_operand_lossage ("invalid operand for code '%c'", code);
24150 return;
24151 }
24152
24153 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24154 return;
24155
24156 case 'R':
24157 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24158 {
24159 machine_mode mode = GET_MODE (x);
24160 rtx part;
24161
24162 if (mode == VOIDmode)
24163 mode = DImode;
24164 part = gen_highpart_mode (SImode, mode, x);
24165 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24166 return;
24167 }
24168
24169 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24170 {
24171 output_operand_lossage ("invalid operand for code '%c'", code);
24172 return;
24173 }
24174
24175 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24176 return;
24177
24178 case 'H':
24179 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24180 {
24181 output_operand_lossage ("invalid operand for code '%c'", code);
24182 return;
24183 }
24184
24185 asm_fprintf (stream, "%r", REGNO (x) + 1);
24186 return;
24187
24188 case 'J':
24189 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24190 {
24191 output_operand_lossage ("invalid operand for code '%c'", code);
24192 return;
24193 }
24194
24195 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24196 return;
24197
24198 case 'K':
24199 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24200 {
24201 output_operand_lossage ("invalid operand for code '%c'", code);
24202 return;
24203 }
24204
24205 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24206 return;
24207
24208 case 'm':
24209 asm_fprintf (stream, "%r",
24210 REG_P (XEXP (x, 0))
24211 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24212 return;
24213
24214 case 'M':
24215 asm_fprintf (stream, "{%r-%r}",
24216 REGNO (x),
24217 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24218 return;
24219
24220 /* Like 'M', but writing doubleword vector registers, for use by Neon
24221 insns. */
24222 case 'h':
24223 {
24224 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24225 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24226 if (numregs == 1)
24227 asm_fprintf (stream, "{d%d}", regno);
24228 else
24229 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24230 }
24231 return;
24232
24233 case 'd':
24234 /* CONST_TRUE_RTX means always -- that's the default. */
24235 if (x == const_true_rtx)
24236 return;
24237
24238 if (!COMPARISON_P (x))
24239 {
24240 output_operand_lossage ("invalid operand for code '%c'", code);
24241 return;
24242 }
24243
24244 fputs (arm_condition_codes[get_arm_condition_code (x)],
24245 stream);
24246 return;
24247
24248 case 'D':
24249 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24250 want to do that. */
24251 if (x == const_true_rtx)
24252 {
24253 output_operand_lossage ("instruction never executed");
24254 return;
24255 }
24256 if (!COMPARISON_P (x))
24257 {
24258 output_operand_lossage ("invalid operand for code '%c'", code);
24259 return;
24260 }
24261
24262 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24263 (get_arm_condition_code (x))],
24264 stream);
24265 return;
24266
24267 case 'V':
24268 {
24269 /* Output the LSB (shift) and width for a bitmask instruction
24270 based on a literal mask. The LSB is printed first,
24271 followed by the width.
24272
24273 Eg. For 0b1...1110001, the result is #1, #3. */
24274 if (!CONST_INT_P (x))
24275 {
24276 output_operand_lossage ("invalid operand for code '%c'", code);
24277 return;
24278 }
24279
24280 unsigned HOST_WIDE_INT val
24281 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24282 int lsb = exact_log2 (val & -val);
24283 asm_fprintf (stream, "#%d, #%d", lsb,
24284 (exact_log2 (val + (val & -val)) - lsb));
24285 }
24286 return;
24287
24288 case 's':
24289 case 'W':
24290 case 'X':
24291 case 'Y':
24292 case 'Z':
24293 /* Former Maverick support, removed after GCC-4.7. */
24294 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24295 return;
24296
24297 case 'U':
24298 if (!REG_P (x)
24299 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24300 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24301 /* Bad value for wCG register number. */
24302 {
24303 output_operand_lossage ("invalid operand for code '%c'", code);
24304 return;
24305 }
24306
24307 else
24308 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24309 return;
24310
24311 /* Print an iWMMXt control register name. */
24312 case 'w':
24313 if (!CONST_INT_P (x)
24314 || INTVAL (x) < 0
24315 || INTVAL (x) >= 16)
24316 /* Bad value for wC register number. */
24317 {
24318 output_operand_lossage ("invalid operand for code '%c'", code);
24319 return;
24320 }
24321
24322 else
24323 {
24324 static const char * wc_reg_names [16] =
24325 {
24326 "wCID", "wCon", "wCSSF", "wCASF",
24327 "wC4", "wC5", "wC6", "wC7",
24328 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24329 "wC12", "wC13", "wC14", "wC15"
24330 };
24331
24332 fputs (wc_reg_names [INTVAL (x)], stream);
24333 }
24334 return;
24335
24336 /* Print the high single-precision register of a VFP double-precision
24337 register. */
24338 case 'p':
24339 {
24340 machine_mode mode = GET_MODE (x);
24341 int regno;
24342
24343 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24344 {
24345 output_operand_lossage ("invalid operand for code '%c'", code);
24346 return;
24347 }
24348
24349 regno = REGNO (x);
24350 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24351 {
24352 output_operand_lossage ("invalid operand for code '%c'", code);
24353 return;
24354 }
24355
24356 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24357 }
24358 return;
24359
24360 /* Print a VFP/Neon double precision or quad precision register name. */
24361 case 'P':
24362 case 'q':
24363 {
24364 machine_mode mode = GET_MODE (x);
24365 int is_quad = (code == 'q');
24366 int regno;
24367
24368 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24369 {
24370 output_operand_lossage ("invalid operand for code '%c'", code);
24371 return;
24372 }
24373
24374 if (!REG_P (x)
24375 || !IS_VFP_REGNUM (REGNO (x)))
24376 {
24377 output_operand_lossage ("invalid operand for code '%c'", code);
24378 return;
24379 }
24380
24381 regno = REGNO (x);
24382 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24383 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24384 {
24385 output_operand_lossage ("invalid operand for code '%c'", code);
24386 return;
24387 }
24388
24389 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24390 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24391 }
24392 return;
24393
24394 /* These two codes print the low/high doubleword register of a Neon quad
24395 register, respectively. For pair-structure types, can also print
24396 low/high quadword registers. */
24397 case 'e':
24398 case 'f':
24399 {
24400 machine_mode mode = GET_MODE (x);
24401 int regno;
24402
24403 if ((GET_MODE_SIZE (mode) != 16
24404 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24405 {
24406 output_operand_lossage ("invalid operand for code '%c'", code);
24407 return;
24408 }
24409
24410 regno = REGNO (x);
24411 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24412 {
24413 output_operand_lossage ("invalid operand for code '%c'", code);
24414 return;
24415 }
24416
24417 if (GET_MODE_SIZE (mode) == 16)
24418 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24419 + (code == 'f' ? 1 : 0));
24420 else
24421 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24422 + (code == 'f' ? 1 : 0));
24423 }
24424 return;
24425
24426 /* Print a VFPv3 floating-point constant, represented as an integer
24427 index. */
24428 case 'G':
24429 {
24430 int index = vfp3_const_double_index (x);
24431 gcc_assert (index != -1);
24432 fprintf (stream, "%d", index);
24433 }
24434 return;
24435
24436 /* Print bits representing opcode features for Neon.
24437
24438 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24439 and polynomials as unsigned.
24440
24441 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24442
24443 Bit 2 is 1 for rounding functions, 0 otherwise. */
24444
24445 /* Identify the type as 's', 'u', 'p' or 'f'. */
24446 case 'T':
24447 {
24448 HOST_WIDE_INT bits = INTVAL (x);
24449 fputc ("uspf"[bits & 3], stream);
24450 }
24451 return;
24452
24453 /* Likewise, but signed and unsigned integers are both 'i'. */
24454 case 'F':
24455 {
24456 HOST_WIDE_INT bits = INTVAL (x);
24457 fputc ("iipf"[bits & 3], stream);
24458 }
24459 return;
24460
24461 /* As for 'T', but emit 'u' instead of 'p'. */
24462 case 't':
24463 {
24464 HOST_WIDE_INT bits = INTVAL (x);
24465 fputc ("usuf"[bits & 3], stream);
24466 }
24467 return;
24468
24469 /* Bit 2: rounding (vs none). */
24470 case 'O':
24471 {
24472 HOST_WIDE_INT bits = INTVAL (x);
24473 fputs ((bits & 4) != 0 ? "r" : "", stream);
24474 }
24475 return;
24476
24477 /* Memory operand for vld1/vst1 instruction. */
24478 case 'A':
24479 {
24480 rtx addr;
24481 bool postinc = FALSE;
24482 rtx postinc_reg = NULL;
24483 unsigned align, memsize, align_bits;
24484
24485 gcc_assert (MEM_P (x));
24486 addr = XEXP (x, 0);
24487 if (GET_CODE (addr) == POST_INC)
24488 {
24489 postinc = 1;
24490 addr = XEXP (addr, 0);
24491 }
24492 if (GET_CODE (addr) == POST_MODIFY)
24493 {
24494 postinc_reg = XEXP (XEXP (addr, 1), 1);
24495 addr = XEXP (addr, 0);
24496 }
24497 asm_fprintf (stream, "[%r", REGNO (addr));
24498
24499 /* We know the alignment of this access, so we can emit a hint in the
24500 instruction (for some alignments) as an aid to the memory subsystem
24501 of the target. */
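/* Illustrative example (not part of the original comment): a 16-byte
   access known to be 16-byte aligned gets the ":128" hint, so with r0
   as the base register the operand prints as "[r0:128]".  */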
24502 align = MEM_ALIGN (x) >> 3;
24503 memsize = MEM_SIZE (x);
24504
24505 /* Only certain alignment specifiers are supported by the hardware. */
24506 if (memsize == 32 && (align % 32) == 0)
24507 align_bits = 256;
24508 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24509 align_bits = 128;
24510 else if (memsize >= 8 && (align % 8) == 0)
24511 align_bits = 64;
24512 else
24513 align_bits = 0;
24514
24515 if (align_bits != 0)
24516 asm_fprintf (stream, ":%d", align_bits);
24517
24518 asm_fprintf (stream, "]");
24519
24520 if (postinc)
24521 fputs("!", stream);
24522 if (postinc_reg)
24523 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24524 }
24525 return;
24526
24527 /* To print the memory operand with the "Ux" or "Uj" constraint. Based on the
24528 rtx_code, the memory operand's output looks like one of the following:
24529 1. [Rn], #+/-<imm>
24530 2. [Rn, #+/-<imm>]!
24531 3. [Rn, #+/-<imm>]
24532 4. [Rn]. */
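/* E.g. (illustrative): an SImode POST_INC access through r2 prints as
   "[r2], #4", while a plain REG address prints as "[r2]".  */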
24533 case 'E':
24534 {
24535 rtx addr;
24536 rtx postinc_reg = NULL;
24537 unsigned inc_val = 0;
24538 enum rtx_code code;
24539
24540 gcc_assert (MEM_P (x));
24541 addr = XEXP (x, 0);
24542 code = GET_CODE (addr);
24543 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24544 || code == PRE_DEC)
24545 {
24546 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24547 inc_val = GET_MODE_SIZE (GET_MODE (x));
24548 if (code == POST_INC || code == POST_DEC)
24549 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24550 ? "": "-", inc_val);
24551 else
24552 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24553 ? "": "-", inc_val);
24554 }
24555 else if (code == POST_MODIFY || code == PRE_MODIFY)
24556 {
24557 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24558 postinc_reg = XEXP (XEXP (addr, 1), 1);
24559 if (postinc_reg && CONST_INT_P (postinc_reg))
24560 {
24561 if (code == POST_MODIFY)
24562 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24563 else
24564 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24565 }
24566 }
24567 else if (code == PLUS)
24568 {
24569 rtx base = XEXP (addr, 0);
24570 rtx index = XEXP (addr, 1);
24571
24572 gcc_assert (REG_P (base) && CONST_INT_P (index));
24573
24574 HOST_WIDE_INT offset = INTVAL (index);
24575 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24576 }
24577 else
24578 {
24579 gcc_assert (REG_P (addr));
24580 asm_fprintf (stream, "[%r]",REGNO (addr));
24581 }
24582 }
24583 return;
24584
24585 case 'C':
24586 {
24587 rtx addr;
24588
24589 gcc_assert (MEM_P (x));
24590 addr = XEXP (x, 0);
24591 gcc_assert (REG_P (addr));
24592 asm_fprintf (stream, "[%r]", REGNO (addr));
24593 }
24594 return;
24595
24596 /* Translate an S register number into a D register number and element index. */
24597 case 'y':
24598 {
24599 machine_mode mode = GET_MODE (x);
24600 int regno;
24601
24602 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24603 {
24604 output_operand_lossage ("invalid operand for code '%c'", code);
24605 return;
24606 }
24607
24608 regno = REGNO (x);
24609 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24610 {
24611 output_operand_lossage ("invalid operand for code '%c'", code);
24612 return;
24613 }
24614
24615 regno = regno - FIRST_VFP_REGNUM;
24616 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24617 }
24618 return;
24619
24620 case 'v':
24621 gcc_assert (CONST_DOUBLE_P (x));
24622 int result;
24623 result = vfp3_const_double_for_fract_bits (x);
24624 if (result == 0)
24625 result = vfp3_const_double_for_bits (x);
24626 fprintf (stream, "#%d", result);
24627 return;
24628
24629 /* Register specifier for vld1.16/vst1.16. Translate the S register
24630 number into a D register number and element index. */
24631 case 'z':
24632 {
24633 machine_mode mode = GET_MODE (x);
24634 int regno;
24635
24636 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24637 {
24638 output_operand_lossage ("invalid operand for code '%c'", code);
24639 return;
24640 }
24641
24642 regno = REGNO (x);
24643 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24644 {
24645 output_operand_lossage ("invalid operand for code '%c'", code);
24646 return;
24647 }
24648
24649 regno = regno - FIRST_VFP_REGNUM;
24650 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24651 }
24652 return;
24653
24654 default:
24655 if (x == 0)
24656 {
24657 output_operand_lossage ("missing operand");
24658 return;
24659 }
24660
24661 switch (GET_CODE (x))
24662 {
24663 case REG:
24664 asm_fprintf (stream, "%r", REGNO (x));
24665 break;
24666
24667 case MEM:
24668 output_address (GET_MODE (x), XEXP (x, 0));
24669 break;
24670
24671 case CONST_DOUBLE:
24672 {
24673 char fpstr[20];
24674 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24675 sizeof (fpstr), 0, 1);
24676 fprintf (stream, "#%s", fpstr);
24677 }
24678 break;
24679
24680 default:
24681 gcc_assert (GET_CODE (x) != NEG);
24682 fputc ('#', stream);
24683 if (GET_CODE (x) == HIGH)
24684 {
24685 fputs (":lower16:", stream);
24686 x = XEXP (x, 0);
24687 }
24688
24689 output_addr_const (stream, x);
24690 break;
24691 }
24692 }
24693 }
24694 \f
24695 /* Target hook for printing a memory address. */
24696 static void
24697 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24698 {
24699 if (TARGET_32BIT)
24700 {
24701 int is_minus = GET_CODE (x) == MINUS;
24702
24703 if (REG_P (x))
24704 asm_fprintf (stream, "[%r]", REGNO (x));
24705 else if (GET_CODE (x) == PLUS || is_minus)
24706 {
24707 rtx base = XEXP (x, 0);
24708 rtx index = XEXP (x, 1);
24709 HOST_WIDE_INT offset = 0;
24710 if (!REG_P (base)
24711 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24712 {
24713 /* Ensure that BASE is a register. */
24714 /* (one of them must be). */
24715 /* Also ensure that SP is not used as an index register. */
24716 std::swap (base, index);
24717 }
24718 switch (GET_CODE (index))
24719 {
24720 case CONST_INT:
24721 offset = INTVAL (index);
24722 if (is_minus)
24723 offset = -offset;
24724 asm_fprintf (stream, "[%r, #%wd]",
24725 REGNO (base), offset);
24726 break;
24727
24728 case REG:
24729 asm_fprintf (stream, "[%r, %s%r]",
24730 REGNO (base), is_minus ? "-" : "",
24731 REGNO (index));
24732 break;
24733
24734 case MULT:
24735 case ASHIFTRT:
24736 case LSHIFTRT:
24737 case ASHIFT:
24738 case ROTATERT:
24739 {
24740 asm_fprintf (stream, "[%r, %s%r",
24741 REGNO (base), is_minus ? "-" : "",
24742 REGNO (XEXP (index, 0)));
24743 arm_print_operand (stream, index, 'S');
24744 fputs ("]", stream);
24745 break;
24746 }
24747
24748 default:
24749 gcc_unreachable ();
24750 }
24751 }
24752 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24753 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24754 {
24755 gcc_assert (REG_P (XEXP (x, 0)));
24756
24757 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24758 asm_fprintf (stream, "[%r, #%s%d]!",
24759 REGNO (XEXP (x, 0)),
24760 GET_CODE (x) == PRE_DEC ? "-" : "",
24761 GET_MODE_SIZE (mode));
24762 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24763 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24764 else
24765 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24766 GET_CODE (x) == POST_DEC ? "-" : "",
24767 GET_MODE_SIZE (mode));
24768 }
24769 else if (GET_CODE (x) == PRE_MODIFY)
24770 {
24771 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24772 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24773 asm_fprintf (stream, "#%wd]!",
24774 INTVAL (XEXP (XEXP (x, 1), 1)));
24775 else
24776 asm_fprintf (stream, "%r]!",
24777 REGNO (XEXP (XEXP (x, 1), 1)));
24778 }
24779 else if (GET_CODE (x) == POST_MODIFY)
24780 {
24781 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24782 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24783 asm_fprintf (stream, "#%wd",
24784 INTVAL (XEXP (XEXP (x, 1), 1)));
24785 else
24786 asm_fprintf (stream, "%r",
24787 REGNO (XEXP (XEXP (x, 1), 1)));
24788 }
24789 else output_addr_const (stream, x);
24790 }
24791 else
24792 {
24793 if (REG_P (x))
24794 asm_fprintf (stream, "[%r]", REGNO (x));
24795 else if (GET_CODE (x) == POST_INC)
24796 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24797 else if (GET_CODE (x) == PLUS)
24798 {
24799 gcc_assert (REG_P (XEXP (x, 0)));
24800 if (CONST_INT_P (XEXP (x, 1)))
24801 asm_fprintf (stream, "[%r, #%wd]",
24802 REGNO (XEXP (x, 0)),
24803 INTVAL (XEXP (x, 1)));
24804 else
24805 asm_fprintf (stream, "[%r, %r]",
24806 REGNO (XEXP (x, 0)),
24807 REGNO (XEXP (x, 1)));
24808 }
24809 else
24810 output_addr_const (stream, x);
24811 }
24812 }
24813 \f
24814 /* Target hook for indicating whether a punctuation character for
24815 TARGET_PRINT_OPERAND is valid. */
24816 static bool
24817 arm_print_operand_punct_valid_p (unsigned char code)
24818 {
24819 return (code == '@' || code == '|' || code == '.'
24820 || code == '(' || code == ')' || code == '#'
24821 || (TARGET_32BIT && (code == '?'))
24822 || (TARGET_THUMB2 && (code == '!'))
24823 || (TARGET_THUMB && (code == '_')));
24824 }
24825 \f
24826 /* Target hook for assembling integer objects. The ARM version needs to
24827 handle word-sized values specially. */
24828 static bool
24829 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24830 {
24831 machine_mode mode;
24832
24833 if (size == UNITS_PER_WORD && aligned_p)
24834 {
24835 fputs ("\t.word\t", asm_out_file);
24836 output_addr_const (asm_out_file, x);
24837
24838 /* Mark symbols as position independent. We only do this in the
24839 .text segment, not in the .data segment. */
24840 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24841 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24842 {
24843 /* See legitimize_pic_address for an explanation of the
24844 TARGET_VXWORKS_RTP check. */
24845 /* References to weak symbols cannot be resolved locally:
24846 they may be overridden by a non-weak definition at link
24847 time. */
24848 if (!arm_pic_data_is_text_relative
24849 || (SYMBOL_REF_P (x)
24850 && (!SYMBOL_REF_LOCAL_P (x)
24851 || (SYMBOL_REF_DECL (x)
24852 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24853 || (SYMBOL_REF_FUNCTION_P (x)
24854 && !arm_fdpic_local_funcdesc_p (x)))))
24855 {
24856 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24857 fputs ("(GOTFUNCDESC)", asm_out_file);
24858 else
24859 fputs ("(GOT)", asm_out_file);
24860 }
24861 else
24862 {
24863 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24864 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24865 else
24866 {
24867 bool is_readonly;
24868
24869 if (!TARGET_FDPIC
24870 || arm_is_segment_info_known (x, &is_readonly))
24871 fputs ("(GOTOFF)", asm_out_file);
24872 else
24873 fputs ("(GOT)", asm_out_file);
24874 }
24875 }
24876 }
24877
24878 /* For FDPIC we also have to mark symbol for .data section. */
24879 if (TARGET_FDPIC
24880 && !making_const_table
24881 && SYMBOL_REF_P (x)
24882 && SYMBOL_REF_FUNCTION_P (x))
24883 fputs ("(FUNCDESC)", asm_out_file);
24884
24885 fputc ('\n', asm_out_file);
24886 return true;
24887 }
24888
24889 mode = GET_MODE (x);
24890
24891 if (arm_vector_mode_supported_p (mode))
24892 {
24893 int i, units;
24894
24895 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24896
24897 units = CONST_VECTOR_NUNITS (x);
24898 size = GET_MODE_UNIT_SIZE (mode);
24899
24900 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24901 for (i = 0; i < units; i++)
24902 {
24903 rtx elt = CONST_VECTOR_ELT (x, i);
24904 assemble_integer
24905 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24906 }
24907 else
24908 for (i = 0; i < units; i++)
24909 {
24910 rtx elt = CONST_VECTOR_ELT (x, i);
24911 assemble_real
24912 (*CONST_DOUBLE_REAL_VALUE (elt),
24913 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24914 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24915 }
24916
24917 return true;
24918 }
24919
24920 return default_assemble_integer (x, size, aligned_p);
24921 }
24922
24923 static void
24924 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24925 {
24926 section *s;
24927
24928 if (!TARGET_AAPCS_BASED)
24929 {
24930 (is_ctor ?
24931 default_named_section_asm_out_constructor
24932 : default_named_section_asm_out_destructor) (symbol, priority);
24933 return;
24934 }
24935
24936 /* Put these in the .init_array section, using a special relocation. */
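/* E.g. (illustrative): a constructor with priority 101 ends up in the
   section ".init_array.00101".  */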
24937 if (priority != DEFAULT_INIT_PRIORITY)
24938 {
24939 char buf[18];
24940 sprintf (buf, "%s.%.5u",
24941 is_ctor ? ".init_array" : ".fini_array",
24942 priority);
24943 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24944 }
24945 else if (is_ctor)
24946 s = ctors_section;
24947 else
24948 s = dtors_section;
24949
24950 switch_to_section (s);
24951 assemble_align (POINTER_SIZE);
24952 fputs ("\t.word\t", asm_out_file);
24953 output_addr_const (asm_out_file, symbol);
24954 fputs ("(target1)\n", asm_out_file);
24955 }
24956
24957 /* Add a function to the list of static constructors. */
24958
24959 static void
24960 arm_elf_asm_constructor (rtx symbol, int priority)
24961 {
24962 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24963 }
24964
24965 /* Add a function to the list of static destructors. */
24966
24967 static void
24968 arm_elf_asm_destructor (rtx symbol, int priority)
24969 {
24970 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24971 }
24972 \f
24973 /* A finite state machine takes care of noticing whether or not instructions
24974 can be conditionally executed, and thus decrease execution time and code
24975 size by deleting branch instructions. The fsm is controlled by
24976 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24977
24978 /* The states of the fsm controlling condition codes are:
24979 0: normal, do nothing special
24980 1: make ASM_OUTPUT_OPCODE not output this instruction
24981 2: make ASM_OUTPUT_OPCODE not output this instruction
24982 3: make instructions conditional
24983 4: make instructions conditional
24984
24985 State transitions (state->state by whom under condition):
24986 0 -> 1 final_prescan_insn if the `target' is a label
24987 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24988 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24989 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24990 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24991 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24992 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24993 (the target insn is arm_target_insn).
24994
24995 If the jump clobbers the conditions then we use states 2 and 4.
24996
24997 A similar thing can be done with conditional return insns.
24998
24999 XXX In case the `target' is an unconditional branch, this conditionalising
25000 of the instructions always reduces code size, but not always execution
25001 time. But then, I want to reduce the code size to somewhere near what
25002 /bin/cc produces. */
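/* Purely for illustration (not taken from any particular test case), the
   effect of this machinery in ARM state is to turn a short forward branch
   such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #0
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #0

   i.e. the branch is not output and the skipped instructions are given the
   inverse condition.  */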
25003
25004 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25005 instructions. When a COND_EXEC instruction is seen the subsequent
25006 instructions are scanned so that multiple conditional instructions can be
25007 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25008 specify the length and true/false mask for the IT block. These will be
25009 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
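/* A rough example of the encoding, assuming an "eq" predicate: three
   consecutive COND_EXEC insns, the first two on "eq" and the third on the
   inverse "ne", leave arm_condexec_masklen == 3 and arm_condexec_mask ==
   0b011, which thumb2_asm_output_opcode prints as "itte eq".  */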
25010
25011 /* Returns the index of the ARM condition code string in
25012 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25013 COMPARISON should be an rtx like `(eq (...) (...))'. */
25014
25015 enum arm_cond_code
25016 maybe_get_arm_condition_code (rtx comparison)
25017 {
25018 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25019 enum arm_cond_code code;
25020 enum rtx_code comp_code = GET_CODE (comparison);
25021
25022 if (GET_MODE_CLASS (mode) != MODE_CC)
25023 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25024 XEXP (comparison, 1));
25025
25026 switch (mode)
25027 {
25028 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25029 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25030 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25031 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25032 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25033 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25034 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25035 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25036 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25037 case E_CC_DLTUmode: code = ARM_CC;
25038
25039 dominance:
25040 if (comp_code == EQ)
25041 return ARM_INVERSE_CONDITION_CODE (code);
25042 if (comp_code == NE)
25043 return code;
25044 return ARM_NV;
25045
25046 case E_CC_NZmode:
25047 switch (comp_code)
25048 {
25049 case NE: return ARM_NE;
25050 case EQ: return ARM_EQ;
25051 case GE: return ARM_PL;
25052 case LT: return ARM_MI;
25053 default: return ARM_NV;
25054 }
25055
25056 case E_CC_Zmode:
25057 switch (comp_code)
25058 {
25059 case NE: return ARM_NE;
25060 case EQ: return ARM_EQ;
25061 default: return ARM_NV;
25062 }
25063
25064 case E_CC_Nmode:
25065 switch (comp_code)
25066 {
25067 case NE: return ARM_MI;
25068 case EQ: return ARM_PL;
25069 default: return ARM_NV;
25070 }
25071
25072 case E_CCFPEmode:
25073 case E_CCFPmode:
25074 /* We can handle all cases except UNEQ and LTGT. */
25075 switch (comp_code)
25076 {
25077 case GE: return ARM_GE;
25078 case GT: return ARM_GT;
25079 case LE: return ARM_LS;
25080 case LT: return ARM_MI;
25081 case NE: return ARM_NE;
25082 case EQ: return ARM_EQ;
25083 case ORDERED: return ARM_VC;
25084 case UNORDERED: return ARM_VS;
25085 case UNLT: return ARM_LT;
25086 case UNLE: return ARM_LE;
25087 case UNGT: return ARM_HI;
25088 case UNGE: return ARM_PL;
25089 /* UNEQ and LTGT do not have a representation. */
25090 case UNEQ: /* Fall through. */
25091 case LTGT: /* Fall through. */
25092 default: return ARM_NV;
25093 }
25094
25095 case E_CC_SWPmode:
25096 switch (comp_code)
25097 {
25098 case NE: return ARM_NE;
25099 case EQ: return ARM_EQ;
25100 case GE: return ARM_LE;
25101 case GT: return ARM_LT;
25102 case LE: return ARM_GE;
25103 case LT: return ARM_GT;
25104 case GEU: return ARM_LS;
25105 case GTU: return ARM_CC;
25106 case LEU: return ARM_CS;
25107 case LTU: return ARM_HI;
25108 default: return ARM_NV;
25109 }
25110
25111 case E_CC_Cmode:
25112 switch (comp_code)
25113 {
25114 case LTU: return ARM_CS;
25115 case GEU: return ARM_CC;
25116 default: return ARM_NV;
25117 }
25118
25119 case E_CC_NVmode:
25120 switch (comp_code)
25121 {
25122 case GE: return ARM_GE;
25123 case LT: return ARM_LT;
25124 default: return ARM_NV;
25125 }
25126
25127 case E_CC_Bmode:
25128 switch (comp_code)
25129 {
25130 case GEU: return ARM_CS;
25131 case LTU: return ARM_CC;
25132 default: return ARM_NV;
25133 }
25134
25135 case E_CC_Vmode:
25136 switch (comp_code)
25137 {
25138 case NE: return ARM_VS;
25139 case EQ: return ARM_VC;
25140 default: return ARM_NV;
25141 }
25142
25143 case E_CC_ADCmode:
25144 switch (comp_code)
25145 {
25146 case GEU: return ARM_CS;
25147 case LTU: return ARM_CC;
25148 default: return ARM_NV;
25149 }
25150
25151 case E_CCmode:
25152 case E_CC_RSBmode:
25153 switch (comp_code)
25154 {
25155 case NE: return ARM_NE;
25156 case EQ: return ARM_EQ;
25157 case GE: return ARM_GE;
25158 case GT: return ARM_GT;
25159 case LE: return ARM_LE;
25160 case LT: return ARM_LT;
25161 case GEU: return ARM_CS;
25162 case GTU: return ARM_HI;
25163 case LEU: return ARM_LS;
25164 case LTU: return ARM_CC;
25165 default: return ARM_NV;
25166 }
25167
25168 default: gcc_unreachable ();
25169 }
25170 }
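/* For example (illustrative only), a comparison such as
   (ge (reg:CC CC_REGNUM) (const_int 0)) in E_CCmode maps to ARM_GE, which
   selects the "ge" suffix from arm_condition_codes.  */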
25171
25172 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25173 static enum arm_cond_code
25174 get_arm_condition_code (rtx comparison)
25175 {
25176 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25177 gcc_assert (code != ARM_NV);
25178 return code;
25179 }
25180
25181 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25182 code registers when not targeting Thumb1. The VFP condition register
25183 only exists when generating hard-float code. */
25184 static bool
25185 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25186 {
25187 if (!TARGET_32BIT)
25188 return false;
25189
25190 *p1 = CC_REGNUM;
25191 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25192 return true;
25193 }
25194
25195 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25196 instructions. */
25197 void
25198 thumb2_final_prescan_insn (rtx_insn *insn)
25199 {
25200 rtx_insn *first_insn = insn;
25201 rtx body = PATTERN (insn);
25202 rtx predicate;
25203 enum arm_cond_code code;
25204 int n;
25205 int mask;
25206 int max;
25207
25208 /* max_insns_skipped in the tune was already taken into account in the
25209 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25210 just emit the IT blocks as large as we can; it does not make sense to split
25211 the IT blocks. */
25212 max = MAX_INSN_PER_IT_BLOCK;
25213
25214 /* Remove the previous insn from the count of insns to be output. */
25215 if (arm_condexec_count)
25216 arm_condexec_count--;
25217
25218 /* Nothing to do if we are already inside a conditional block. */
25219 if (arm_condexec_count)
25220 return;
25221
25222 if (GET_CODE (body) != COND_EXEC)
25223 return;
25224
25225 /* Conditional jumps are implemented directly. */
25226 if (JUMP_P (insn))
25227 return;
25228
25229 predicate = COND_EXEC_TEST (body);
25230 arm_current_cc = get_arm_condition_code (predicate);
25231
25232 n = get_attr_ce_count (insn);
25233 arm_condexec_count = 1;
25234 arm_condexec_mask = (1 << n) - 1;
25235 arm_condexec_masklen = n;
25236 /* See if subsequent instructions can be combined into the same block. */
25237 for (;;)
25238 {
25239 insn = next_nonnote_insn (insn);
25240
25241 /* Jumping into the middle of an IT block is illegal, so a label or
25242 barrier terminates the block. */
25243 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25244 break;
25245
25246 body = PATTERN (insn);
25247 /* USE and CLOBBER aren't really insns, so just skip them. */
25248 if (GET_CODE (body) == USE
25249 || GET_CODE (body) == CLOBBER)
25250 continue;
25251
25252 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25253 if (GET_CODE (body) != COND_EXEC)
25254 break;
25255 /* Stop if this insn would push the IT block past its maximum length. */
25256 n = get_attr_ce_count (insn);
25257 if (arm_condexec_masklen + n > max)
25258 break;
25259
25260 predicate = COND_EXEC_TEST (body);
25261 code = get_arm_condition_code (predicate);
25262 mask = (1 << n) - 1;
25263 if (arm_current_cc == code)
25264 arm_condexec_mask |= (mask << arm_condexec_masklen);
25265 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25266 break;
25267
25268 arm_condexec_count++;
25269 arm_condexec_masklen += n;
25270
25271 /* A jump must be the last instruction in a conditional block. */
25272 if (JUMP_P (insn))
25273 break;
25274 }
25275 /* Restore recog_data (getting the attributes of other insns can
25276 destroy this array, but final.cc assumes that it remains intact
25277 across this call). */
25278 extract_constrain_insn_cached (first_insn);
25279 }
25280
25281 void
25282 arm_final_prescan_insn (rtx_insn *insn)
25283 {
25284 /* BODY will hold the body of INSN. */
25285 rtx body = PATTERN (insn);
25286
25287 /* This will be 1 if trying to repeat the trick, and things need to be
25288 reversed if it appears to fail. */
25289 int reverse = 0;
25290
25291 /* If we start with a return insn, we only succeed if we find another one. */
25292 int seeking_return = 0;
25293 enum rtx_code return_code = UNKNOWN;
25294
25295 /* START_INSN will hold the insn from where we start looking. This is the
25296 first insn after the following code_label if REVERSE is true. */
25297 rtx_insn *start_insn = insn;
25298
25299 /* If in state 4, check if the target branch is reached, in order to
25300 change back to state 0. */
25301 if (arm_ccfsm_state == 4)
25302 {
25303 if (insn == arm_target_insn)
25304 {
25305 arm_target_insn = NULL;
25306 arm_ccfsm_state = 0;
25307 }
25308 return;
25309 }
25310
25311 /* If in state 3, it is possible to repeat the trick, if this insn is an
25312 unconditional branch to a label, and immediately following this branch
25313 is the previous target label which is only used once, and the label this
25314 branch jumps to is not too far off. */
25315 if (arm_ccfsm_state == 3)
25316 {
25317 if (simplejump_p (insn))
25318 {
25319 start_insn = next_nonnote_insn (start_insn);
25320 if (BARRIER_P (start_insn))
25321 {
25322 /* XXX Isn't this always a barrier? */
25323 start_insn = next_nonnote_insn (start_insn);
25324 }
25325 if (LABEL_P (start_insn)
25326 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25327 && LABEL_NUSES (start_insn) == 1)
25328 reverse = TRUE;
25329 else
25330 return;
25331 }
25332 else if (ANY_RETURN_P (body))
25333 {
25334 start_insn = next_nonnote_insn (start_insn);
25335 if (BARRIER_P (start_insn))
25336 start_insn = next_nonnote_insn (start_insn);
25337 if (LABEL_P (start_insn)
25338 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25339 && LABEL_NUSES (start_insn) == 1)
25340 {
25341 reverse = TRUE;
25342 seeking_return = 1;
25343 return_code = GET_CODE (body);
25344 }
25345 else
25346 return;
25347 }
25348 else
25349 return;
25350 }
25351
25352 gcc_assert (!arm_ccfsm_state || reverse);
25353 if (!JUMP_P (insn))
25354 return;
25355
25356 /* This jump might be paralleled with a clobber of the condition codes;
25357 the jump should always come first. */
25358 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25359 body = XVECEXP (body, 0, 0);
25360
25361 if (reverse
25362 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25363 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25364 {
25365 int insns_skipped;
25366 int fail = FALSE, succeed = FALSE;
25367 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25368 int then_not_else = TRUE;
25369 rtx_insn *this_insn = start_insn;
25370 rtx label = 0;
25371
25372 /* Register the insn jumped to. */
25373 if (reverse)
25374 {
25375 if (!seeking_return)
25376 label = XEXP (SET_SRC (body), 0);
25377 }
25378 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25379 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25380 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25381 {
25382 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25383 then_not_else = FALSE;
25384 }
25385 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25386 {
25387 seeking_return = 1;
25388 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25389 }
25390 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25391 {
25392 seeking_return = 1;
25393 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25394 then_not_else = FALSE;
25395 }
25396 else
25397 gcc_unreachable ();
25398
25399 /* See how many insns this branch skips, and what kind of insns. If all
25400 insns are okay, and the label or unconditional branch to the same
25401 label is not too far away, succeed. */
25402 for (insns_skipped = 0;
25403 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25404 {
25405 rtx scanbody;
25406
25407 this_insn = next_nonnote_insn (this_insn);
25408 if (!this_insn)
25409 break;
25410
25411 switch (GET_CODE (this_insn))
25412 {
25413 case CODE_LABEL:
25414 /* Succeed if it is the target label, otherwise fail since
25415 control falls in from somewhere else. */
25416 if (this_insn == label)
25417 {
25418 arm_ccfsm_state = 1;
25419 succeed = TRUE;
25420 }
25421 else
25422 fail = TRUE;
25423 break;
25424
25425 case BARRIER:
25426 /* Succeed if the following insn is the target label.
25427 Otherwise fail.
25428 If return insns are used then the last insn in a function
25429 will be a barrier. */
25430 this_insn = next_nonnote_insn (this_insn);
25431 if (this_insn && this_insn == label)
25432 {
25433 arm_ccfsm_state = 1;
25434 succeed = TRUE;
25435 }
25436 else
25437 fail = TRUE;
25438 break;
25439
25440 case CALL_INSN:
25441 /* The AAPCS says that conditional calls should not be
25442 used since they make interworking inefficient (the
25443 linker can't transform BL<cond> into BLX). That's
25444 only a problem if the machine has BLX. */
25445 if (arm_arch5t)
25446 {
25447 fail = TRUE;
25448 break;
25449 }
25450
25451 /* Succeed if the following insn is the target label, or
25452 if the following two insns are a barrier and the
25453 target label. */
25454 this_insn = next_nonnote_insn (this_insn);
25455 if (this_insn && BARRIER_P (this_insn))
25456 this_insn = next_nonnote_insn (this_insn);
25457
25458 if (this_insn && this_insn == label
25459 && insns_skipped < max_insns_skipped)
25460 {
25461 arm_ccfsm_state = 1;
25462 succeed = TRUE;
25463 }
25464 else
25465 fail = TRUE;
25466 break;
25467
25468 case JUMP_INSN:
25469 /* If this is an unconditional branch to the same label, succeed.
25470 If it is to another label, do nothing. If it is conditional,
25471 fail. */
25472 /* XXX Probably, the tests for SET and the PC are
25473 unnecessary. */
25474
25475 scanbody = PATTERN (this_insn);
25476 if (GET_CODE (scanbody) == SET
25477 && GET_CODE (SET_DEST (scanbody)) == PC)
25478 {
25479 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25480 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25481 {
25482 arm_ccfsm_state = 2;
25483 succeed = TRUE;
25484 }
25485 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25486 fail = TRUE;
25487 }
25488 /* Fail if a conditional return is undesirable (e.g. on a
25489 StrongARM), but still allow this if optimizing for size. */
25490 else if (GET_CODE (scanbody) == return_code
25491 && !use_return_insn (TRUE, NULL)
25492 && !optimize_size)
25493 fail = TRUE;
25494 else if (GET_CODE (scanbody) == return_code)
25495 {
25496 arm_ccfsm_state = 2;
25497 succeed = TRUE;
25498 }
25499 else if (GET_CODE (scanbody) == PARALLEL)
25500 {
25501 switch (get_attr_conds (this_insn))
25502 {
25503 case CONDS_NOCOND:
25504 break;
25505 default:
25506 fail = TRUE;
25507 break;
25508 }
25509 }
25510 else
25511 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25512
25513 break;
25514
25515 case INSN:
25516 /* Instructions using or affecting the condition codes make it
25517 fail. */
25518 scanbody = PATTERN (this_insn);
25519 if (!(GET_CODE (scanbody) == SET
25520 || GET_CODE (scanbody) == PARALLEL)
25521 || get_attr_conds (this_insn) != CONDS_NOCOND)
25522 fail = TRUE;
25523 break;
25524
25525 default:
25526 break;
25527 }
25528 }
25529 if (succeed)
25530 {
25531 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25532 arm_target_label = CODE_LABEL_NUMBER (label);
25533 else
25534 {
25535 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25536
25537 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25538 {
25539 this_insn = next_nonnote_insn (this_insn);
25540 gcc_assert (!this_insn
25541 || (!BARRIER_P (this_insn)
25542 && !LABEL_P (this_insn)));
25543 }
25544 if (!this_insn)
25545 {
25546 /* Oh, dear! We ran off the end; give up. */
25547 extract_constrain_insn_cached (insn);
25548 arm_ccfsm_state = 0;
25549 arm_target_insn = NULL;
25550 return;
25551 }
25552 arm_target_insn = this_insn;
25553 }
25554
25555 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25556 what it was. */
25557 if (!reverse)
25558 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25559
25560 if (reverse || then_not_else)
25561 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25562 }
25563
25564 /* Restore recog_data (getting the attributes of other insns can
25565 destroy this array, but final.cc assumes that it remains intact
25566 across this call). */
25567 extract_constrain_insn_cached (insn);
25568 }
25569 }
25570
25571 /* Output IT instructions. */
25572 void
25573 thumb2_asm_output_opcode (FILE * stream)
25574 {
25575 char buff[5];
25576 int n;
25577
25578 if (arm_condexec_mask)
25579 {
25580 for (n = 0; n < arm_condexec_masklen; n++)
25581 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25582 buff[n] = 0;
25583 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25584 arm_condition_codes[arm_current_cc]);
25585 arm_condexec_mask = 0;
25586 }
25587 }
25588
25589 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
25590 UNITS_PER_WORD bytes wide. */
25591 static unsigned int
25592 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25593 {
25594 if (IS_VPR_REGNUM (regno))
25595 return CEIL (GET_MODE_SIZE (mode), 2);
25596
25597 if (TARGET_32BIT
25598 && regno > PC_REGNUM
25599 && regno != FRAME_POINTER_REGNUM
25600 && regno != ARG_POINTER_REGNUM
25601 && !IS_VFP_REGNUM (regno))
25602 return 1;
25603
25604 return ARM_NUM_REGS (mode);
25605 }
25606
25607 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25608 static bool
25609 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25610 {
25611 if (GET_MODE_CLASS (mode) == MODE_CC)
25612 return (regno == CC_REGNUM
25613 || (TARGET_VFP_BASE
25614 && regno == VFPCC_REGNUM));
25615
25616 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25617 return false;
25618
25619 if (IS_VPR_REGNUM (regno))
25620 return mode == HImode
25621 || mode == V16BImode
25622 || mode == V8BImode
25623 || mode == V4BImode;
25624
25625 if (TARGET_THUMB1)
25626 /* For the Thumb we only allow values bigger than SImode in
25627 registers 0 - 6, so that there is always a second low
25628 register available to hold the upper part of the value.
25629 We probably ought to ensure that the register is the
25630 start of an even numbered register pair. */
25631 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25632
25633 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25634 {
25635 if (mode == DFmode || mode == DImode)
25636 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25637
25638 if (mode == HFmode || mode == BFmode || mode == HImode
25639 || mode == SFmode || mode == SImode)
25640 return VFP_REGNO_OK_FOR_SINGLE (regno);
25641
25642 if (TARGET_NEON)
25643 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25644 || (VALID_NEON_QREG_MODE (mode)
25645 && NEON_REGNO_OK_FOR_QUAD (regno))
25646 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25647 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25648 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25649 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25650 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25651 if (TARGET_HAVE_MVE)
25652 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25653 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25654 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25655
25656 return false;
25657 }
25658
25659 if (TARGET_REALLY_IWMMXT)
25660 {
25661 if (IS_IWMMXT_GR_REGNUM (regno))
25662 return mode == SImode;
25663
25664 if (IS_IWMMXT_REGNUM (regno))
25665 return VALID_IWMMXT_REG_MODE (mode);
25666 }
25667
25668 /* We allow almost any value to be stored in the general registers.
25669 Restrict doubleword quantities to even register pairs in ARM state
25670 so that we can use ldrd. The same restriction applies for MVE
25671 in order to support Armv8.1-M Mainline instructions.
25672 Do not allow very large Neon structure opaque modes in general
25673 registers; they would use too many. */
25674 if (regno <= LAST_ARM_REGNUM)
25675 {
25676 if (ARM_NUM_REGS (mode) > 4)
25677 return false;
25678
25679 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25680 return true;
25681
25682 return !((TARGET_LDRD || TARGET_CDE)
25683 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25684 }
25685
25686 if (regno == FRAME_POINTER_REGNUM
25687 || regno == ARG_POINTER_REGNUM)
25688 /* We only allow integers in the fake hard registers. */
25689 return GET_MODE_CLASS (mode) == MODE_INT;
25690
25691 return false;
25692 }
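/* A couple of illustrative consequences of the rules above: in ARM state
   with LDRD available, DImode is allowed in the even/odd pair r0/r1 but not
   starting at the odd register r1; and DFmode in a VFP register must
   satisfy VFP_REGNO_OK_FOR_DOUBLE.  */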
25693
25694 /* Implement TARGET_MODES_TIEABLE_P. */
25695
25696 static bool
25697 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25698 {
25699 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25700 return true;
25701
25702 /* We specifically want to allow elements of "structure" modes to
25703 be tieable to the structure. This more general condition allows
25704 other rarer situations too. */
25705 if ((TARGET_NEON
25706 && (VALID_NEON_DREG_MODE (mode1)
25707 || VALID_NEON_QREG_MODE (mode1)
25708 || VALID_NEON_STRUCT_MODE (mode1))
25709 && (VALID_NEON_DREG_MODE (mode2)
25710 || VALID_NEON_QREG_MODE (mode2)
25711 || VALID_NEON_STRUCT_MODE (mode2)))
25712 || (TARGET_HAVE_MVE
25713 && (VALID_MVE_MODE (mode1)
25714 || VALID_MVE_STRUCT_MODE (mode1))
25715 && (VALID_MVE_MODE (mode2)
25716 || VALID_MVE_STRUCT_MODE (mode2))))
25717 return true;
25718
25719 return false;
25720 }
25721
25722 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25723 not used in ARM mode. */
25724
25725 enum reg_class
25726 arm_regno_class (int regno)
25727 {
25728 if (regno == PC_REGNUM)
25729 return NO_REGS;
25730
25731 if (IS_VPR_REGNUM (regno))
25732 return VPR_REG;
25733
25734 if (TARGET_THUMB1)
25735 {
25736 if (regno == STACK_POINTER_REGNUM)
25737 return STACK_REG;
25738 if (regno == CC_REGNUM)
25739 return CC_REG;
25740 if (regno < 8)
25741 return LO_REGS;
25742 return HI_REGS;
25743 }
25744
25745 if (TARGET_THUMB2 && regno < 8)
25746 return LO_REGS;
25747
25748 if ( regno <= LAST_ARM_REGNUM
25749 || regno == FRAME_POINTER_REGNUM
25750 || regno == ARG_POINTER_REGNUM)
25751 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25752
25753 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25754 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25755
25756 if (IS_VFP_REGNUM (regno))
25757 {
25758 if (regno <= D7_VFP_REGNUM)
25759 return VFP_D0_D7_REGS;
25760 else if (regno <= LAST_LO_VFP_REGNUM)
25761 return VFP_LO_REGS;
25762 else
25763 return VFP_HI_REGS;
25764 }
25765
25766 if (IS_IWMMXT_REGNUM (regno))
25767 return IWMMXT_REGS;
25768
25769 if (IS_IWMMXT_GR_REGNUM (regno))
25770 return IWMMXT_GR_REGS;
25771
25772 return NO_REGS;
25773 }
25774
25775 /* Handle a special case when computing the offset
25776 of an argument from the frame pointer. */
25777 int
25778 arm_debugger_arg_offset (int value, rtx addr)
25779 {
25780 rtx_insn *insn;
25781
25782 /* We are only interested if dbxout_parms() failed to compute the offset. */
25783 if (value != 0)
25784 return 0;
25785
25786 /* We can only cope with the case where the address is held in a register. */
25787 if (!REG_P (addr))
25788 return 0;
25789
25790 /* If we are using the frame pointer to point at the argument, then
25791 an offset of 0 is correct. */
25792 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25793 return 0;
25794
25795 /* If we are using the stack pointer to point at the
25796 argument, then an offset of 0 is correct. */
25797 /* ??? Check this is consistent with thumb2 frame layout. */
25798 if ((TARGET_THUMB || !frame_pointer_needed)
25799 && REGNO (addr) == SP_REGNUM)
25800 return 0;
25801
25802 /* Oh dear. The argument is pointed to by a register rather
25803 than being held in a register, or being stored at a known
25804 offset from the frame pointer. Since GDB only understands
25805 those two kinds of argument we must translate the address
25806 held in the register into an offset from the frame pointer.
25807 We do this by searching through the insns for the function
25808 looking to see where this register gets its value. If the
25809 register is initialized from the frame pointer plus an offset
25810 then we are in luck and we can continue, otherwise we give up.
25811
25812 This code is exercised by producing debugging information
25813 for a function with arguments like this:
25814
25815 double func (double a, double b, int c, double d) {return d;}
25816
25817 Without this code the stab for parameter 'd' will be set to
25818 an offset of 0 from the frame pointer, rather than 8. */
25819
25820 /* The if() statement says:
25821
25822 If the insn is a normal instruction
25823 and if the insn is setting the value in a register
25824 and if the register being set is the register holding the address of the argument
25825 and if the address is computed by an addition
25826 that involves adding to a register
25827 which is the frame pointer
25828 a constant integer
25829
25830 then... */
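/* Illustratively, the kind of insn the loop below is looking for is
   (set (reg Rn) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int 8)))
   in which case VALUE becomes 8.  */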
25831
25832 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25833 {
25834 if ( NONJUMP_INSN_P (insn)
25835 && GET_CODE (PATTERN (insn)) == SET
25836 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25837 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25838 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25839 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25840 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25841 )
25842 {
25843 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25844
25845 break;
25846 }
25847 }
25848
25849 if (value == 0)
25850 {
25851 debug_rtx (addr);
25852 warning (0, "unable to compute real location of stacked parameter");
25853 value = 8; /* XXX magic hack */
25854 }
25855
25856 return value;
25857 }
25858 \f
25859 /* Implement TARGET_PROMOTED_TYPE. */
25860
25861 static tree
25862 arm_promoted_type (const_tree t)
25863 {
25864 if (SCALAR_FLOAT_TYPE_P (t)
25865 && TYPE_PRECISION (t) == 16
25866 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25867 return float_type_node;
25868 return NULL_TREE;
25869 }
25870
25871 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25872 This simply adds HFmode as a supported mode; even though we don't
25873 implement arithmetic on this type directly, it's supported by
25874 optabs conversions, much the way the double-word arithmetic is
25875 special-cased in the default hook. */
25876
25877 static bool
25878 arm_scalar_mode_supported_p (scalar_mode mode)
25879 {
25880 if (mode == HFmode)
25881 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25882 else if (ALL_FIXED_POINT_MODE_P (mode))
25883 return true;
25884 else
25885 return default_scalar_mode_supported_p (mode);
25886 }
25887
25888 /* Set the value of FLT_EVAL_METHOD.
25889 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25890
25891 0: evaluate all operations and constants, whose semantic type has at
25892 most the range and precision of type float, to the range and
25893 precision of float; evaluate all other operations and constants to
25894 the range and precision of the semantic type;
25895
25896 N, where _FloatN is a supported interchange floating type:
25897 evaluate all operations and constants, whose semantic type has at
25898 most the range and precision of _FloatN type, to the range and
25899 precision of the _FloatN type; evaluate all other operations and
25900 constants to the range and precision of the semantic type;
25901
25902 If we have the ARMv8.2-A extensions then we support _Float16 in native
25903 precision, so we should set this to 16. Otherwise, we support the type,
25904 but want to evaluate expressions in float precision, so set this to
25905 0. */
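/* For instance, with two _Float16 values a and b, the expression a + b is
   evaluated to the range and precision of float under method 0, but
   directly in _Float16 under method 16 (i.e. when TARGET_VFP_FP16INST
   provides native half-precision arithmetic).  */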
25906
25907 static enum flt_eval_method
25908 arm_excess_precision (enum excess_precision_type type)
25909 {
25910 switch (type)
25911 {
25912 case EXCESS_PRECISION_TYPE_FAST:
25913 case EXCESS_PRECISION_TYPE_STANDARD:
25914 /* We can calculate either in 16-bit range and precision or
25915 32-bit range and precision. Make that decision based on whether
25916 we have native support for the ARMv8.2-A 16-bit floating-point
25917 instructions or not. */
25918 return (TARGET_VFP_FP16INST
25919 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25920 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25921 case EXCESS_PRECISION_TYPE_IMPLICIT:
25922 case EXCESS_PRECISION_TYPE_FLOAT16:
25923 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25924 default:
25925 gcc_unreachable ();
25926 }
25927 return FLT_EVAL_METHOD_UNPREDICTABLE;
25928 }
25929
25930
25931 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25932 _Float16 if we are using anything other than ieee format for 16-bit
25933 floating point. Otherwise, punt to the default implementation. */
25934 static opt_scalar_float_mode
25935 arm_floatn_mode (int n, bool extended)
25936 {
25937 if (!extended && n == 16)
25938 {
25939 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25940 return HFmode;
25941 return opt_scalar_float_mode ();
25942 }
25943
25944 return default_floatn_mode (n, extended);
25945 }
25946
25947
25948 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25949 not to early-clobber SRC registers in the process.
25950
25951 We assume that the operands described by SRC and DEST represent a
25952 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25953 number of components into which the copy has been decomposed. */
25954 void
25955 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25956 {
25957 unsigned int i;
25958
25959 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25960 || REGNO (operands[0]) < REGNO (operands[1]))
25961 {
25962 for (i = 0; i < count; i++)
25963 {
25964 operands[2 * i] = dest[i];
25965 operands[2 * i + 1] = src[i];
25966 }
25967 }
25968 else
25969 {
25970 for (i = 0; i < count; i++)
25971 {
25972 operands[2 * i] = dest[count - i - 1];
25973 operands[2 * i + 1] = src[count - i - 1];
25974 }
25975 }
25976 }
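/* Hypothetical example: copying a two-register value from {d0, d1} into the
   overlapping destination {d1, d2} must be emitted in reverse order
   (d2 <- d1, then d1 <- d0) so that d1 is read before it is overwritten,
   which is what the higher-destination branch above arranges.  */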
25977
25978 /* Split operands into moves from op[1] + op[2] into op[0]. */
25979
25980 void
25981 neon_split_vcombine (rtx operands[3])
25982 {
25983 unsigned int dest = REGNO (operands[0]);
25984 unsigned int src1 = REGNO (operands[1]);
25985 unsigned int src2 = REGNO (operands[2]);
25986 machine_mode halfmode = GET_MODE (operands[1]);
25987 unsigned int halfregs = REG_NREGS (operands[1]);
25988 rtx destlo, desthi;
25989
25990 if (src1 == dest && src2 == dest + halfregs)
25991 {
25992 /* No-op move. Can't split to nothing; emit something. */
25993 emit_note (NOTE_INSN_DELETED);
25994 return;
25995 }
25996
25997 /* Preserve register attributes for variable tracking. */
25998 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25999 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26000 GET_MODE_SIZE (halfmode));
26001
26002 /* Special case of reversed high/low parts. Use VSWP. */
26003 if (src2 == dest && src1 == dest + halfregs)
26004 {
26005 rtx x = gen_rtx_SET (destlo, operands[1]);
26006 rtx y = gen_rtx_SET (desthi, operands[2]);
26007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26008 return;
26009 }
26010
26011 if (!reg_overlap_mentioned_p (operands[2], destlo))
26012 {
26013 /* Try to avoid unnecessary moves if part of the result
26014 is in the right place already. */
26015 if (src1 != dest)
26016 emit_move_insn (destlo, operands[1]);
26017 if (src2 != dest + halfregs)
26018 emit_move_insn (desthi, operands[2]);
26019 }
26020 else
26021 {
26022 if (src2 != dest + halfregs)
26023 emit_move_insn (desthi, operands[2]);
26024 if (src1 != dest)
26025 emit_move_insn (destlo, operands[1]);
26026 }
26027 }
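/* A hypothetical instance of the reversed case above: for q0 = vcombine of
   d1 and d0, DESTLO is d0 and DESTHI is d1, and the emitted parallel is
   matched as a single "vswp d0, d1" instead of a move sequence through a
   scratch register.  */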
26028 \f
26029 /* Return the number (counting from 0) of
26030 the least significant set bit in MASK. */
26031
26032 inline static int
26033 number_of_first_bit_set (unsigned mask)
26034 {
26035 return ctz_hwi (mask);
26036 }
26037
26038 /* Like emit_multi_reg_push, but allowing for a different set of
26039 registers to be described as saved. MASK is the set of registers
26040 to be saved; REAL_REGS is the set of registers to be described as
26041 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26042
26043 static rtx_insn *
26044 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26045 {
26046 unsigned long regno;
26047 rtx par[10], tmp, reg;
26048 rtx_insn *insn;
26049 int i, j;
26050
26051 /* Build the parallel of the registers actually being stored. */
26052 for (i = 0; mask; ++i, mask &= mask - 1)
26053 {
26054 regno = ctz_hwi (mask);
26055 reg = gen_rtx_REG (SImode, regno);
26056
26057 if (i == 0)
26058 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26059 else
26060 tmp = gen_rtx_USE (VOIDmode, reg);
26061
26062 par[i] = tmp;
26063 }
26064
26065 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26066 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26067 tmp = gen_frame_mem (BLKmode, tmp);
26068 tmp = gen_rtx_SET (tmp, par[0]);
26069 par[0] = tmp;
26070
26071 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26072 insn = emit_insn (tmp);
26073
26074 /* Always build the stack adjustment note for unwind info. */
26075 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26076 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26077 par[0] = tmp;
26078
26079 /* Build the parallel of the registers recorded as saved for unwind. */
26080 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26081 {
26082 regno = ctz_hwi (real_regs);
26083 reg = gen_rtx_REG (SImode, regno);
26084
26085 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26086 tmp = gen_frame_mem (SImode, tmp);
26087 tmp = gen_rtx_SET (tmp, reg);
26088 RTX_FRAME_RELATED_P (tmp) = 1;
26089 par[j + 1] = tmp;
26090 }
26091
26092 if (j == 0)
26093 tmp = par[0];
26094 else
26095 {
26096 RTX_FRAME_RELATED_P (par[0]) = 1;
26097 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26098 }
26099
26100 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26101
26102 return insn;
26103 }
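/* Sketch of the output, under the assumption that MASK covers r4, r5 and lr:
   the PARALLEL above becomes a single push of those three registers, and the
   attached REG_FRAME_RELATED_EXPR note records the 12-byte stack adjustment
   plus one store per register named in REAL_REGS for the unwinder.  */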
26104
26105 /* Emit code to pop registers from the stack. F is the
26106 assembly file. MASK is the registers to pop. */
26107 static void
26108 thumb_pop (FILE *f, unsigned long mask)
26109 {
26110 int regno;
26111 int lo_mask = mask & 0xFF;
26112
26113 gcc_assert (mask);
26114
26115 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26116 {
26117 /* Special case. Do not generate a POP PC statement here, do it in
26118 thumb_exit (). */
26119 thumb_exit (f, -1);
26120 return;
26121 }
26122
26123 fprintf (f, "\tpop\t{");
26124
26125 /* Look at the low registers first. */
26126 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26127 {
26128 if (lo_mask & 1)
26129 {
26130 asm_fprintf (f, "%r", regno);
26131
26132 if ((lo_mask & ~1) != 0)
26133 fprintf (f, ", ");
26134 }
26135 }
26136
26137 if (mask & (1 << PC_REGNUM))
26138 {
26139 /* Catch popping the PC. */
26140 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26141 || IS_CMSE_ENTRY (arm_current_func_type ()))
26142 {
26143 /* The PC is never popped directly; instead
26144 it is popped into r3 and then BX is used. */
26145 fprintf (f, "}\n");
26146
26147 thumb_exit (f, -1);
26148
26149 return;
26150 }
26151 else
26152 {
26153 if (mask & 0xFF)
26154 fprintf (f, ", ");
26155
26156 asm_fprintf (f, "%r", PC_REGNUM);
26157 }
26158 }
26159
26160 fprintf (f, "}\n");
26161 }
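/* For example, a MASK of r4, r5, r6 and the PC, in a function with no
   interworking, backtrace, EH-return or CMSE-entry restrictions, is printed
   as "pop {r4, r5, r6, pc}"; when those restrictions apply the PC is
   handled via thumb_exit instead.  */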
26162
26163 /* Generate code to return from a thumb function.
26164 If 'reg_containing_return_addr' is -1, then the return address is
26165 actually on the stack, at the stack pointer.
26166
26167 Note: do not forget to update the length attribute of the corresponding insn pattern
26168 when changing assembly output (e.g. the length attribute of epilogue_insns when
26169 updating Armv8-M Baseline Security Extensions register clearing
26170 sequences). */
26171 static void
26172 thumb_exit (FILE *f, int reg_containing_return_addr)
26173 {
26174 unsigned regs_available_for_popping;
26175 unsigned regs_to_pop;
26176 int pops_needed;
26177 unsigned available;
26178 unsigned required;
26179 machine_mode mode;
26180 int size;
26181 int restore_a4 = FALSE;
26182
26183 /* Compute the registers we need to pop. */
26184 regs_to_pop = 0;
26185 pops_needed = 0;
26186
26187 if (reg_containing_return_addr == -1)
26188 {
26189 regs_to_pop |= 1 << LR_REGNUM;
26190 ++pops_needed;
26191 }
26192
26193 if (TARGET_BACKTRACE)
26194 {
26195 /* Restore the (ARM) frame pointer and stack pointer. */
26196 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26197 pops_needed += 2;
26198 }
26199
26200 /* If there is nothing to pop then just emit the BX instruction and
26201 return. */
26202 if (pops_needed == 0)
26203 {
26204 if (crtl->calls_eh_return)
26205 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26206
26207 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26208 {
26209 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26210 emitted by cmse_nonsecure_entry_clear_before_return (). */
26211 if (!TARGET_HAVE_FPCXT_CMSE)
26212 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26213 reg_containing_return_addr);
26214 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26215 }
26216 else
26217 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26218 return;
26219 }
26220 /* Otherwise if we are not supporting interworking and we have not created
26221 a backtrace structure and the function was not entered in ARM mode then
26222 just pop the return address straight into the PC. */
26223 else if (!TARGET_INTERWORK
26224 && !TARGET_BACKTRACE
26225 && !is_called_in_ARM_mode (current_function_decl)
26226 && !crtl->calls_eh_return
26227 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26228 {
26229 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26230 return;
26231 }
26232
26233 /* Find out how many of the (return) argument registers we can corrupt. */
26234 regs_available_for_popping = 0;
26235
26236 /* If returning via __builtin_eh_return, the bottom three registers
26237 all contain information needed for the return. */
26238 if (crtl->calls_eh_return)
26239 size = 12;
26240 else
26241 {
26242 /* Deduce the registers used from the function's
26243 return value. This is more reliable than examining
26244 df_regs_ever_live_p () because that will be set if the register is
26245 ever used in the function, not just if the register is used
26246 to hold a return value. */
26247
26248 if (crtl->return_rtx != 0)
26249 mode = GET_MODE (crtl->return_rtx);
26250 else
26251 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26252
26253 size = GET_MODE_SIZE (mode);
26254
26255 if (size == 0)
26256 {
26257 /* In a void function we can use any argument register.
26258 In a function that returns a structure on the stack
26259 we can use the second and third argument registers. */
26260 if (mode == VOIDmode)
26261 regs_available_for_popping =
26262 (1 << ARG_REGISTER (1))
26263 | (1 << ARG_REGISTER (2))
26264 | (1 << ARG_REGISTER (3));
26265 else
26266 regs_available_for_popping =
26267 (1 << ARG_REGISTER (2))
26268 | (1 << ARG_REGISTER (3));
26269 }
26270 else if (size <= 4)
26271 regs_available_for_popping =
26272 (1 << ARG_REGISTER (2))
26273 | (1 << ARG_REGISTER (3));
26274 else if (size <= 8)
26275 regs_available_for_popping =
26276 (1 << ARG_REGISTER (3));
26277 }
26278
26279 /* Match registers to be popped with registers into which we pop them. */
26280 for (available = regs_available_for_popping,
26281 required = regs_to_pop;
26282 required != 0 && available != 0;
26283 available &= ~(available & - available),
26284 required &= ~(required & - required))
26285 -- pops_needed;
26286
26287 /* If we have any popping registers left over, remove them. */
26288 if (available > 0)
26289 regs_available_for_popping &= ~available;
26290
26291 /* Otherwise if we need another popping register we can use
26292 the fourth argument register. */
26293 else if (pops_needed)
26294 {
26295 /* If we have not found any free argument registers and
26296 reg a4 contains the return address, we must move it. */
26297 if (regs_available_for_popping == 0
26298 && reg_containing_return_addr == LAST_ARG_REGNUM)
26299 {
26300 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26301 reg_containing_return_addr = LR_REGNUM;
26302 }
26303 else if (size > 12)
26304 {
26305 /* Register a4 is being used to hold part of the return value,
26306 but we have dire need of a free, low register. */
26307 restore_a4 = TRUE;
26308
26309 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26310 }
26311
26312 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26313 {
26314 /* The fourth argument register is available. */
26315 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26316
26317 --pops_needed;
26318 }
26319 }
26320
26321 /* Pop as many registers as we can. */
26322 thumb_pop (f, regs_available_for_popping);
26323
26324 /* Process the registers we popped. */
26325 if (reg_containing_return_addr == -1)
26326 {
26327 /* The return address was popped into the lowest numbered register. */
26328 regs_to_pop &= ~(1 << LR_REGNUM);
26329
26330 reg_containing_return_addr =
26331 number_of_first_bit_set (regs_available_for_popping);
26332
26333 /* Remove this register from the mask of available registers, so that
26334 the return address will not be corrupted by further pops. */
26335 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26336 }
26337
26338 /* If we popped other registers then handle them here. */
26339 if (regs_available_for_popping)
26340 {
26341 int frame_pointer;
26342
26343 /* Work out which register currently contains the frame pointer. */
26344 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26345
26346 /* Move it into the correct place. */
26347 asm_fprintf (f, "\tmov\t%r, %r\n",
26348 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26349
26350 /* (Temporarily) remove it from the mask of popped registers. */
26351 regs_available_for_popping &= ~(1 << frame_pointer);
26352 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26353
26354 if (regs_available_for_popping)
26355 {
26356 int stack_pointer;
26357
26358 /* We popped the stack pointer as well,
26359 find the register that contains it. */
26360 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26361
26362 /* Move it into the stack register. */
26363 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26364
26365 /* At this point we have popped all necessary registers, so
26366 do not worry about restoring regs_available_for_popping
26367 to its correct value:
26368
26369 assert (pops_needed == 0)
26370 assert (regs_available_for_popping == (1 << frame_pointer))
26371 assert (regs_to_pop == (1 << STACK_POINTER)) */
26372 }
26373 else
26374 {
26375 /* Since we have just moved the popped value into the frame
26376 pointer, the popping register is available for reuse, and
26377 we know that we still have the stack pointer left to pop. */
26378 regs_available_for_popping |= (1 << frame_pointer);
26379 }
26380 }
26381
26382 /* If we still have registers left on the stack, but we no longer have
26383 any registers into which we can pop them, then we must move the return
26384 address into the link register and make available the register that
26385 contained it. */
26386 if (regs_available_for_popping == 0 && pops_needed > 0)
26387 {
26388 regs_available_for_popping |= 1 << reg_containing_return_addr;
26389
26390 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26391 reg_containing_return_addr);
26392
26393 reg_containing_return_addr = LR_REGNUM;
26394 }
26395
26396 /* If we have registers left on the stack then pop some more.
26397 We know that at most we will want to pop FP and SP. */
26398 if (pops_needed > 0)
26399 {
26400 int popped_into;
26401 int move_to;
26402
26403 thumb_pop (f, regs_available_for_popping);
26404
26405 /* We have popped either FP or SP.
26406 Move whichever one it is into the correct register. */
26407 popped_into = number_of_first_bit_set (regs_available_for_popping);
26408 move_to = number_of_first_bit_set (regs_to_pop);
26409
26410 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26411 --pops_needed;
26412 }
26413
26414 /* If we still have not popped everything then we must have only
26415 had one register available to us and we are now popping the SP. */
26416 if (pops_needed > 0)
26417 {
26418 int popped_into;
26419
26420 thumb_pop (f, regs_available_for_popping);
26421
26422 popped_into = number_of_first_bit_set (regs_available_for_popping);
26423
26424 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26425 /*
26426 assert (regs_to_pop == (1 << STACK_POINTER))
26427 assert (pops_needed == 1)
26428 */
26429 }
26430
26431 /* If necessary restore the a4 register. */
26432 if (restore_a4)
26433 {
26434 if (reg_containing_return_addr != LR_REGNUM)
26435 {
26436 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26437 reg_containing_return_addr = LR_REGNUM;
26438 }
26439
26440 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26441 }
26442
26443 if (crtl->calls_eh_return)
26444 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26445
26446 /* Return to caller. */
26447 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26448 {
26449 /* This is for the cases where LR is not being used to contain the return
26450 address. It may therefore contain information that we might not want
26451 to leak, hence it must be cleared. The value in R0 will never be a
26452 secret at this point, so it is safe to use it, see the clearing code
26453 in cmse_nonsecure_entry_clear_before_return (). */
26454 if (reg_containing_return_addr != LR_REGNUM)
26455 asm_fprintf (f, "\tmov\tlr, r0\n");
26456
26457 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26458 by cmse_nonsecure_entry_clear_before_return (). */
26459 if (!TARGET_HAVE_FPCXT_CMSE)
26460 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26461 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26462 }
26463 else
26464 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26465 }
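/* As a rough worked example: when only the return address needs popping and
   interworking forces a BX return, a void function typically ends with
   something like "pop {r0}" followed by "bx r0", r0 being one of the
   argument registers that are free to corrupt at this point.  */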
26466 \f
26467 /* Scan INSN just before assembler is output for it.
26468 For Thumb-1, we track the status of the condition codes; this
26469 information is used in the cbranchsi4_insn pattern. */
26470 void
26471 thumb1_final_prescan_insn (rtx_insn *insn)
26472 {
26473 if (flag_print_asm_name)
26474 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26475 INSN_ADDRESSES (INSN_UID (insn)));
26476 /* Don't overwrite the previous setter when we get to a cbranch. */
26477 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26478 {
26479 enum attr_conds conds;
26480
26481 if (cfun->machine->thumb1_cc_insn)
26482 {
26483 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26484 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26485 CC_STATUS_INIT;
26486 }
26487 conds = get_attr_conds (insn);
26488 if (conds == CONDS_SET)
26489 {
26490 rtx set = single_set (insn);
26491 cfun->machine->thumb1_cc_insn = insn;
26492 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26493 cfun->machine->thumb1_cc_op1 = const0_rtx;
26494 cfun->machine->thumb1_cc_mode = CC_NZmode;
26495 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26496 {
26497 rtx src1 = XEXP (SET_SRC (set), 1);
26498 if (src1 == const0_rtx)
26499 cfun->machine->thumb1_cc_mode = CCmode;
26500 }
26501 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26502 {
26503 /* Record the src register operand instead of dest because
26504 cprop_hardreg pass propagates src. */
26505 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26506 }
26507 }
26508 else if (conds != CONDS_NOCOND)
26509 cfun->machine->thumb1_cc_insn = NULL_RTX;
26510 }
26511
26512 /* Check whether an unexpected far jump is used. */
26513 if (cfun->machine->lr_save_eliminated
26514 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26515 internal_error("Unexpected thumb1 far jump");
26516 }
26517
26518 int
26519 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26520 {
26521 unsigned HOST_WIDE_INT mask = 0xff;
26522 int i;
26523
26524 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26525 if (val == 0) /* XXX */
26526 return 0;
26527
26528 for (i = 0; i < 25; i++)
26529 if ((val & (mask << i)) == val)
26530 return 1;
26531
26532 return 0;
26533 }
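/* For example, 0x00ff0000 is accepted (0xff shifted left by 16), whereas
   0x00000101 is rejected because no single contiguous 8-bit window covers
   both set bits.  */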
26534
26535 /* Returns nonzero if the current function contains,
26536 or might contain a far jump. */
26537 static int
26538 thumb_far_jump_used_p (void)
26539 {
26540 rtx_insn *insn;
26541 bool far_jump = false;
26542 unsigned int func_size = 0;
26543
26544 /* If we have already decided that far jumps may be used,
26545 do not bother checking again, and always return true even if
26546 it turns out that they are not being used. Once we have made
26547 the decision that far jumps are present (and that hence the link
26548 register will be pushed onto the stack) we cannot go back on it. */
26549 if (cfun->machine->far_jump_used)
26550 return 1;
26551
26552 /* If this function is not being called from the prologue/epilogue
26553 generation code then it must be being called from the
26554 INITIAL_ELIMINATION_OFFSET macro. */
26555 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26556 {
26557 /* In this case we know that we are being asked about the elimination
26558 of the arg pointer register. If that register is not being used,
26559 then there are no arguments on the stack, and we do not have to
26560 worry that a far jump might force the prologue to push the link
26561 register, changing the stack offsets. In this case we can just
26562 return false, since the presence of far jumps in the function will
26563 not affect stack offsets.
26564
26565 If the arg pointer is live (or if it was live, but has now been
26566 eliminated and so set to dead) then we do have to test to see if
26567 the function might contain a far jump. This test can lead to some
26568 false positives, since before reload is completed the length of
26569 branch instructions is not known, so gcc defaults to returning their
26570 longest length, which in turn sets the far jump attribute to true.
26571
26572 A false positive will not result in bad code being generated, but it
26573 will result in a needless push and pop of the link register. We
26574 hope that this does not occur too often.
26575
26576 If we need doubleword stack alignment this could affect the other
26577 elimination offsets so we can't risk getting it wrong. */
26578 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26579 cfun->machine->arg_pointer_live = 1;
26580 else if (!cfun->machine->arg_pointer_live)
26581 return 0;
26582 }
26583
26584 /* We should not change far_jump_used during or after reload, as there is
26585 no chance to change stack frame layout. */
26586 if (reload_in_progress || reload_completed)
26587 return 0;
26588
26589 /* Check to see if the function contains a branch
26590 insn with the far jump attribute set. */
26591 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26592 {
26593 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26594 {
26595 far_jump = true;
26596 }
26597 func_size += get_attr_length (insn);
26598 }
26599
26600 /* The far_jump attribute will always be true for thumb1 before the
26601 shorten_branch pass, so checking the far_jump attribute before
26602 shorten_branch isn't very useful.
26603
26604 The following heuristic tries to estimate more accurately whether a far jump
26605 may finally be used. The heuristic is very conservative, as there is
26606 no chance to roll back the decision not to use a far jump.
26607
26608 The Thumb1 long branch offset range is -2048 to 2046. In the worst case each
26609 2-byte insn is associated with a 4-byte constant pool entry. Using a
26610 function size of 2048/3 as the threshold is conservative enough. */
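/* As a rough worked example of the threshold: an insn total of 683 bytes
   gives 683 * 3 == 2049 >= 2048, so far jumps are assumed; at 682 bytes
   (2046 < 2048) they are not.  */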
26611 if (far_jump)
26612 {
26613 if ((func_size * 3) >= 2048)
26614 {
26615 /* Record the fact that we have decided that
26616 the function does use far jumps. */
26617 cfun->machine->far_jump_used = 1;
26618 return 1;
26619 }
26620 }
26621
26622 return 0;
26623 }
26624
26625 /* Return nonzero if FUNC must be entered in ARM mode. */
26626 static bool
26627 is_called_in_ARM_mode (tree func)
26628 {
26629 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26630
26631 /* Ignore the problem about functions whose address is taken. */
26632 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26633 return true;
26634
26635 #ifdef ARM_PE
26636 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26637 #else
26638 return false;
26639 #endif
26640 }
26641
26642 /* Given the stack offsets and register mask in OFFSETS, decide how
26643 many additional registers to push instead of subtracting a constant
26644 from SP. For epilogues the principle is the same except we use pop.
26645 FOR_PROLOGUE indicates which we're generating. */
26646 static int
26647 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26648 {
26649 HOST_WIDE_INT amount;
26650 unsigned long live_regs_mask = offsets->saved_regs_mask;
26651 /* Extract a mask of the ones we can give to the Thumb's push/pop
26652 instruction. */
26653 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26654 /* Then count how many other high registers will need to be pushed. */
26655 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26656 int n_free, reg_base, size;
26657
26658 if (!for_prologue && frame_pointer_needed)
26659 amount = offsets->locals_base - offsets->saved_regs;
26660 else
26661 amount = offsets->outgoing_args - offsets->saved_regs;
26662
26663 /* If the stack frame size is 512 exactly, we can save one load
26664 instruction, which should make this a win even when optimizing
26665 for speed. */
26666 if (!optimize_size && amount != 512)
26667 return 0;
26668
26669 /* Can't do this if there are high registers to push. */
26670 if (high_regs_pushed != 0)
26671 return 0;
26672
26673 /* Shouldn't do it in the prologue if no registers would normally
26674 be pushed at all. In the epilogue, also allow it if we'll have
26675 a pop insn for the PC. */
26676 if (l_mask == 0
26677 && (for_prologue
26678 || TARGET_BACKTRACE
26679 || (live_regs_mask & 1 << LR_REGNUM) == 0
26680 || TARGET_INTERWORK
26681 || crtl->args.pretend_args_size != 0))
26682 return 0;
26683
26684 /* Don't do this if thumb_expand_prologue wants to emit instructions
26685 between the push and the stack frame allocation. */
26686 if (for_prologue
26687 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26688 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26689 return 0;
26690
26691 reg_base = 0;
26692 n_free = 0;
26693 if (!for_prologue)
26694 {
26695 size = arm_size_return_regs ();
26696 reg_base = ARM_NUM_INTS (size);
26697 live_regs_mask >>= reg_base;
26698 }
26699
26700 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26701 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26702 {
26703 live_regs_mask >>= 1;
26704 n_free++;
26705 }
26706
26707 if (n_free == 0)
26708 return 0;
26709 gcc_assert (amount / 4 * 4 == amount);
26710
26711 if (amount >= 512 && (amount - n_free * 4) < 512)
26712 return (amount - 508) / 4;
26713 if (amount <= n_free * 4)
26714 return amount / 4;
26715 return 0;
26716 }
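/* Worked example (illustrative): with AMOUNT == 516 and two usable free
   low registers, the function above returns (516 - 508) / 4 == 2, so two
   extra registers are pushed/popped and the remaining SP adjustment
   becomes 508, which fits in a single Thumb-1 SP add/sub.  */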
26717
26718 /* The bits which aren't usefully expanded as rtl. */
26719 const char *
26720 thumb1_unexpanded_epilogue (void)
26721 {
26722 arm_stack_offsets *offsets;
26723 int regno;
26724 unsigned long live_regs_mask = 0;
26725 int high_regs_pushed = 0;
26726 int extra_pop;
26727 int had_to_push_lr;
26728 int size;
26729
26730 if (cfun->machine->return_used_this_function != 0)
26731 return "";
26732
26733 if (IS_NAKED (arm_current_func_type ()))
26734 return "";
26735
26736 offsets = arm_get_frame_offsets ();
26737 live_regs_mask = offsets->saved_regs_mask;
26738 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26739
26740 /* See if we can deduce the registers used from the function's return
26741 value.  This is more reliable than examining df_regs_ever_live_p ()
26742 because that will be set if the register is ever used in the function,
26743 not just if the register is used to hold a return value. */
26744 size = arm_size_return_regs ();
26745
26746 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26747 if (extra_pop > 0)
26748 {
26749 unsigned long extra_mask = (1 << extra_pop) - 1;
26750 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26751 }
26752
26753 /* The prolog may have pushed some high registers to use as
26754 work registers. e.g. the testsuite file:
26755 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26756 compiles to produce:
26757 push {r4, r5, r6, r7, lr}
26758 mov r7, r9
26759 mov r6, r8
26760 push {r6, r7}
26761 as part of the prolog. We have to undo that pushing here. */
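  /* For that example the code below would emit, roughly:
	pop	{r6, r7}
	mov	r9, r7
	mov	r8, r6
     (illustrative only; the low registers actually used depend on which
     ones are free at this point).  */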
26762
26763 if (high_regs_pushed)
26764 {
26765 unsigned long mask = live_regs_mask & 0xff;
26766 int next_hi_reg;
26767
26768 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26769
26770 if (mask == 0)
26771 /* Oh dear! We have no low registers into which we can pop
26772 high registers! */
26773 internal_error
26774 ("no low registers available for popping high registers");
26775
26776 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26777 if (live_regs_mask & (1 << next_hi_reg))
26778 break;
26779
26780 while (high_regs_pushed)
26781 {
26782 /* Find lo register(s) into which the high register(s) can
26783 be popped. */
26784 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26785 {
26786 if (mask & (1 << regno))
26787 high_regs_pushed--;
26788 if (high_regs_pushed == 0)
26789 break;
26790 }
26791
26792 if (high_regs_pushed == 0 && regno >= 0)
26793 mask &= ~((1 << regno) - 1);
26794
26795 /* Pop the values into the low register(s). */
26796 thumb_pop (asm_out_file, mask);
26797
26798 /* Move the value(s) into the high registers. */
26799 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26800 {
26801 if (mask & (1 << regno))
26802 {
26803 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26804 regno);
26805
26806 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26807 next_hi_reg--)
26808 if (live_regs_mask & (1 << next_hi_reg))
26809 break;
26810 }
26811 }
26812 }
26813 live_regs_mask &= ~0x0f00;
26814 }
26815
26816 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26817 live_regs_mask &= 0xff;
26818
26819 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26820 {
26821 /* Pop the return address into the PC. */
26822 if (had_to_push_lr)
26823 live_regs_mask |= 1 << PC_REGNUM;
26824
26825 /* Either no argument registers were pushed or a backtrace
26826 structure was created which includes an adjusted stack
26827 pointer, so just pop everything. */
26828 if (live_regs_mask)
26829 thumb_pop (asm_out_file, live_regs_mask);
26830
26831 /* We have either just popped the return address into the
26832 PC or it was kept in LR for the entire function.
26833 Note that thumb_pop has already called thumb_exit if the
26834 PC was in the list. */
26835 if (!had_to_push_lr)
26836 thumb_exit (asm_out_file, LR_REGNUM);
26837 }
26838 else
26839 {
26840 /* Pop everything but the return address. */
26841 if (live_regs_mask)
26842 thumb_pop (asm_out_file, live_regs_mask);
26843
26844 if (had_to_push_lr)
26845 {
26846 if (size > 12)
26847 {
26848 /* We have no free low regs, so save one. */
26849 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26850 LAST_ARG_REGNUM);
26851 }
26852
26853 /* Get the return address into a temporary register. */
26854 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26855
26856 if (size > 12)
26857 {
26858 /* Move the return address to lr. */
26859 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26860 LAST_ARG_REGNUM);
26861 /* Restore the low register. */
26862 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26863 IP_REGNUM);
26864 regno = LR_REGNUM;
26865 }
26866 else
26867 regno = LAST_ARG_REGNUM;
26868 }
26869 else
26870 regno = LR_REGNUM;
26871
26872 /* Remove the argument registers that were pushed onto the stack. */
26873 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26874 SP_REGNUM, SP_REGNUM,
26875 crtl->args.pretend_args_size);
26876
26877 thumb_exit (asm_out_file, regno);
26878 }
26879
26880 return "";
26881 }
26882
26883 /* Functions to save and restore machine-specific function data. */
26884 static struct machine_function *
26885 arm_init_machine_status (void)
26886 {
26887 struct machine_function *machine;
26888 machine = ggc_cleared_alloc<machine_function> ();
26889
26890 #if ARM_FT_UNKNOWN != 0
26891 machine->func_type = ARM_FT_UNKNOWN;
26892 #endif
26893 machine->static_chain_stack_bytes = -1;
26894 return machine;
26895 }
26896
26897 /* Return an RTX indicating where the return address to the
26898 calling function can be found. */
26899 rtx
26900 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26901 {
26902 if (count != 0)
26903 return NULL_RTX;
26904
26905 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26906 }
26907
26908 /* Do anything needed before RTL is emitted for each function. */
26909 void
26910 arm_init_expanders (void)
26911 {
26912 /* Arrange to initialize and mark the machine per-function status. */
26913 init_machine_status = arm_init_machine_status;
26914
26915 /* This is to stop the combine pass optimizing away the alignment
26916 adjustment of va_arg. */
26917 /* ??? It is claimed that this should not be necessary. */
26918 if (cfun)
26919 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26920 }
26921
26922 /* Return true if FUNC is compiled in a different mode (ARM vs. Thumb) from the current one. */
26923
26924 bool
26925 arm_change_mode_p (tree func)
26926 {
26927 if (TREE_CODE (func) != FUNCTION_DECL)
26928 return false;
26929
26930 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26931
26932 if (!callee_tree)
26933 callee_tree = target_option_default_node;
26934
26935 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26936 int flags = callee_opts->x_target_flags;
26937
26938 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26939 }
26940
26941 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26942 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26943 to point at the base of the local variables after static stack
26944 space for a function has been allocated. */
26945
26946 HOST_WIDE_INT
26947 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26948 {
26949 arm_stack_offsets *offsets;
26950
26951 offsets = arm_get_frame_offsets ();
26952
26953 switch (from)
26954 {
26955 case ARG_POINTER_REGNUM:
26956 switch (to)
26957 {
26958 case STACK_POINTER_REGNUM:
26959 return offsets->outgoing_args - offsets->saved_args;
26960
26961 case FRAME_POINTER_REGNUM:
26962 return offsets->soft_frame - offsets->saved_args;
26963
26964 case ARM_HARD_FRAME_POINTER_REGNUM:
26965 return offsets->saved_regs - offsets->saved_args;
26966
26967 case THUMB_HARD_FRAME_POINTER_REGNUM:
26968 return offsets->locals_base - offsets->saved_args;
26969
26970 default:
26971 gcc_unreachable ();
26972 }
26973 break;
26974
26975 case FRAME_POINTER_REGNUM:
26976 switch (to)
26977 {
26978 case STACK_POINTER_REGNUM:
26979 return offsets->outgoing_args - offsets->soft_frame;
26980
26981 case ARM_HARD_FRAME_POINTER_REGNUM:
26982 return offsets->saved_regs - offsets->soft_frame;
26983
26984 case THUMB_HARD_FRAME_POINTER_REGNUM:
26985 return offsets->locals_base - offsets->soft_frame;
26986
26987 default:
26988 gcc_unreachable ();
26989 }
26990 break;
26991
26992 default:
26993 gcc_unreachable ();
26994 }
26995 }
26996
26997 /* Generate the function's prologue. */
26998
26999 void
27000 thumb1_expand_prologue (void)
27001 {
27002 rtx_insn *insn;
27003
27004 HOST_WIDE_INT amount;
27005 HOST_WIDE_INT size;
27006 arm_stack_offsets *offsets;
27007 unsigned long func_type;
27008 int regno;
27009 unsigned long live_regs_mask;
27010 unsigned long l_mask;
27011 unsigned high_regs_pushed = 0;
27012 bool lr_needs_saving;
27013
27014 func_type = arm_current_func_type ();
27015
27016 /* Naked functions don't have prologues. */
27017 if (IS_NAKED (func_type))
27018 {
27019 if (flag_stack_usage_info)
27020 current_function_static_stack_size = 0;
27021 return;
27022 }
27023
27024 if (IS_INTERRUPT (func_type))
27025 {
27026 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27027 return;
27028 }
27029
27030 if (is_called_in_ARM_mode (current_function_decl))
27031 emit_insn (gen_prologue_thumb1_interwork ());
27032
27033 offsets = arm_get_frame_offsets ();
27034 live_regs_mask = offsets->saved_regs_mask;
27035 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27036
27037 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27038 l_mask = live_regs_mask & 0x40ff;
27039 /* Then count how many other high registers will need to be pushed. */
27040 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27041
27042 if (crtl->args.pretend_args_size)
27043 {
27044 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27045
27046 if (cfun->machine->uses_anonymous_args)
27047 {
27048 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27049 unsigned long mask;
27050
27051 mask = 1ul << (LAST_ARG_REGNUM + 1);
27052 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27053
27054 insn = thumb1_emit_multi_reg_push (mask, 0);
27055 }
27056 else
27057 {
27058 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27059 stack_pointer_rtx, x));
27060 }
27061 RTX_FRAME_RELATED_P (insn) = 1;
27062 }
27063
27064 if (TARGET_BACKTRACE)
27065 {
27066 HOST_WIDE_INT offset = 0;
27067 unsigned work_register;
27068 rtx work_reg, x, arm_hfp_rtx;
27069
27070 /* We have been asked to create a stack backtrace structure.
27071 The code looks like this:
27072
27073 0 .align 2
27074 0 func:
27075 0 sub SP, #16 Reserve space for 4 registers.
27076 2 push {R7} Push low registers.
27077 4 add R7, SP, #20 Get the stack pointer before the push.
27078 6 str R7, [SP, #8] Store the stack pointer
27079 (before reserving the space).
27080 8 mov R7, PC Get hold of the start of this code + 12.
27081 10 str R7, [SP, #16] Store it.
27082 12 mov R7, FP Get hold of the current frame pointer.
27083 14 str R7, [SP, #4] Store it.
27084 16 mov R7, LR Get hold of the current return address.
27085 18 str R7, [SP, #12] Store it.
27086 20 add R7, SP, #16 Point at the start of the
27087 backtrace structure.
27088 22 mov FP, R7 Put this value into the frame pointer. */
27089
27090 work_register = thumb_find_work_register (live_regs_mask);
27091 work_reg = gen_rtx_REG (SImode, work_register);
27092 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27093
27094 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27095 stack_pointer_rtx, GEN_INT (-16)));
27096 RTX_FRAME_RELATED_P (insn) = 1;
27097
27098 if (l_mask)
27099 {
27100 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27101 RTX_FRAME_RELATED_P (insn) = 1;
27102 lr_needs_saving = false;
27103
27104 offset = bit_count (l_mask) * UNITS_PER_WORD;
27105 }
27106
27107 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27108 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27109
27110 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27111 x = gen_frame_mem (SImode, x);
27112 emit_move_insn (x, work_reg);
27113
27114 /* Make sure that the instruction fetching the PC is in the right place
27115 to calculate "start of backtrace creation code + 12". */
27116 /* ??? The stores using the common WORK_REG ought to be enough to
27117 prevent the scheduler from doing anything weird. Failing that
27118 we could always move all of the following into an UNSPEC_VOLATILE. */
27119 if (l_mask)
27120 {
27121 x = gen_rtx_REG (SImode, PC_REGNUM);
27122 emit_move_insn (work_reg, x);
27123
27124 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27125 x = gen_frame_mem (SImode, x);
27126 emit_move_insn (x, work_reg);
27127
27128 emit_move_insn (work_reg, arm_hfp_rtx);
27129
27130 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27131 x = gen_frame_mem (SImode, x);
27132 emit_move_insn (x, work_reg);
27133 }
27134 else
27135 {
27136 emit_move_insn (work_reg, arm_hfp_rtx);
27137
27138 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27139 x = gen_frame_mem (SImode, x);
27140 emit_move_insn (x, work_reg);
27141
27142 x = gen_rtx_REG (SImode, PC_REGNUM);
27143 emit_move_insn (work_reg, x);
27144
27145 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27146 x = gen_frame_mem (SImode, x);
27147 emit_move_insn (x, work_reg);
27148 }
27149
27150 x = gen_rtx_REG (SImode, LR_REGNUM);
27151 emit_move_insn (work_reg, x);
27152
27153 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27154 x = gen_frame_mem (SImode, x);
27155 emit_move_insn (x, work_reg);
27156
27157 x = GEN_INT (offset + 12);
27158 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27159
27160 emit_move_insn (arm_hfp_rtx, work_reg);
27161 }
27162 /* Optimization: If we are not pushing any low registers but we are going
27163 to push some high registers then delay our first push. This will just
27164 be a push of LR and we can combine it with the push of the first high
27165 register. */
27166 else if ((l_mask & 0xff) != 0
27167 || (high_regs_pushed == 0 && lr_needs_saving))
27168 {
27169 unsigned long mask = l_mask;
27170 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27171 insn = thumb1_emit_multi_reg_push (mask, mask);
27172 RTX_FRAME_RELATED_P (insn) = 1;
27173 lr_needs_saving = false;
27174 }
27175
27176 if (high_regs_pushed)
27177 {
27178 unsigned pushable_regs;
27179 unsigned next_hi_reg;
27180 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27181 : crtl->args.info.nregs;
27182 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27183
27184 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27185 if (live_regs_mask & (1 << next_hi_reg))
27186 break;
27187
27188 /* Here we need to mask out registers used for passing arguments,
27189 even if they could otherwise be pushed: using them to stash the
27190 high registers would clobber the argument values they still
27191 hold. */
27192 pushable_regs = l_mask & (~arg_regs_mask);
27193 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27194
27195 /* Normally, LR can be used as a scratch register once it has been
27196 saved; but if the function examines its own return address then
27197 the value is still live and we need to avoid using it. */
27198 bool return_addr_live
27199 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27200 LR_REGNUM);
27201
27202 if (lr_needs_saving || return_addr_live)
27203 pushable_regs &= ~(1 << LR_REGNUM);
27204
27205 if (pushable_regs == 0)
27206 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27207
27208 while (high_regs_pushed > 0)
27209 {
27210 unsigned long real_regs_mask = 0;
27211 unsigned long push_mask = 0;
27212
27213 for (regno = LR_REGNUM; regno >= 0; regno --)
27214 {
27215 if (pushable_regs & (1 << regno))
27216 {
27217 emit_move_insn (gen_rtx_REG (SImode, regno),
27218 gen_rtx_REG (SImode, next_hi_reg));
27219
27220 high_regs_pushed --;
27221 real_regs_mask |= (1 << next_hi_reg);
27222 push_mask |= (1 << regno);
27223
27224 if (high_regs_pushed)
27225 {
27226 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27227 next_hi_reg --)
27228 if (live_regs_mask & (1 << next_hi_reg))
27229 break;
27230 }
27231 else
27232 break;
27233 }
27234 }
27235
27236 /* If we had to find a work register and we have not yet
27237 saved the LR then add it to the list of regs to push. */
27238 if (lr_needs_saving)
27239 {
27240 push_mask |= 1 << LR_REGNUM;
27241 real_regs_mask |= 1 << LR_REGNUM;
27242 lr_needs_saving = false;
27243 /* If the return address is not live at this point, we
27244 can add LR to the list of registers that we can use
27245 for pushes. */
27246 if (!return_addr_live)
27247 pushable_regs |= 1 << LR_REGNUM;
27248 }
27249
27250 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27251 RTX_FRAME_RELATED_P (insn) = 1;
27252 }
27253 }
27254
27255 /* Load the pic register before setting the frame pointer,
27256 so we can use r7 as a temporary work register. */
27257 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27258 arm_load_pic_register (live_regs_mask, NULL_RTX);
27259
27260 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27261 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27262 stack_pointer_rtx);
27263
27264 size = offsets->outgoing_args - offsets->saved_args;
27265 if (flag_stack_usage_info)
27266 current_function_static_stack_size = size;
27267
27268 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27269 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27270 || flag_stack_clash_protection)
27271 && size)
27272 sorry ("%<-fstack-check=specific%> for Thumb-1");
27273
27274 amount = offsets->outgoing_args - offsets->saved_regs;
27275 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27276 if (amount)
27277 {
27278 if (amount < 512)
27279 {
27280 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27281 GEN_INT (- amount)));
27282 RTX_FRAME_RELATED_P (insn) = 1;
27283 }
27284 else
27285 {
27286 rtx reg, dwarf;
27287
27288 /* The stack decrement is too big for an immediate value in a single
27289 insn. In theory we could issue multiple subtracts, but after
27290 three of them it becomes more space efficient to place the full
27291 value in the constant pool and load into a register. (Also the
27292 ARM debugger really likes to see only one stack decrement per
27293 function). So instead we look for a scratch register into which
27294 we can load the decrement, and then we subtract this from the
27295 stack pointer. Unfortunately on the thumb the only available
27296 scratch registers are the argument registers, and we cannot use
27297 these as they may hold arguments to the function. Instead we
27298 attempt to locate a call preserved register which is used by this
27299 function. If we can find one, then we know that it will have
27300 been pushed at the start of the prologue and so we can corrupt
27301 it now. */
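      /* Illustrative example (assuming a 4 KB local frame and r4 being a
	 call-saved register this function already pushes): the code below
	 would emit roughly
		ldr	r4, .Lc		@ .Lc holds the constant -4096
		add	sp, r4
	 */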
27302 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27303 if (live_regs_mask & (1 << regno))
27304 break;
27305
27306 gcc_assert (regno <= LAST_LO_REGNUM);
27307
27308 reg = gen_rtx_REG (SImode, regno);
27309
27310 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27311
27312 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27313 stack_pointer_rtx, reg));
27314
27315 dwarf = gen_rtx_SET (stack_pointer_rtx,
27316 plus_constant (Pmode, stack_pointer_rtx,
27317 -amount));
27318 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27319 RTX_FRAME_RELATED_P (insn) = 1;
27320 }
27321 }
27322
27323 if (frame_pointer_needed)
27324 thumb_set_frame_pointer (offsets);
27325
27326 /* If we are profiling, make sure no instructions are scheduled before
27327 the call to mcount. Similarly if the user has requested no
27328 scheduling in the prolog. Similarly if we want non-call exceptions
27329 using the EABI unwinder, to prevent faulting instructions from being
27330 swapped with a stack adjustment. */
27331 if (crtl->profile || !TARGET_SCHED_PROLOG
27332 || (arm_except_unwind_info (&global_options) == UI_TARGET
27333 && cfun->can_throw_non_call_exceptions))
27334 emit_insn (gen_blockage ());
27335
27336 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27337 if (live_regs_mask & 0xff)
27338 cfun->machine->lr_save_eliminated = 0;
27339 }
27340
27341 /* Clear caller saved registers not used to pass return values and leaked
27342 condition flags before exiting a cmse_nonsecure_entry function. */
27343
27344 void
27345 cmse_nonsecure_entry_clear_before_return (void)
27346 {
27347 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27348 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27349 uint32_t padding_bits_to_clear = 0;
27350 auto_sbitmap to_clear_bitmap (maxregno + 1);
27351 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27352 tree result_type;
27353
27354 bitmap_clear (to_clear_bitmap);
27355 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27356 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27357
27358 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27359 registers. */
27360 if (clear_vfpregs)
27361 {
27362 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27363
27364 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27365
27366 if (!TARGET_HAVE_FPCXT_CMSE)
27367 {
27368 /* Make sure we don't clear the two scratch registers used to clear
27369 the relevant FPSCR bits in output_return_instruction. */
27370 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27371 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27372 emit_use (gen_rtx_REG (SImode, 4));
27373 bitmap_clear_bit (to_clear_bitmap, 4);
27374 }
27375 }
27376
27377 /* If the user has defined registers to be caller saved, these are no longer
27378 restored by the function before returning and must thus be cleared for
27379 security purposes. */
27380 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27381 {
27382 /* We do not touch registers that can be used to pass arguments as per
27383 the AAPCS, since these should never be made callee-saved by user
27384 options. */
27385 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27386 continue;
27387 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27388 continue;
27389 if (!callee_saved_reg_p (regno)
27390 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27391 || TARGET_HARD_FLOAT))
27392 bitmap_set_bit (to_clear_bitmap, regno);
27393 }
27394
27395 /* Make sure we do not clear the registers used to return the result in. */
27396 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27397 if (!VOID_TYPE_P (result_type))
27398 {
27399 uint64_t to_clear_return_mask;
27400 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27401
27402 /* No need to check that we return in registers, because we don't
27403 support returning on stack yet. */
27404 gcc_assert (REG_P (result_rtl));
27405 to_clear_return_mask
27406 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27407 &padding_bits_to_clear);
27408 if (to_clear_return_mask)
27409 {
27410 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27411 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27412 {
27413 if (to_clear_return_mask & (1ULL << regno))
27414 bitmap_clear_bit (to_clear_bitmap, regno);
27415 }
27416 }
27417 }
27418
27419 if (padding_bits_to_clear != 0)
27420 {
27421 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27422 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27423
27424 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27425 returning a composite type, which only uses r0. Let's make sure that
27426 r1-r3 are cleared too. */
27427 bitmap_clear (to_clear_arg_regs_bitmap);
27428 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27429 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27430 }
27431
27432 /* Clear full registers that leak before returning. */
27433 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27434 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27435 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27436 clearing_reg);
27437 }
27438
27439 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27440 POP instruction can be generated. LR should be replaced by PC. All
27441 the checks required are already done by USE_RETURN_INSN (). Hence,
27442 all we really need to check here is whether a single register or
27443 multiple registers are to be popped. */
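/* For instance, when LR is the only register saved, the PARALLEL built
   below is intended to collapse to a single pop-and-return of the form
   "ldr pc, [sp], #4" (illustrative; the exact syntax depends on the
   assembler output patterns).  */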
27444 void
27445 thumb2_expand_return (bool simple_return)
27446 {
27447 int i, num_regs;
27448 unsigned long saved_regs_mask;
27449 arm_stack_offsets *offsets;
27450
27451 offsets = arm_get_frame_offsets ();
27452 saved_regs_mask = offsets->saved_regs_mask;
27453
27454 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27455 if (saved_regs_mask & (1 << i))
27456 num_regs++;
27457
27458 if (!simple_return && saved_regs_mask)
27459 {
27460 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27461 functions or adapt code to handle according to ACLE. This path should
27462 not be reachable for cmse_nonsecure_entry functions though we prefer
27463 to assert it for now to ensure that future code changes do not silently
27464 change this behavior. */
27465 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27466 if (arm_current_function_pac_enabled_p ())
27467 {
27468 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27469 arm_emit_multi_reg_pop (saved_regs_mask);
27470 emit_insn (gen_aut_nop ());
27471 emit_jump_insn (simple_return_rtx);
27472 }
27473 else if (num_regs == 1)
27474 {
27475 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27476 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27477 rtx addr = gen_rtx_MEM (SImode,
27478 gen_rtx_POST_INC (SImode,
27479 stack_pointer_rtx));
27480 set_mem_alias_set (addr, get_frame_alias_set ());
27481 XVECEXP (par, 0, 0) = ret_rtx;
27482 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27483 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27484 emit_jump_insn (par);
27485 }
27486 else
27487 {
27488 saved_regs_mask &= ~ (1 << LR_REGNUM);
27489 saved_regs_mask |= (1 << PC_REGNUM);
27490 arm_emit_multi_reg_pop (saved_regs_mask);
27491 }
27492 }
27493 else
27494 {
27495 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27496 cmse_nonsecure_entry_clear_before_return ();
27497 emit_jump_insn (simple_return_rtx);
27498 }
27499 }
27500
27501 void
27502 thumb1_expand_epilogue (void)
27503 {
27504 HOST_WIDE_INT amount;
27505 arm_stack_offsets *offsets;
27506 int regno;
27507
27508 /* Naked functions don't have epilogues. */
27509 if (IS_NAKED (arm_current_func_type ()))
27510 return;
27511
27512 offsets = arm_get_frame_offsets ();
27513 amount = offsets->outgoing_args - offsets->saved_regs;
27514
27515 if (frame_pointer_needed)
27516 {
27517 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27518 amount = offsets->locals_base - offsets->saved_regs;
27519 }
27520 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27521
27522 gcc_assert (amount >= 0);
27523 if (amount)
27524 {
27525 emit_insn (gen_blockage ());
27526
27527 if (amount < 512)
27528 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27529 GEN_INT (amount)));
27530 else
27531 {
27532 /* r3 is always free in the epilogue. */
27533 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27534
27535 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27536 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27537 }
27538 }
27539
27540 /* Emit a USE (stack_pointer_rtx), so that
27541 the stack adjustment will not be deleted. */
27542 emit_insn (gen_force_register_use (stack_pointer_rtx));
27543
27544 if (crtl->profile || !TARGET_SCHED_PROLOG)
27545 emit_insn (gen_blockage ());
27546
27547 /* Emit a clobber for each insn that will be restored in the epilogue,
27548 so that flow2 will get register lifetimes correct. */
27549 for (regno = 0; regno < 13; regno++)
27550 if (reg_needs_saving_p (regno))
27551 emit_clobber (gen_rtx_REG (SImode, regno));
27552
27553 if (! df_regs_ever_live_p (LR_REGNUM))
27554 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27555
27556 /* Clear all caller-saved regs that are not used to return. */
27557 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27558 cmse_nonsecure_entry_clear_before_return ();
27559 }
27560
27561 /* Epilogue code for APCS frame. */
27562 static void
27563 arm_expand_epilogue_apcs_frame (bool really_return)
27564 {
27565 unsigned long func_type;
27566 unsigned long saved_regs_mask;
27567 int num_regs = 0;
27568 int i;
27569 int floats_from_frame = 0;
27570 arm_stack_offsets *offsets;
27571
27572 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27573 func_type = arm_current_func_type ();
27574
27575 /* Get frame offsets for ARM. */
27576 offsets = arm_get_frame_offsets ();
27577 saved_regs_mask = offsets->saved_regs_mask;
27578
27579 /* Find the offset of the floating-point save area in the frame. */
27580 floats_from_frame
27581 = (offsets->saved_args
27582 + arm_compute_static_chain_stack_bytes ()
27583 - offsets->frame);
27584
27585 /* Compute how many core registers saved and how far away the floats are. */
27586 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27587 if (saved_regs_mask & (1 << i))
27588 {
27589 num_regs++;
27590 floats_from_frame += 4;
27591 }
27592
27593 if (TARGET_VFP_BASE)
27594 {
27595 int start_reg;
27596 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27597
27598 /* The offset is from IP_REGNUM. */
27599 int saved_size = arm_get_vfp_saved_size ();
27600 if (saved_size > 0)
27601 {
27602 rtx_insn *insn;
27603 floats_from_frame += saved_size;
27604 insn = emit_insn (gen_addsi3 (ip_rtx,
27605 hard_frame_pointer_rtx,
27606 GEN_INT (-floats_from_frame)));
27607 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27608 ip_rtx, hard_frame_pointer_rtx);
27609 }
27610
27611 /* Generate VFP register multi-pop. */
27612 start_reg = FIRST_VFP_REGNUM;
27613
27614 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27615 /* Look for a case where a reg does not need restoring. */
27616 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27617 {
27618 if (start_reg != i)
27619 arm_emit_vfp_multi_reg_pop (start_reg,
27620 (i - start_reg) / 2,
27621 gen_rtx_REG (SImode,
27622 IP_REGNUM));
27623 start_reg = i + 2;
27624 }
27625
27626 /* Restore the remaining regs that we have discovered (or possibly
27627 even all of them, if the conditional in the for loop never
27628 fired). */
27629 if (start_reg != i)
27630 arm_emit_vfp_multi_reg_pop (start_reg,
27631 (i - start_reg) / 2,
27632 gen_rtx_REG (SImode, IP_REGNUM));
27633 }
27634
27635 if (TARGET_IWMMXT)
27636 {
27637 /* The frame pointer is guaranteed to be non-double-word aligned, as
27638 it is set to double-word-aligned old_stack_pointer - 4. */
27639 rtx_insn *insn;
27640 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27641
27642 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27643 if (reg_needs_saving_p (i))
27644 {
27645 rtx addr = gen_frame_mem (V2SImode,
27646 plus_constant (Pmode, hard_frame_pointer_rtx,
27647 - lrm_count * 4));
27648 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27649 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27650 gen_rtx_REG (V2SImode, i),
27651 NULL_RTX);
27652 lrm_count += 2;
27653 }
27654 }
27655
27656 /* saved_regs_mask should contain IP, which holds the old stack pointer
27657 saved when the frame was created. Since SP and IP are adjacent registers,
27658 we can restore that value directly into SP. */
27659 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27660 saved_regs_mask &= ~(1 << IP_REGNUM);
27661 saved_regs_mask |= (1 << SP_REGNUM);
27662
27663 /* There are two registers left in saved_regs_mask - LR and PC. We
27664 only need to restore LR (the return address), but to
27665 save time we can load it directly into PC, unless we need a
27666 special function exit sequence, or we are not really returning. */
27667 if (really_return
27668 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27669 && !crtl->calls_eh_return)
27670 /* Delete LR from the register mask, so that LR on
27671 the stack is loaded into the PC in the register mask. */
27672 saved_regs_mask &= ~(1 << LR_REGNUM);
27673 else
27674 saved_regs_mask &= ~(1 << PC_REGNUM);
27675
27676 num_regs = bit_count (saved_regs_mask);
27677 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27678 {
27679 rtx_insn *insn;
27680 emit_insn (gen_blockage ());
27681 /* Unwind the stack to just below the saved registers. */
27682 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27683 hard_frame_pointer_rtx,
27684 GEN_INT (- 4 * num_regs)));
27685
27686 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27687 stack_pointer_rtx, hard_frame_pointer_rtx);
27688 }
27689
27690 arm_emit_multi_reg_pop (saved_regs_mask);
27691
27692 if (IS_INTERRUPT (func_type))
27693 {
27694 /* Interrupt handlers will have pushed the
27695 IP onto the stack, so restore it now. */
27696 rtx_insn *insn;
27697 rtx addr = gen_rtx_MEM (SImode,
27698 gen_rtx_POST_INC (SImode,
27699 stack_pointer_rtx));
27700 set_mem_alias_set (addr, get_frame_alias_set ());
27701 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27702 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27703 gen_rtx_REG (SImode, IP_REGNUM),
27704 NULL_RTX);
27705 }
27706
27707 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27708 return;
27709
27710 if (crtl->calls_eh_return)
27711 emit_insn (gen_addsi3 (stack_pointer_rtx,
27712 stack_pointer_rtx,
27713 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27714
27715 if (IS_STACKALIGN (func_type))
27716 /* Restore the original stack pointer. Before prologue, the stack was
27717 realigned and the original stack pointer saved in r0. For details,
27718 see comment in arm_expand_prologue. */
27719 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27720
27721 emit_jump_insn (simple_return_rtx);
27722 }
27723
27724 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27725 function is not a sibcall. */
27726 void
27727 arm_expand_epilogue (bool really_return)
27728 {
27729 unsigned long func_type;
27730 unsigned long saved_regs_mask;
27731 int num_regs = 0;
27732 int i;
27733 int amount;
27734 arm_stack_offsets *offsets;
27735
27736 func_type = arm_current_func_type ();
27737
27738 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27739 let output_return_instruction take care of any instruction emission. */
27740 if (IS_NAKED (func_type)
27741 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27742 {
27743 if (really_return)
27744 emit_jump_insn (simple_return_rtx);
27745 return;
27746 }
27747
27748 /* If we are throwing an exception, then we really must be doing a
27749 return, so we can't tail-call. */
27750 gcc_assert (!crtl->calls_eh_return || really_return);
27751
27752 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27753 {
27754 arm_expand_epilogue_apcs_frame (really_return);
27755 return;
27756 }
27757
27758 /* Get frame offsets for ARM. */
27759 offsets = arm_get_frame_offsets ();
27760 saved_regs_mask = offsets->saved_regs_mask;
27761 num_regs = bit_count (saved_regs_mask);
27762
27763 if (frame_pointer_needed)
27764 {
27765 rtx_insn *insn;
27766 /* Restore stack pointer if necessary. */
27767 if (TARGET_ARM)
27768 {
27769 /* In ARM mode, frame pointer points to first saved register.
27770 Restore stack pointer to last saved register. */
27771 amount = offsets->frame - offsets->saved_regs;
27772
27773 /* Force out any pending memory operations that reference stacked data
27774 before stack de-allocation occurs. */
27775 emit_insn (gen_blockage ());
27776 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27777 hard_frame_pointer_rtx,
27778 GEN_INT (amount)));
27779 arm_add_cfa_adjust_cfa_note (insn, amount,
27780 stack_pointer_rtx,
27781 hard_frame_pointer_rtx);
27782
27783 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27784 deleted. */
27785 emit_insn (gen_force_register_use (stack_pointer_rtx));
27786 }
27787 else
27788 {
27789 /* In Thumb-2 mode, the frame pointer points to the last saved
27790 register. */
27791 amount = offsets->locals_base - offsets->saved_regs;
27792 if (amount)
27793 {
27794 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27795 hard_frame_pointer_rtx,
27796 GEN_INT (amount)));
27797 arm_add_cfa_adjust_cfa_note (insn, amount,
27798 hard_frame_pointer_rtx,
27799 hard_frame_pointer_rtx);
27800 }
27801
27802 /* Force out any pending memory operations that reference stacked data
27803 before stack de-allocation occurs. */
27804 emit_insn (gen_blockage ());
27805 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27806 hard_frame_pointer_rtx));
27807 arm_add_cfa_adjust_cfa_note (insn, 0,
27808 stack_pointer_rtx,
27809 hard_frame_pointer_rtx);
27810 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27811 deleted. */
27812 emit_insn (gen_force_register_use (stack_pointer_rtx));
27813 }
27814 }
27815 else
27816 {
27817 /* Pop off outgoing args and local frame to adjust stack pointer to
27818 last saved register. */
27819 amount = offsets->outgoing_args - offsets->saved_regs;
27820 if (amount)
27821 {
27822 rtx_insn *tmp;
27823 /* Force out any pending memory operations that reference stacked data
27824 before stack de-allocation occurs. */
27825 emit_insn (gen_blockage ());
27826 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27827 stack_pointer_rtx,
27828 GEN_INT (amount)));
27829 arm_add_cfa_adjust_cfa_note (tmp, amount,
27830 stack_pointer_rtx, stack_pointer_rtx);
27831 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27832 not deleted. */
27833 emit_insn (gen_force_register_use (stack_pointer_rtx));
27834 }
27835 }
27836
27837 if (TARGET_VFP_BASE)
27838 {
27839 /* Generate VFP register multi-pop. */
27840 int end_reg = LAST_VFP_REGNUM + 1;
27841
27842 /* Scan the registers in reverse order. We need to match
27843 any groupings made in the prologue and generate matching
27844 vldm operations. The need to match groups is because,
27845 unlike pop, vldm can only do consecutive regs. */
27846 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27847 /* Look for a case where a reg does not need restoring. */
27848 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27849 {
27850 /* Restore the regs discovered so far (from reg+2 to
27851 end_reg). */
27852 if (end_reg > i + 2)
27853 arm_emit_vfp_multi_reg_pop (i + 2,
27854 (end_reg - (i + 2)) / 2,
27855 stack_pointer_rtx);
27856 end_reg = i;
27857 }
27858
27859 /* Restore the remaining regs that we have discovered (or possibly
27860 even all of them, if the conditional in the for loop never
27861 fired). */
27862 if (end_reg > i + 2)
27863 arm_emit_vfp_multi_reg_pop (i + 2,
27864 (end_reg - (i + 2)) / 2,
27865 stack_pointer_rtx);
27866 }
27867
27868 if (TARGET_IWMMXT)
27869 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27870 if (reg_needs_saving_p (i))
27871 {
27872 rtx_insn *insn;
27873 rtx addr = gen_rtx_MEM (V2SImode,
27874 gen_rtx_POST_INC (SImode,
27875 stack_pointer_rtx));
27876 set_mem_alias_set (addr, get_frame_alias_set ());
27877 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27878 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27879 gen_rtx_REG (V2SImode, i),
27880 NULL_RTX);
27881 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27882 stack_pointer_rtx, stack_pointer_rtx);
27883 }
27884
27885 if (saved_regs_mask)
27886 {
27887 rtx insn;
27888 bool return_in_pc = false;
27889
27890 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27891 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27892 && !IS_CMSE_ENTRY (func_type)
27893 && !IS_STACKALIGN (func_type)
27894 && really_return
27895 && crtl->args.pretend_args_size == 0
27896 && saved_regs_mask & (1 << LR_REGNUM)
27897 && !crtl->calls_eh_return
27898 && !arm_current_function_pac_enabled_p ())
27899 {
27900 saved_regs_mask &= ~(1 << LR_REGNUM);
27901 saved_regs_mask |= (1 << PC_REGNUM);
27902 return_in_pc = true;
27903 }
27904
27905 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27906 {
27907 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27908 if (saved_regs_mask & (1 << i))
27909 {
27910 rtx addr = gen_rtx_MEM (SImode,
27911 gen_rtx_POST_INC (SImode,
27912 stack_pointer_rtx));
27913 set_mem_alias_set (addr, get_frame_alias_set ());
27914
27915 if (i == PC_REGNUM)
27916 {
27917 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27918 XVECEXP (insn, 0, 0) = ret_rtx;
27919 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27920 addr);
27921 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27922 insn = emit_jump_insn (insn);
27923 }
27924 else
27925 {
27926 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27927 addr));
27928 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27929 gen_rtx_REG (SImode, i),
27930 NULL_RTX);
27931 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27932 stack_pointer_rtx,
27933 stack_pointer_rtx);
27934 }
27935 }
27936 }
27937 else
27938 {
27939 if (TARGET_LDRD
27940 && current_tune->prefer_ldrd_strd
27941 && !optimize_function_for_size_p (cfun))
27942 {
27943 if (TARGET_THUMB2)
27944 thumb2_emit_ldrd_pop (saved_regs_mask);
27945 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27946 arm_emit_ldrd_pop (saved_regs_mask);
27947 else
27948 arm_emit_multi_reg_pop (saved_regs_mask);
27949 }
27950 else
27951 arm_emit_multi_reg_pop (saved_regs_mask);
27952 }
27953
27954 if (return_in_pc)
27955 return;
27956 }
27957
27958 amount
27959 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27960 if (amount)
27961 {
27962 int i, j;
27963 rtx dwarf = NULL_RTX;
27964 rtx_insn *tmp =
27965 emit_insn (gen_addsi3 (stack_pointer_rtx,
27966 stack_pointer_rtx,
27967 GEN_INT (amount)));
27968
27969 RTX_FRAME_RELATED_P (tmp) = 1;
27970
27971 if (cfun->machine->uses_anonymous_args)
27972 {
27973 /* Restore pretend args. See arm_expand_prologue for how pretend_args
27974 are saved on the stack. */
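	  /* E.g. with 8 bytes of pretend args, num_regs below is 2 and the
	     mask becomes (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, the
	     argument registers the prologue pushed (illustrative).  */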
27975 int num_regs = crtl->args.pretend_args_size / 4;
27976 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27977 for (j = 0, i = 0; j < num_regs; i++)
27978 if (saved_regs_mask & (1 << i))
27979 {
27980 rtx reg = gen_rtx_REG (SImode, i);
27981 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27982 j++;
27983 }
27984 REG_NOTES (tmp) = dwarf;
27985 }
27986 arm_add_cfa_adjust_cfa_note (tmp, amount,
27987 stack_pointer_rtx, stack_pointer_rtx);
27988 }
27989
27990 if (IS_CMSE_ENTRY (func_type))
27991 {
27992 /* CMSE_ENTRY always returns. */
27993 gcc_assert (really_return);
27994 /* Clear all caller-saved regs that are not used to return. */
27995 cmse_nonsecure_entry_clear_before_return ();
27996
27997 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27998 VLDR. */
27999 if (TARGET_HAVE_FPCXT_CMSE)
28000 {
28001 rtx_insn *insn;
28002
28003 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28004 GEN_INT (FPCXTNS_ENUM)));
28005 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28006 plus_constant (Pmode, stack_pointer_rtx, 4));
28007 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28008 RTX_FRAME_RELATED_P (insn) = 1;
28009 }
28010 }
28011
28012 if (arm_current_function_pac_enabled_p ())
28013 emit_insn (gen_aut_nop ());
28014
28015 if (!really_return)
28016 return;
28017
28018 if (crtl->calls_eh_return)
28019 emit_insn (gen_addsi3 (stack_pointer_rtx,
28020 stack_pointer_rtx,
28021 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28022
28023 if (IS_STACKALIGN (func_type))
28024 /* Restore the original stack pointer. Before prologue, the stack was
28025 realigned and the original stack pointer saved in r0. For details,
28026 see comment in arm_expand_prologue. */
28027 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28028
28029 emit_jump_insn (simple_return_rtx);
28030 }
28031
28032 /* Implementation of insn prologue_thumb1_interwork. This is the first
28033 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28034
28035 const char *
28036 thumb1_output_interwork (void)
28037 {
28038 const char * name;
28039 FILE *f = asm_out_file;
28040
28041 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28042 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28043 == SYMBOL_REF);
28044 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28045
28046 /* Generate code sequence to switch us into Thumb mode. */
28047 /* The .code 32 directive has already been emitted by
28048 ASM_DECLARE_FUNCTION_NAME. */
28049 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28050 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28051
28052 /* Generate a label, so that the debugger will notice the
28053 change in instruction sets. This label is also used by
28054 the assembler to bypass the ARM code when this function
28055 is called from a Thumb encoded function elsewhere in the
28056 same file. Hence the definition of STUB_NAME here must
28057 agree with the definition in gas/config/tc-arm.c. */
28058
28059 #define STUB_NAME ".real_start_of"
28060
28061 fprintf (f, "\t.code\t16\n");
28062 #ifdef ARM_PE
28063 if (arm_dllexport_name_p (name))
28064 name = arm_strip_name_encoding (name);
28065 #endif
28066 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28067 fprintf (f, "\t.thumb_func\n");
28068 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28069
28070 return "";
28071 }
28072
28073 /* Handle the case of a double word load into a low register from
28074 a computed memory address. The computed address may involve a
28075 register which is overwritten by the load. */
28076 const char *
28077 thumb_load_double_from_address (rtx *operands)
28078 {
28079 rtx addr;
28080 rtx base;
28081 rtx offset;
28082 rtx arg1;
28083 rtx arg2;
28084
28085 gcc_assert (REG_P (operands[0]));
28086 gcc_assert (MEM_P (operands[1]));
28087
28088 /* Get the memory address. */
28089 addr = XEXP (operands[1], 0);
28090
28091 /* Work out how the memory address is computed. */
28092 switch (GET_CODE (addr))
28093 {
28094 case REG:
28095 operands[2] = adjust_address (operands[1], SImode, 4);
28096
28097 if (REGNO (operands[0]) == REGNO (addr))
28098 {
28099 output_asm_insn ("ldr\t%H0, %2", operands);
28100 output_asm_insn ("ldr\t%0, %1", operands);
28101 }
28102 else
28103 {
28104 output_asm_insn ("ldr\t%0, %1", operands);
28105 output_asm_insn ("ldr\t%H0, %2", operands);
28106 }
28107 break;
28108
28109 case CONST:
28110 /* Compute <address> + 4 for the high order load. */
28111 operands[2] = adjust_address (operands[1], SImode, 4);
28112
28113 output_asm_insn ("ldr\t%0, %1", operands);
28114 output_asm_insn ("ldr\t%H0, %2", operands);
28115 break;
28116
28117 case PLUS:
28118 arg1 = XEXP (addr, 0);
28119 arg2 = XEXP (addr, 1);
28120
28121 if (CONSTANT_P (arg1))
28122 base = arg2, offset = arg1;
28123 else
28124 base = arg1, offset = arg2;
28125
28126 gcc_assert (REG_P (base));
28127
28128 /* Catch the case of <address> = <reg> + <reg> */
28129 if (REG_P (offset))
28130 {
28131 int reg_offset = REGNO (offset);
28132 int reg_base = REGNO (base);
28133 int reg_dest = REGNO (operands[0]);
28134
28135 /* Add the base and offset registers together into the
28136 higher destination register. */
28137 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28138 reg_dest + 1, reg_base, reg_offset);
28139
28140 /* Load the lower destination register from the address in
28141 the higher destination register. */
28142 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28143 reg_dest, reg_dest + 1);
28144
28145 /* Load the higher destination register from its own address
28146 plus 4. */
28147 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28148 reg_dest + 1, reg_dest + 1);
28149 }
28150 else
28151 {
28152 /* Compute <address> + 4 for the high order load. */
28153 operands[2] = adjust_address (operands[1], SImode, 4);
28154
28155 /* If the computed address is held in the low order register
28156 then load the high order register first, otherwise always
28157 load the low order register first. */
28158 if (REGNO (operands[0]) == REGNO (base))
28159 {
28160 output_asm_insn ("ldr\t%H0, %2", operands);
28161 output_asm_insn ("ldr\t%0, %1", operands);
28162 }
28163 else
28164 {
28165 output_asm_insn ("ldr\t%0, %1", operands);
28166 output_asm_insn ("ldr\t%H0, %2", operands);
28167 }
28168 }
28169 break;
28170
28171 case LABEL_REF:
28172 /* With no registers to worry about we can just load the value
28173 directly. */
28174 operands[2] = adjust_address (operands[1], SImode, 4);
28175
28176 output_asm_insn ("ldr\t%H0, %2", operands);
28177 output_asm_insn ("ldr\t%0, %1", operands);
28178 break;
28179
28180 default:
28181 gcc_unreachable ();
28182 }
28183
28184 return "";
28185 }
28186
28187 const char *
28188 thumb_output_move_mem_multiple (int n, rtx *operands)
28189 {
28190 switch (n)
28191 {
28192 case 2:
28193 if (REGNO (operands[4]) > REGNO (operands[5]))
28194 std::swap (operands[4], operands[5]);
28195
28196 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28197 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28198 break;
28199
28200 case 3:
28201 if (REGNO (operands[4]) > REGNO (operands[5]))
28202 std::swap (operands[4], operands[5]);
28203 if (REGNO (operands[5]) > REGNO (operands[6]))
28204 std::swap (operands[5], operands[6]);
28205 if (REGNO (operands[4]) > REGNO (operands[5]))
28206 std::swap (operands[4], operands[5]);
28207
28208 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28209 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28210 break;
28211
28212 default:
28213 gcc_unreachable ();
28214 }
28215
28216 return "";
28217 }
28218
28219 /* Output a call-via instruction for thumb state. */
28220 const char *
28221 thumb_call_via_reg (rtx reg)
28222 {
28223 int regno = REGNO (reg);
28224 rtx *labelp;
28225
28226 gcc_assert (regno < LR_REGNUM);
28227
28228 /* If we are in the normal text section we can use a single instance
28229 per compilation unit. If we are doing function sections, then we need
28230 an entry per section, since we can't rely on reachability. */
28231 if (in_section == text_section)
28232 {
28233 thumb_call_reg_needed = 1;
28234
28235 if (thumb_call_via_label[regno] == NULL)
28236 thumb_call_via_label[regno] = gen_label_rtx ();
28237 labelp = thumb_call_via_label + regno;
28238 }
28239 else
28240 {
28241 if (cfun->machine->call_via[regno] == NULL)
28242 cfun->machine->call_via[regno] = gen_label_rtx ();
28243 labelp = cfun->machine->call_via + regno;
28244 }
28245
28246 output_asm_insn ("bl\t%a0", labelp);
28247 return "";
28248 }
28249
28250 /* Routines for generating rtl. */
28251 void
28252 thumb_expand_cpymemqi (rtx *operands)
28253 {
28254 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28255 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28256 HOST_WIDE_INT len = INTVAL (operands[2]);
28257 HOST_WIDE_INT offset = 0;
28258
28259 while (len >= 12)
28260 {
28261 emit_insn (gen_cpymem12b (out, in, out, in));
28262 len -= 12;
28263 }
28264
28265 if (len >= 8)
28266 {
28267 emit_insn (gen_cpymem8b (out, in, out, in));
28268 len -= 8;
28269 }
28270
28271 if (len >= 4)
28272 {
28273 rtx reg = gen_reg_rtx (SImode);
28274 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28275 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28276 len -= 4;
28277 offset += 4;
28278 }
28279
28280 if (len >= 2)
28281 {
28282 rtx reg = gen_reg_rtx (HImode);
28283 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28284 plus_constant (Pmode, in,
28285 offset))));
28286 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28287 offset)),
28288 reg));
28289 len -= 2;
28290 offset += 2;
28291 }
28292
28293 if (len)
28294 {
28295 rtx reg = gen_reg_rtx (QImode);
28296 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28297 plus_constant (Pmode, in,
28298 offset))));
28299 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28300 offset)),
28301 reg));
28302 }
28303 }
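/* Illustrative decomposition: a 23-byte copy is expanded as one 12-byte
   block move, one 8-byte block move, then a halfword and a byte copy
   (12 + 8 + 2 + 1).  */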
28304
28305 void
28306 thumb_reload_out_hi (rtx *operands)
28307 {
28308 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28309 }
28310
28311 /* Return the length of a function name prefix
28312 that starts with the character 'c'. */
28313 static int
28314 arm_get_strip_length (int c)
28315 {
28316 switch (c)
28317 {
28318 ARM_NAME_ENCODING_LENGTHS
28319 default: return 0;
28320 }
28321 }
28322
28323 /* Return a pointer to a function's name with any
28324 and all prefix encodings stripped from it. */
28325 const char *
28326 arm_strip_name_encoding (const char *name)
28327 {
28328 int skip;
28329
28330 while ((skip = arm_get_strip_length (* name)))
28331 name += skip;
28332
28333 return name;
28334 }
28335
28336 /* If there is a '*' anywhere in the name's prefix, then
28337 emit the stripped name verbatim, otherwise prepend an
28338 underscore if leading underscores are being used. */
28339 void
28340 arm_asm_output_labelref (FILE *stream, const char *name)
28341 {
28342 int skip;
28343 int verbatim = 0;
28344
28345 while ((skip = arm_get_strip_length (* name)))
28346 {
28347 verbatim |= (*name == '*');
28348 name += skip;
28349 }
28350
28351 if (verbatim)
28352 fputs (name, stream);
28353 else
28354 asm_fprintf (stream, "%U%s", name);
28355 }
28356
28357 /* This function is used to emit an EABI tag and its associated value.
28358 We emit the numerical value of the tag in case the assembler does not
28359 support textual tags (e.g. gas prior to 2.20). If requested we include
28360 the tag name in a comment so that anyone reading the assembler output
28361 will know which tag is being set.
28362
28363 This function is not static because arm-c.cc needs it too. */
28364
28365 void
28366 arm_emit_eabi_attribute (const char *name, int num, int val)
28367 {
28368 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28369 if (flag_verbose_asm || flag_debug_asm)
28370 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28371 asm_fprintf (asm_out_file, "\n");
28372 }
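/* For example (illustrative; tag number taken from the ARM EABI attribute
   list): arm_emit_eabi_attribute ("Tag_ABI_PCS_wchar_t", 18, 4) emits
   ".eabi_attribute 18, 4" followed, under -fverbose-asm, by an
   "@ Tag_ABI_PCS_wchar_t" comment.  */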
28373
28374 /* This function is used to print CPU tuning information as comment
28375 in assembler file. Pointers are not printed for now. */
28376
28377 void
28378 arm_print_tune_info (void)
28379 {
28380 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28381 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28382 current_tune->constant_limit);
28383 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28384 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28385 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28386 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28387 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28388 "prefetch.l1_cache_size:\t%d\n",
28389 current_tune->prefetch.l1_cache_size);
28390 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28391 "prefetch.l1_cache_line_size:\t%d\n",
28392 current_tune->prefetch.l1_cache_line_size);
28393 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28394 "prefer_constant_pool:\t%d\n",
28395 (int) current_tune->prefer_constant_pool);
28396 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28397 "branch_cost:\t(s:speed, p:predictable)\n");
28398 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28399 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28400 current_tune->branch_cost (false, false));
28401 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28402 current_tune->branch_cost (false, true));
28403 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28404 current_tune->branch_cost (true, false));
28405 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28406 current_tune->branch_cost (true, true));
28407 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28408 "prefer_ldrd_strd:\t%d\n",
28409 (int) current_tune->prefer_ldrd_strd);
28410 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28411 "logical_op_non_short_circuit:\t[%d,%d]\n",
28412 (int) current_tune->logical_op_non_short_circuit_thumb,
28413 (int) current_tune->logical_op_non_short_circuit_arm);
28414 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28415 "disparage_flag_setting_t16_encodings:\t%d\n",
28416 (int) current_tune->disparage_flag_setting_t16_encodings);
28417 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28418 "string_ops_prefer_neon:\t%d\n",
28419 (int) current_tune->string_ops_prefer_neon);
28420 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28421 "max_insns_inline_memset:\t%d\n",
28422 current_tune->max_insns_inline_memset);
28423 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28424 current_tune->fusible_ops);
28425 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28426 (int) current_tune->sched_autopref);
28427 }
28428
28429 /* The last set of target options used to emit .arch directives, etc. This
28430 could be a function-local static if it were not required to expose it as a
28431 root to the garbage collector. */
28432 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28433
28434 /* Print .arch and .arch_extension directives corresponding to the
28435 current architecture configuration. */
28436 static void
28437 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28438 {
28439 arm_build_target build_target;
28440 /* If the target options haven't changed since the last time we were called
28441 there is nothing to do. This should be sufficient to suppress the
28442 majority of redundant work. */
28443 if (last_asm_targ_options == targ_options)
28444 return;
28445
28446 last_asm_targ_options = targ_options;
28447
28448 build_target.isa = sbitmap_alloc (isa_num_bits);
28449 arm_configure_build_target (&build_target, targ_options, false);
28450
28451 if (build_target.core_name
28452 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28453 {
28454 const char* truncated_name
28455 = arm_rewrite_selected_cpu (build_target.core_name);
28456 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28457 }
28458
28459 const arch_option *arch
28460 = arm_parse_arch_option_name (all_architectures, "-march",
28461 build_target.arch_name);
28462 auto_sbitmap opt_bits (isa_num_bits);
28463
28464 gcc_assert (arch);
28465
28466 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28467 {
28468 /* Keep backward compatibility for assemblers which don't support
28469 armv7ve. Fortunately, none of the following extensions are reset
28470 by a .fpu directive. */
28471 asm_fprintf (stream, "\t.arch armv7-a\n");
28472 asm_fprintf (stream, "\t.arch_extension virt\n");
28473 asm_fprintf (stream, "\t.arch_extension idiv\n");
28474 asm_fprintf (stream, "\t.arch_extension sec\n");
28475 asm_fprintf (stream, "\t.arch_extension mp\n");
28476 }
28477 else
28478 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28479
28480 /* The .fpu directive will reset any architecture extensions from the
28481 assembler that relate to the fp/vector extensions. So put this out before
28482 any .arch_extension directives. */
28483 const char *fpu_name = (TARGET_SOFT_FLOAT
28484 ? "softvfp"
28485 : arm_identify_fpu_from_isa (build_target.isa));
28486 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28487
28488 if (!arch->common.extensions)
28489 return;
28490
28491 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28492 opt->name != NULL;
28493 opt++)
28494 {
28495 if (!opt->remove)
28496 {
28497 arm_initialize_isa (opt_bits, opt->isa_bits);
28498
28499 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28500 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28501 floating point instructions are disabled. So the following check
28502 restricts the printing of ".arch_extension mve" and
28503 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28504 this special behaviour because the feature bits "mve" and
28505 "mve_float" are not part of the "fpu bits", so they are not cleared
28506 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28507 TARGET_HAVE_MVE_FLOAT are disabled. */
28508 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28509 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28510 && !TARGET_HAVE_MVE_FLOAT))
28511 continue;
28512
28513 /* If every feature bit of this option is set in the target ISA
28514 specification, print out the option name. However, don't print
28515 anything if all the bits are part of the FPU specification. */
28516 if (bitmap_subset_p (opt_bits, build_target.isa)
28517 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28518 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28519 }
28520 }
28521 }
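/* Editorial sketch of the output produced above (illustrative, not verified
   assembler output): for -march=armv7ve the directives fall back to the
   spelling accepted by older assemblers, e.g.

     .arch armv7-a
     .arch_extension virt
     .arch_extension idiv
     .arch_extension sec
     .arch_extension mp
     .fpu vfpv4

   while other architectures get a plain ".arch <name>", followed by the
   .fpu directive and any applicable .arch_extension lines.  */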
28522
28523 static void
28524 arm_file_start (void)
28525 {
28526 int val;
28527 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28528 bool bti = (aarch_enable_bti == 1);
28529
28530 arm_print_asm_arch_directives
28531 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28532
28533 if (TARGET_BPABI)
28534 {
28535 /* If we have a named cpu, but the assembler does not support that
28536 name via .cpu, put out a cpu name attribute; but don't do this if the
28537 name starts with the fictitious prefix, 'generic'. */
28538 if (arm_active_target.core_name
28539 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28540 && !startswith (arm_active_target.core_name, "generic"))
28541 {
28542 const char* truncated_name
28543 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28544 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28545 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28546 truncated_name);
28547 }
28548
28549 if (print_tune_info)
28550 arm_print_tune_info ();
28551
28552 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28553 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28554
28555 if (TARGET_HARD_FLOAT_ABI)
28556 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28557
28558 /* Some of these attributes only apply when the corresponding features
28559 are used. However we don't have any easy way of figuring this out.
28560 Conservatively record the setting that would have been used. */
28561
28562 if (flag_rounding_math)
28563 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28564
28565 if (!flag_unsafe_math_optimizations)
28566 {
28567 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28568 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28569 }
28570 if (flag_signaling_nans)
28571 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28572
28573 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28574 flag_finite_math_only ? 1 : 3);
28575
28576 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28577 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28578 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28579 flag_short_enums ? 1 : 2);
28580
28581 /* Tag_ABI_optimization_goals. */
28582 if (optimize_size)
28583 val = 4;
28584 else if (optimize >= 2)
28585 val = 2;
28586 else if (optimize)
28587 val = 1;
28588 else
28589 val = 6;
28590 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28591
28592 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28593 unaligned_access);
28594
28595 if (arm_fp16_format)
28596 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28597 (int) arm_fp16_format);
28598
28599 if (TARGET_HAVE_PACBTI)
28600 {
28601 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28602 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28603 }
28604 else if (pac || bti)
28605 {
28606 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28607 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28608 }
28609
28610 if (bti)
28611 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28612 if (pac)
28613 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28614
28615 if (arm_lang_output_object_attributes_hook)
28616 arm_lang_output_object_attributes_hook();
28617 }
28618
28619 default_file_start ();
28620 }
28621
28622 static void
28623 arm_file_end (void)
28624 {
28625 int regno;
28626
28627 /* Just in case the last function output in the assembler had non-default
28628 architecture directives, we force the assembler state back to the default
28629 set, so that any 'calculated' build attributes are based on the default
28630 options rather than the special options for that function. */
28631 arm_print_asm_arch_directives
28632 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28633
28634 if (NEED_INDICATE_EXEC_STACK)
28635 /* Add .note.GNU-stack. */
28636 file_end_indicate_exec_stack ();
28637
28638 if (! thumb_call_reg_needed)
28639 return;
28640
28641 switch_to_section (text_section);
28642 asm_fprintf (asm_out_file, "\t.code 16\n");
28643 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28644
28645 for (regno = 0; regno < LR_REGNUM; regno++)
28646 {
28647 rtx label = thumb_call_via_label[regno];
28648
28649 if (label != 0)
28650 {
28651 targetm.asm_out.internal_label (asm_out_file, "L",
28652 CODE_LABEL_NUMBER (label));
28653 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28654 }
28655 }
28656 }
28657
28658 #ifndef ARM_PE
28659 /* Symbols in the text segment can be accessed without indirecting via the
28660 constant pool; it may take an extra binary operation, but this is still
28661 faster than indirecting via memory. Don't do this when not optimizing,
28662 since we won't be calculating all of the offsets necessary to do this
28663 simplification. */
28664
28665 static void
28666 arm_encode_section_info (tree decl, rtx rtl, int first)
28667 {
28668 if (optimize > 0 && TREE_CONSTANT (decl))
28669 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28670
28671 default_encode_section_info (decl, rtl, first);
28672 }
28673 #endif /* !ARM_PE */
28674
28675 static void
28676 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28677 {
28678 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28679 && !strcmp (prefix, "L"))
28680 {
28681 arm_ccfsm_state = 0;
28682 arm_target_insn = NULL;
28683 }
28684 default_internal_label (stream, prefix, labelno);
28685 }
28686
28687 /* Define classes to generate code as RTL or output asm to a file.
28688 Using templates then allows us to use the same code to output code
28689 sequences in the two formats. */
28690 class thumb1_const_rtl
28691 {
28692 public:
28693 thumb1_const_rtl (rtx dst) : dst (dst) {}
28694
28695 void mov (HOST_WIDE_INT val)
28696 {
28697 emit_set_insn (dst, GEN_INT (val));
28698 }
28699
28700 void add (HOST_WIDE_INT val)
28701 {
28702 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28703 }
28704
28705 void ashift (HOST_WIDE_INT shift)
28706 {
28707 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28708 }
28709
28710 void neg ()
28711 {
28712 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28713 }
28714
28715 private:
28716 rtx dst;
28717 };
28718
28719 class thumb1_const_print
28720 {
28721 public:
28722 thumb1_const_print (FILE *f, int regno)
28723 {
28724 t_file = f;
28725 dst_regname = reg_names[regno];
28726 }
28727
28728 void mov (HOST_WIDE_INT val)
28729 {
28730 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28731 dst_regname, val);
28732 }
28733
28734 void add (HOST_WIDE_INT val)
28735 {
28736 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28737 dst_regname, val);
28738 }
28739
28740 void ashift (HOST_WIDE_INT shift)
28741 {
28742 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28743 dst_regname, shift);
28744 }
28745
28746 void neg ()
28747 {
28748 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28749 }
28750
28751 private:
28752 FILE *t_file;
28753 const char *dst_regname;
28754 };
28755
28756 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28757 Avoid generating useless code when one of the bytes is zero. */
28758 template <class T>
28759 void
28760 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28761 {
28762 bool mov_done_p = false;
28763 unsigned HOST_WIDE_INT val = op1;
28764 int shift = 0;
28765 int i;
28766
28767 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28768
28769 if (val <= 255)
28770 {
28771 dst.mov (val);
28772 return;
28773 }
28774
28775 /* For negative numbers with the first nine bits set, build the
28776 opposite of OP1, then negate it: this is generally shorter, and
28777 not longer. */
28778 if ((val & 0xFF800000) == 0xFF800000)
28779 {
28780 thumb1_gen_const_int_1 (dst, -op1);
28781 dst.neg ();
28782 return;
28783 }
28784
28785 /* In the general case, we need 7 instructions to build
28786 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28787 do better if VAL is small enough, or
28788 right-shiftable by a suitable amount. If the
28789 right shift lets us encode at least one byte fewer,
28790 it's worth it: we save an adds and an lsls at the
28791 expense of a final lsls. */
28792 int final_shift = number_of_first_bit_set (val);
28793
28794 int leading_zeroes = clz_hwi (val);
28795 int number_of_bytes_needed
28796 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28797 / BITS_PER_UNIT) + 1;
28798 int number_of_bytes_needed2
28799 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28800 / BITS_PER_UNIT) + 1;
28801
28802 if (number_of_bytes_needed2 < number_of_bytes_needed)
28803 val >>= final_shift;
28804 else
28805 final_shift = 0;
28806
28807 /* If we are in a very small range, we can use either a single movs
28808 or movs+adds. */
28809 if (val <= 510)
28810 {
28811 if (val > 255)
28812 {
28813 unsigned HOST_WIDE_INT high = val - 255;
28814
28815 dst.mov (high);
28816 dst.add (255);
28817 }
28818 else
28819 dst.mov (val);
28820
28821 if (final_shift > 0)
28822 dst.ashift (final_shift);
28823 }
28824 else
28825 {
28826 /* General case, emit upper 3 bytes as needed. */
28827 for (i = 0; i < 3; i++)
28828 {
28829 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28830
28831 if (byte)
28832 {
28833 /* We are about to emit new bits, stop accumulating a
28834 shift amount, and left-shift only if we have already
28835 emitted some upper bits. */
28836 if (mov_done_p)
28837 {
28838 dst.ashift (shift);
28839 dst.add (byte);
28840 }
28841 else
28842 dst.mov (byte);
28843
28844 /* Stop accumulating shift amount since we've just
28845 emitted some bits. */
28846 shift = 0;
28847
28848 mov_done_p = true;
28849 }
28850
28851 if (mov_done_p)
28852 shift += 8;
28853 }
28854
28855 /* Emit lower byte. */
28856 if (!mov_done_p)
28857 dst.mov (val & 0xff);
28858 else
28859 {
28860 dst.ashift (shift);
28861 if (val & 0xff)
28862 dst.add (val & 0xff);
28863 }
28864
28865 if (final_shift > 0)
28866 dst.ashift (final_shift);
28867 }
28868 }
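/* Editorial worked example (a sketch, assuming the print variant writes to
   r3): for OP1 = 0x12345 the algorithm above skips the zero high byte and
   emits

     movs  r3, #1
     lsls  r3, #8
     adds  r3, #35
     lsls  r3, #8
     adds  r3, #69

   i.e. a movs for the most significant non-zero byte followed by lsls/adds
   pairs for the remaining bytes.  */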
28869
28870 /* Proxies for thumb1.md, since the thumb1_const_print and
28871 thumb1_const_rtl classes are not exported. */
28872 void
28873 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28874 {
28875 thumb1_const_rtl t (dst);
28876 thumb1_gen_const_int_1 (t, op1);
28877 }
28878
28879 void
28880 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28881 {
28882 thumb1_const_print t (asm_out_file, REGNO (dst));
28883 thumb1_gen_const_int_1 (t, op1);
28884 }
28885
28886 /* Output code to add DELTA to the first argument, and then jump
28887 to FUNCTION. Used for C++ multiple inheritance. */
28888
28889 static void
28890 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28891 HOST_WIDE_INT, tree function)
28892 {
28893 static int thunk_label = 0;
28894 char label[256];
28895 char labelpc[256];
28896 int mi_delta = delta;
28897 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28898 int shift = 0;
28899 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28900 ? 1 : 0);
28901 if (mi_delta < 0)
28902 mi_delta = - mi_delta;
28903
28904 final_start_function (emit_barrier (), file, 1);
28905
28906 if (TARGET_THUMB1)
28907 {
28908 int labelno = thunk_label++;
28909 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28910 /* Thunks are entered in arm mode when available. */
28911 if (TARGET_THUMB1_ONLY)
28912 {
28913 /* push r3 so we can use it as a temporary. */
28914 /* TODO: Omit this save if r3 is not used. */
28915 fputs ("\tpush {r3}\n", file);
28916
28917 /* With -mpure-code, we cannot load the address from the
28918 constant pool: we build it explicitly. */
28919 if (target_pure_code)
28920 {
28921 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28922 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28923 fputc ('\n', file);
28924 fputs ("\tlsls r3, #8\n", file);
28925 fputs ("\tadds\tr3, #:upper0_7:#", file);
28926 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28927 fputc ('\n', file);
28928 fputs ("\tlsls r3, #8\n", file);
28929 fputs ("\tadds\tr3, #:lower8_15:#", file);
28930 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28931 fputc ('\n', file);
28932 fputs ("\tlsls r3, #8\n", file);
28933 fputs ("\tadds\tr3, #:lower0_7:#", file);
28934 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28935 fputc ('\n', file);
28936 }
28937 else
28938 fputs ("\tldr\tr3, ", file);
28939 }
28940 else
28941 {
28942 fputs ("\tldr\tr12, ", file);
28943 }
28944
28945 if (!target_pure_code)
28946 {
28947 assemble_name (file, label);
28948 fputc ('\n', file);
28949 }
28950
28951 if (flag_pic)
28952 {
28953 /* If we are generating PIC, the ldr instruction below loads
28954 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28955 the address of the add + 8, so we have:
28956
28957 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28958 = target + 1.
28959
28960 Note that we have "+ 1" because some versions of GNU ld
28961 don't set the low bit of the result for R_ARM_REL32
28962 relocations against thumb function symbols.
28963 On ARMv6M this is +4, not +8. */
28964 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28965 assemble_name (file, labelpc);
28966 fputs (":\n", file);
28967 if (TARGET_THUMB1_ONLY)
28968 {
28969 /* This is 2 insns after the start of the thunk, so we know it
28970 is 4-byte aligned. */
28971 fputs ("\tadd\tr3, pc, r3\n", file);
28972 fputs ("\tmov r12, r3\n", file);
28973 }
28974 else
28975 fputs ("\tadd\tr12, pc, r12\n", file);
28976 }
28977 else if (TARGET_THUMB1_ONLY)
28978 fputs ("\tmov r12, r3\n", file);
28979 }
28980 if (TARGET_THUMB1_ONLY)
28981 {
28982 if (mi_delta > 255)
28983 {
28984 /* With -mpure-code, we cannot load MI_DELTA from the
28985 constant pool: we build it explicitly. */
28986 if (target_pure_code)
28987 {
28988 thumb1_const_print r3 (file, 3);
28989 thumb1_gen_const_int_1 (r3, mi_delta);
28990 }
28991 else
28992 {
28993 fputs ("\tldr\tr3, ", file);
28994 assemble_name (file, label);
28995 fputs ("+4\n", file);
28996 }
28997 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28998 mi_op, this_regno, this_regno);
28999 }
29000 else if (mi_delta != 0)
29001 {
29002 /* Thumb1 unified syntax requires s suffix in instruction name when
29003 one of the operands is immediate. */
29004 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29005 mi_op, this_regno, this_regno,
29006 mi_delta);
29007 }
29008 }
29009 else
29010 {
29011 /* TODO: Use movw/movt for large constants when available. */
29012 while (mi_delta != 0)
29013 {
29014 if ((mi_delta & (3 << shift)) == 0)
29015 shift += 2;
29016 else
29017 {
29018 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29019 mi_op, this_regno, this_regno,
29020 mi_delta & (0xff << shift));
29021 mi_delta &= ~(0xff << shift);
29022 shift += 8;
29023 }
29024 }
29025 }
29026 if (TARGET_THUMB1)
29027 {
29028 if (TARGET_THUMB1_ONLY)
29029 fputs ("\tpop\t{r3}\n", file);
29030
29031 fprintf (file, "\tbx\tr12\n");
29032
29033 /* With -mpure-code, we don't need to emit literals for the
29034 function address and delta since we emitted code to build
29035 them. */
29036 if (!target_pure_code)
29037 {
29038 ASM_OUTPUT_ALIGN (file, 2);
29039 assemble_name (file, label);
29040 fputs (":\n", file);
29041 if (flag_pic)
29042 {
29043 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29044 rtx tem = XEXP (DECL_RTL (function), 0);
29045 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29046 pipeline offset is four rather than eight. Adjust the offset
29047 accordingly. */
29048 tem = plus_constant (GET_MODE (tem), tem,
29049 TARGET_THUMB1_ONLY ? -3 : -7);
29050 tem = gen_rtx_MINUS (GET_MODE (tem),
29051 tem,
29052 gen_rtx_SYMBOL_REF (Pmode,
29053 ggc_strdup (labelpc)));
29054 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29055 }
29056 else
29057 /* Output ".word .LTHUNKn". */
29058 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29059
29060 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29061 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29062 }
29063 }
29064 else
29065 {
29066 fputs ("\tb\t", file);
29067 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29068 if (NEED_PLT_RELOC)
29069 fputs ("(PLT)", file);
29070 fputc ('\n', file);
29071 }
29072
29073 final_end_function ();
29074 }
29075
29076 /* MI thunk handling for TARGET_32BIT. */
29077
29078 static void
29079 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29080 HOST_WIDE_INT vcall_offset, tree function)
29081 {
29082 const bool long_call_p = arm_is_long_call_p (function);
29083
29084 /* On ARM, this_regno is R0 or R1 depending on whether the function
29085 returns an aggregate or not. */
29086
29087 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29088 function)
29089 ? R1_REGNUM : R0_REGNUM);
29090
29091 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29092 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29093 reload_completed = 1;
29094 emit_note (NOTE_INSN_PROLOGUE_END);
29095
29096 /* Add DELTA to THIS_RTX. */
29097 if (delta != 0)
29098 arm_split_constant (PLUS, Pmode, NULL_RTX,
29099 delta, this_rtx, this_rtx, false);
29100
29101 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29102 if (vcall_offset != 0)
29103 {
29104 /* Load *THIS_RTX. */
29105 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29106 /* Compute *THIS_RTX + VCALL_OFFSET. */
29107 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29108 false);
29109 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29110 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29111 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29112 }
29113
29114 /* Generate a tail call to the target function. */
29115 if (!TREE_USED (function))
29116 {
29117 assemble_external (function);
29118 TREE_USED (function) = 1;
29119 }
29120 rtx funexp = XEXP (DECL_RTL (function), 0);
29121 if (long_call_p)
29122 {
29123 emit_move_insn (temp, funexp);
29124 funexp = temp;
29125 }
29126 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29127 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29128 SIBLING_CALL_P (insn) = 1;
29129 emit_barrier ();
29130
29131 /* Indirect calls require a bit of fixup in PIC mode. */
29132 if (long_call_p)
29133 {
29134 split_all_insns_noflow ();
29135 arm_reorg ();
29136 }
29137
29138 insn = get_insns ();
29139 shorten_branches (insn);
29140 final_start_function (insn, file, 1);
29141 final (insn, file, 1);
29142 final_end_function ();
29143
29144 /* Stop pretending this is a post-reload pass. */
29145 reload_completed = 0;
29146 }
29147
29148 /* Output code to add DELTA to the first argument, and then jump
29149 to FUNCTION. Used for C++ multiple inheritance. */
29150
29151 static void
29152 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29153 HOST_WIDE_INT vcall_offset, tree function)
29154 {
29155 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29156
29157 assemble_start_function (thunk, fnname);
29158 if (TARGET_32BIT)
29159 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29160 else
29161 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29162 assemble_end_function (thunk, fnname);
29163 }
29164
29165 int
29166 arm_emit_vector_const (FILE *file, rtx x)
29167 {
29168 int i;
29169 const char * pattern;
29170
29171 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29172
29173 switch (GET_MODE (x))
29174 {
29175 case E_V2SImode: pattern = "%08x"; break;
29176 case E_V4HImode: pattern = "%04x"; break;
29177 case E_V8QImode: pattern = "%02x"; break;
29178 default: gcc_unreachable ();
29179 }
29180
29181 fprintf (file, "0x");
29182 for (i = CONST_VECTOR_NUNITS (x); i--;)
29183 {
29184 rtx element;
29185
29186 element = CONST_VECTOR_ELT (x, i);
29187 fprintf (file, pattern, INTVAL (element));
29188 }
29189
29190 return 1;
29191 }
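/* Editorial example: for a V4HImode CONST_VECTOR with elements {1, 2, 3, 4}
   the routine above prints the lanes from the highest index downwards,
   producing "0x0004000300020001".  */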
29192
29193 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29194 HFmode constant pool entries are actually loaded with ldr. */
29195 void
29196 arm_emit_fp16_const (rtx c)
29197 {
29198 long bits;
29199
29200 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29201 if (WORDS_BIG_ENDIAN)
29202 assemble_zeros (2);
29203 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29204 if (!WORDS_BIG_ENDIAN)
29205 assemble_zeros (2);
29206 }
29207
29208 const char *
29209 arm_output_load_gr (rtx *operands)
29210 {
29211 rtx reg;
29212 rtx offset;
29213 rtx wcgr;
29214 rtx sum;
29215
29216 if (!MEM_P (operands [1])
29217 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29218 || !REG_P (reg = XEXP (sum, 0))
29219 || !CONST_INT_P (offset = XEXP (sum, 1))
29220 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29221 return "wldrw%?\t%0, %1";
29222
29223 /* Fix up an out-of-range load of a GR register. */
29224 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29225 wcgr = operands[0];
29226 operands[0] = reg;
29227 output_asm_insn ("ldr%?\t%0, %1", operands);
29228
29229 operands[0] = wcgr;
29230 operands[1] = reg;
29231 output_asm_insn ("tmcr%?\t%0, %1", operands);
29232 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29233
29234 return "";
29235 }
29236
29237 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29238
29239 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29240 named arg and all anonymous args onto the stack.
29241 XXX I know the prologue shouldn't be pushing registers, but it is faster
29242 that way. */
29243
29244 static void
29245 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29246 const function_arg_info &arg,
29247 int *pretend_size,
29248 int second_time ATTRIBUTE_UNUSED)
29249 {
29250 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29251 int nregs;
29252
29253 cfun->machine->uses_anonymous_args = 1;
29254 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29255 {
29256 nregs = pcum->aapcs_ncrn;
29257 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29258 && (nregs & 1))
29259 {
29260 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29261 if (res < 0 && warn_psabi)
29262 inform (input_location, "parameter passing for argument of "
29263 "type %qT changed in GCC 7.1", arg.type);
29264 else if (res > 0)
29265 {
29266 nregs++;
29267 if (res > 1 && warn_psabi)
29268 inform (input_location,
29269 "parameter passing for argument of type "
29270 "%qT changed in GCC 9.1", arg.type);
29271 }
29272 }
29273 }
29274 else
29275 nregs = pcum->nregs;
29276
29277 if (nregs < NUM_ARG_REGS)
29278 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29279 }
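/* Editorial example (assuming the AAPCS variant): for a variadic function
   such as f (int x, ...) the named argument consumes r0, so NREGS is 1 and
   *PRETEND_SIZE becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12, making
   the prologue push r1-r3 where va_arg expects to find them.  */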
29280
29281 /* We can't rely on the caller doing the proper promotion when
29282 using APCS or ATPCS. */
29283
29284 static bool
29285 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29286 {
29287 return !TARGET_AAPCS_BASED;
29288 }
29289
29290 static machine_mode
29291 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29292 machine_mode mode,
29293 int *punsignedp ATTRIBUTE_UNUSED,
29294 const_tree fntype ATTRIBUTE_UNUSED,
29295 int for_return ATTRIBUTE_UNUSED)
29296 {
29297 if (GET_MODE_CLASS (mode) == MODE_INT
29298 && GET_MODE_SIZE (mode) < 4)
29299 return SImode;
29300
29301 return mode;
29302 }
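/* Editorial note: the net effect of the hook above is that sub-word integer
   values (QImode and HImode) are widened to SImode, so e.g. a 'short'
   argument or return value occupies a full 32-bit register.  */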
29303
29304
29305 static bool
29306 arm_default_short_enums (void)
29307 {
29308 return ARM_DEFAULT_SHORT_ENUMS;
29309 }
29310
29311
29312 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29313
29314 static bool
29315 arm_align_anon_bitfield (void)
29316 {
29317 return TARGET_AAPCS_BASED;
29318 }
29319
29320
29321 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29322
29323 static tree
29324 arm_cxx_guard_type (void)
29325 {
29326 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29327 }
29328
29329
29330 /* The EABI says test the least significant bit of a guard variable. */
29331
29332 static bool
29333 arm_cxx_guard_mask_bit (void)
29334 {
29335 return TARGET_AAPCS_BASED;
29336 }
29337
29338
29339 /* The EABI specifies that all array cookies are 8 bytes long. */
29340
29341 static tree
29342 arm_get_cookie_size (tree type)
29343 {
29344 tree size;
29345
29346 if (!TARGET_AAPCS_BASED)
29347 return default_cxx_get_cookie_size (type);
29348
29349 size = build_int_cst (sizetype, 8);
29350 return size;
29351 }
29352
29353
29354 /* The EABI says that array cookies should also contain the element size. */
29355
29356 static bool
29357 arm_cookie_has_size (void)
29358 {
29359 return TARGET_AAPCS_BASED;
29360 }
29361
29362
29363 /* The EABI says constructors and destructors should return a pointer to
29364 the object constructed/destroyed. */
29365
29366 static bool
29367 arm_cxx_cdtor_returns_this (void)
29368 {
29369 return TARGET_AAPCS_BASED;
29370 }
29371
29372 /* The EABI says that an inline function may never be the key
29373 method. */
29374
29375 static bool
29376 arm_cxx_key_method_may_be_inline (void)
29377 {
29378 return !TARGET_AAPCS_BASED;
29379 }
29380
29381 static void
29382 arm_cxx_determine_class_data_visibility (tree decl)
29383 {
29384 if (!TARGET_AAPCS_BASED
29385 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29386 return;
29387
29388 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29389 is exported. However, on systems without dynamic vague linkage,
29390 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29391 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29392 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29393 else
29394 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29395 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29396 }
29397
29398 static bool
29399 arm_cxx_class_data_always_comdat (void)
29400 {
29401 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29402 vague linkage if the class has no key function. */
29403 return !TARGET_AAPCS_BASED;
29404 }
29405
29406
29407 /* The EABI says __aeabi_atexit should be used to register static
29408 destructors. */
29409
29410 static bool
29411 arm_cxx_use_aeabi_atexit (void)
29412 {
29413 return TARGET_AAPCS_BASED;
29414 }
29415
29416
29417 void
29418 arm_set_return_address (rtx source, rtx scratch)
29419 {
29420 arm_stack_offsets *offsets;
29421 HOST_WIDE_INT delta;
29422 rtx addr, mem;
29423 unsigned long saved_regs;
29424
29425 offsets = arm_get_frame_offsets ();
29426 saved_regs = offsets->saved_regs_mask;
29427
29428 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29429 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29430 else
29431 {
29432 if (frame_pointer_needed)
29433 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29434 else
29435 {
29436 /* LR will be the first saved register. */
29437 delta = offsets->outgoing_args - (offsets->frame + 4);
29438
29439
29440 if (delta >= 4096)
29441 {
29442 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29443 GEN_INT (delta & ~4095)));
29444 addr = scratch;
29445 delta &= 4095;
29446 }
29447 else
29448 addr = stack_pointer_rtx;
29449
29450 addr = plus_constant (Pmode, addr, delta);
29451 }
29452
29453 /* The store needs to be marked to prevent DSE from deleting
29454 it as dead if it is based on fp. */
29455 mem = gen_frame_mem (Pmode, addr);
29456 MEM_VOLATILE_P (mem) = true;
29457 emit_move_insn (mem, source);
29458 }
29459 }
29460
29461
29462 void
29463 thumb_set_return_address (rtx source, rtx scratch)
29464 {
29465 arm_stack_offsets *offsets;
29466 HOST_WIDE_INT delta;
29467 HOST_WIDE_INT limit;
29468 int reg;
29469 rtx addr, mem;
29470 unsigned long mask;
29471
29472 emit_use (source);
29473
29474 offsets = arm_get_frame_offsets ();
29475 mask = offsets->saved_regs_mask;
29476 if (mask & (1 << LR_REGNUM))
29477 {
29478 limit = 1024;
29479 /* Find the saved regs. */
29480 if (frame_pointer_needed)
29481 {
29482 delta = offsets->soft_frame - offsets->saved_args;
29483 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29484 if (TARGET_THUMB1)
29485 limit = 128;
29486 }
29487 else
29488 {
29489 delta = offsets->outgoing_args - offsets->saved_args;
29490 reg = SP_REGNUM;
29491 }
29492 /* Allow for the stack frame. */
29493 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29494 delta -= 16;
29495 /* The link register is always the first saved register. */
29496 delta -= 4;
29497
29498 /* Construct the address. */
29499 addr = gen_rtx_REG (SImode, reg);
29500 if (delta > limit)
29501 {
29502 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29503 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29504 addr = scratch;
29505 }
29506 else
29507 addr = plus_constant (Pmode, addr, delta);
29508
29509 /* The store needs to be marked to prevent DSE from deleting
29510 it as dead if it is based on fp. */
29511 mem = gen_frame_mem (Pmode, addr);
29512 MEM_VOLATILE_P (mem) = true;
29513 emit_move_insn (mem, source);
29514 }
29515 else
29516 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29517 }
29518
29519 /* Implements target hook vector_mode_supported_p. */
29520 bool
29521 arm_vector_mode_supported_p (machine_mode mode)
29522 {
29523 /* Neon also supports V2SImode, etc. listed in the clause below. */
29524 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29525 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29526 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29527 || mode == V8BFmode))
29528 return true;
29529
29530 if ((TARGET_NEON || TARGET_IWMMXT)
29531 && ((mode == V2SImode)
29532 || (mode == V4HImode)
29533 || (mode == V8QImode)))
29534 return true;
29535
29536 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29537 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29538 || mode == V2HAmode))
29539 return true;
29540
29541 if (TARGET_HAVE_MVE
29542 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29543 || mode == V16QImode
29544 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29545 return true;
29546
29547 if (TARGET_HAVE_MVE_FLOAT
29548 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29549 return true;
29550
29551 return false;
29552 }
29553
29554 /* Implements target hook array_mode_supported_p. */
29555
29556 static bool
29557 arm_array_mode_supported_p (machine_mode mode,
29558 unsigned HOST_WIDE_INT nelems)
29559 {
29560 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29561 for now, as the lane-swapping logic needs to be extended in the expanders.
29562 See PR target/82518. */
29563 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29564 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29565 && (nelems >= 2 && nelems <= 4))
29566 return true;
29567
29568 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29569 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29570 return true;
29571
29572 return false;
29573 }
29574
29575 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29576 registers when autovectorizing for Neon, at least until multiple vector
29577 widths are supported properly by the middle-end. */
29578
29579 static machine_mode
29580 arm_preferred_simd_mode (scalar_mode mode)
29581 {
29582 if (TARGET_NEON)
29583 switch (mode)
29584 {
29585 case E_HFmode:
29586 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29587 case E_SFmode:
29588 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29589 case E_SImode:
29590 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29591 case E_HImode:
29592 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29593 case E_QImode:
29594 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29595 case E_DImode:
29596 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29597 return V2DImode;
29598 break;
29599
29600 default:;
29601 }
29602
29603 if (TARGET_REALLY_IWMMXT)
29604 switch (mode)
29605 {
29606 case E_SImode:
29607 return V2SImode;
29608 case E_HImode:
29609 return V4HImode;
29610 case E_QImode:
29611 return V8QImode;
29612
29613 default:;
29614 }
29615
29616 if (TARGET_HAVE_MVE)
29617 switch (mode)
29618 {
29619 case E_QImode:
29620 return V16QImode;
29621 case E_HImode:
29622 return V8HImode;
29623 case E_SImode:
29624 return V4SImode;
29625
29626 default:;
29627 }
29628
29629 if (TARGET_HAVE_MVE_FLOAT)
29630 switch (mode)
29631 {
29632 case E_HFmode:
29633 return V8HFmode;
29634 case E_SFmode:
29635 return V4SFmode;
29636
29637 default:;
29638 }
29639
29640 return word_mode;
29641 }
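/* Editorial examples of the choices above: with plain Neon, SFmode
   autovectorizes as V4SFmode (quadword), and as V2SFmode only under
   -mvectorize-with-neon-double; MVE always picks the 128-bit forms such as
   V16QImode and V4SImode.  */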
29642
29643 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29644
29645 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29646 using r0-r4 for function arguments, r7 for the stack frame and not have
29647 enough left over to do doubleword arithmetic. For Thumb-2 all the
29648 potentially problematic instructions accept high registers so this is not
29649 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29650 that require many low registers. */
29651 static bool
29652 arm_class_likely_spilled_p (reg_class_t rclass)
29653 {
29654 if ((TARGET_THUMB1 && rclass == LO_REGS)
29655 || rclass == CC_REG)
29656 return true;
29657
29658 return default_class_likely_spilled_p (rclass);
29659 }
29660
29661 /* Implements target hook small_register_classes_for_mode_p. */
29662 bool
29663 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29664 {
29665 return TARGET_THUMB1;
29666 }
29667
29668 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29669 ARM insns and therefore guarantee that the shift count is modulo 256.
29670 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29671 guarantee no particular behavior for out-of-range counts. */
29672
29673 static unsigned HOST_WIDE_INT
29674 arm_shift_truncation_mask (machine_mode mode)
29675 {
29676 return mode == SImode ? 255 : 0;
29677 }
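/* Editorial example: with the mask of 255 returned above, the compiler may
   assume that the hardware truncates an SImode shift count, so a count
   register holding 257 behaves as a shift by 257 & 255 == 1; no equivalent
   guarantee is made for DImode shifts.  */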
29678
29679
29680 /* Map internal gcc register numbers to DWARF2 register numbers. */
29681
29682 unsigned int
29683 arm_debugger_regno (unsigned int regno)
29684 {
29685 if (regno < 16)
29686 return regno;
29687
29688 if (IS_VFP_REGNUM (regno))
29689 {
29690 /* See comment in arm_dwarf_register_span. */
29691 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29692 return 64 + regno - FIRST_VFP_REGNUM;
29693 else
29694 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29695 }
29696
29697 if (IS_IWMMXT_GR_REGNUM (regno))
29698 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29699
29700 if (IS_IWMMXT_REGNUM (regno))
29701 return 112 + regno - FIRST_IWMMXT_REGNUM;
29702
29703 return DWARF_FRAME_REGISTERS;
29704 }
29705
29706 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29707 GCC models them as 64 32-bit registers, so we need to describe this to
29708 the DWARF generation code. Other registers can use the default. */
29709 static rtx
29710 arm_dwarf_register_span (rtx rtl)
29711 {
29712 machine_mode mode;
29713 unsigned regno;
29714 rtx parts[16];
29715 int nregs;
29716 int i;
29717
29718 regno = REGNO (rtl);
29719 if (!IS_VFP_REGNUM (regno))
29720 return NULL_RTX;
29721
29722 /* XXX FIXME: The EABI defines two VFP register ranges:
29723 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29724 256-287: D0-D31
29725 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29726 corresponding D register. Until GDB supports this, we shall use the
29727 legacy encodings. We also use these encodings for D0-D15 for
29728 compatibility with older debuggers. */
29729 mode = GET_MODE (rtl);
29730 if (GET_MODE_SIZE (mode) < 8)
29731 return NULL_RTX;
29732
29733 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29734 {
29735 nregs = GET_MODE_SIZE (mode) / 4;
29736 for (i = 0; i < nregs; i += 2)
29737 if (TARGET_BIG_END)
29738 {
29739 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29740 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29741 }
29742 else
29743 {
29744 parts[i] = gen_rtx_REG (SImode, regno + i);
29745 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29746 }
29747 }
29748 else
29749 {
29750 nregs = GET_MODE_SIZE (mode) / 8;
29751 for (i = 0; i < nregs; i++)
29752 parts[i] = gen_rtx_REG (DImode, regno + i);
29753 }
29754
29755 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29756 }
29757
29758 #if ARM_UNWIND_INFO
29759 /* Emit unwind directives for a store-multiple instruction or stack pointer
29760 push during alignment.
29761 These should only ever be generated by the function prologue code, so
29762 expect them to have a particular form.
29763 The store-multiple instruction sometimes pushes pc as the last register,
29764 although it should not be tracked into unwind information, or for -Os
29765 sometimes pushes some dummy registers before first register that needs
29766 to be tracked in unwind information; such dummy registers are there just
29767 to avoid separate stack adjustment, and will not be restored in the
29768 epilogue. */
29769
29770 static void
29771 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29772 {
29773 int i;
29774 HOST_WIDE_INT offset;
29775 HOST_WIDE_INT nregs;
29776 int reg_size;
29777 unsigned reg;
29778 unsigned lastreg;
29779 unsigned padfirst = 0, padlast = 0;
29780 rtx e;
29781
29782 e = XVECEXP (p, 0, 0);
29783 gcc_assert (GET_CODE (e) == SET);
29784
29785 /* First insn will adjust the stack pointer. */
29786 gcc_assert (GET_CODE (e) == SET
29787 && REG_P (SET_DEST (e))
29788 && REGNO (SET_DEST (e)) == SP_REGNUM
29789 && GET_CODE (SET_SRC (e)) == PLUS);
29790
29791 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29792 nregs = XVECLEN (p, 0) - 1;
29793 gcc_assert (nregs);
29794
29795 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29796 if (reg < 16)
29797 {
29798 /* For -Os dummy registers can be pushed at the beginning to
29799 avoid separate stack pointer adjustment. */
29800 e = XVECEXP (p, 0, 1);
29801 e = XEXP (SET_DEST (e), 0);
29802 if (GET_CODE (e) == PLUS)
29803 padfirst = INTVAL (XEXP (e, 1));
29804 gcc_assert (padfirst == 0 || optimize_size);
29805 /* The function prologue may also push pc, but not annotate it as it is
29806 never restored. We turn this into a stack pointer adjustment. */
29807 e = XVECEXP (p, 0, nregs);
29808 e = XEXP (SET_DEST (e), 0);
29809 if (GET_CODE (e) == PLUS)
29810 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29811 else
29812 padlast = offset - 4;
29813 gcc_assert (padlast == 0 || padlast == 4);
29814 if (padlast == 4)
29815 fprintf (out_file, "\t.pad #4\n");
29816 reg_size = 4;
29817 fprintf (out_file, "\t.save {");
29818 }
29819 else if (IS_VFP_REGNUM (reg))
29820 {
29821 reg_size = 8;
29822 fprintf (out_file, "\t.vsave {");
29823 }
29824 else
29825 /* Unknown register type. */
29826 gcc_unreachable ();
29827
29828 /* If the stack increment doesn't match the size of the saved registers,
29829 something has gone horribly wrong. */
29830 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29831
29832 offset = padfirst;
29833 lastreg = 0;
29834 /* The remaining insns will describe the stores. */
29835 for (i = 1; i <= nregs; i++)
29836 {
29837 /* Expect (set (mem <addr>) (reg)).
29838 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29839 e = XVECEXP (p, 0, i);
29840 gcc_assert (GET_CODE (e) == SET
29841 && MEM_P (SET_DEST (e))
29842 && REG_P (SET_SRC (e)));
29843
29844 reg = REGNO (SET_SRC (e));
29845 gcc_assert (reg >= lastreg);
29846
29847 if (i != 1)
29848 fprintf (out_file, ", ");
29849 /* We can't use %r for vfp because we need to use the
29850 double precision register names. */
29851 if (IS_VFP_REGNUM (reg))
29852 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29853 else
29854 asm_fprintf (out_file, "%r", reg);
29855
29856 if (flag_checking)
29857 {
29858 /* Check that the addresses are consecutive. */
29859 e = XEXP (SET_DEST (e), 0);
29860 if (GET_CODE (e) == PLUS)
29861 gcc_assert (REG_P (XEXP (e, 0))
29862 && REGNO (XEXP (e, 0)) == SP_REGNUM
29863 && CONST_INT_P (XEXP (e, 1))
29864 && offset == INTVAL (XEXP (e, 1)));
29865 else
29866 gcc_assert (i == 1
29867 && REG_P (e)
29868 && REGNO (e) == SP_REGNUM);
29869 offset += reg_size;
29870 }
29871 }
29872 fprintf (out_file, "}\n");
29873 if (padfirst)
29874 fprintf (out_file, "\t.pad #%d\n", padfirst);
29875 }
29876
29877 /* Emit unwind directives for a SET. */
29878
29879 static void
29880 arm_unwind_emit_set (FILE * out_file, rtx p)
29881 {
29882 rtx e0;
29883 rtx e1;
29884 unsigned reg;
29885
29886 e0 = XEXP (p, 0);
29887 e1 = XEXP (p, 1);
29888 switch (GET_CODE (e0))
29889 {
29890 case MEM:
29891 /* Pushing a single register. */
29892 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29893 || !REG_P (XEXP (XEXP (e0, 0), 0))
29894 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29895 abort ();
29896
29897 asm_fprintf (out_file, "\t.save ");
29898 if (IS_VFP_REGNUM (REGNO (e1)))
29899 asm_fprintf(out_file, "{d%d}\n",
29900 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29901 else
29902 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29903 break;
29904
29905 case REG:
29906 if (REGNO (e0) == SP_REGNUM)
29907 {
29908 /* A stack increment. */
29909 if (GET_CODE (e1) != PLUS
29910 || !REG_P (XEXP (e1, 0))
29911 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29912 || !CONST_INT_P (XEXP (e1, 1)))
29913 abort ();
29914
29915 asm_fprintf (out_file, "\t.pad #%wd\n",
29916 -INTVAL (XEXP (e1, 1)));
29917 }
29918 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29919 {
29920 HOST_WIDE_INT offset;
29921
29922 if (GET_CODE (e1) == PLUS)
29923 {
29924 if (!REG_P (XEXP (e1, 0))
29925 || !CONST_INT_P (XEXP (e1, 1)))
29926 abort ();
29927 reg = REGNO (XEXP (e1, 0));
29928 offset = INTVAL (XEXP (e1, 1));
29929 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29930 HARD_FRAME_POINTER_REGNUM, reg,
29931 offset);
29932 }
29933 else if (REG_P (e1))
29934 {
29935 reg = REGNO (e1);
29936 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29937 HARD_FRAME_POINTER_REGNUM, reg);
29938 }
29939 else
29940 abort ();
29941 }
29942 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29943 {
29944 /* Move from sp to reg. */
29945 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29946 }
29947 else if (GET_CODE (e1) == PLUS
29948 && REG_P (XEXP (e1, 0))
29949 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29950 && CONST_INT_P (XEXP (e1, 1)))
29951 {
29952 /* Set reg to offset from sp. */
29953 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29954 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29955 }
29956 else
29957 abort ();
29958 break;
29959
29960 default:
29961 abort ();
29962 }
29963 }
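/* Editorial examples (a sketch) of directives this function can emit:

     .save {r4}          @ single-register push
     .pad #16            @ sp adjusted down by 16 bytes
     .setfp fp, sp, #8   @ frame pointer established at sp + 8
     .movsp r4           @ sp copied into r4

   the exact register names and offsets depend on the pattern matched.  */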
29964
29965
29966 /* Emit unwind directives for the given insn. */
29967
29968 static void
29969 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29970 {
29971 rtx note, pat;
29972 bool handled_one = false;
29973
29974 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29975 return;
29976
29977 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29978 && (TREE_NOTHROW (current_function_decl)
29979 || crtl->all_throwers_are_sibcalls))
29980 return;
29981
29982 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29983 return;
29984
29985 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29986 {
29987 switch (REG_NOTE_KIND (note))
29988 {
29989 case REG_FRAME_RELATED_EXPR:
29990 pat = XEXP (note, 0);
29991 goto found;
29992
29993 case REG_CFA_REGISTER:
29994 pat = XEXP (note, 0);
29995 if (pat == NULL)
29996 {
29997 pat = PATTERN (insn);
29998 if (GET_CODE (pat) == PARALLEL)
29999 pat = XVECEXP (pat, 0, 0);
30000 }
30001
30002 /* Only emitted for IS_STACKALIGN re-alignment. */
30003 {
30004 rtx dest, src;
30005 unsigned reg;
30006
30007 src = SET_SRC (pat);
30008 dest = SET_DEST (pat);
30009
30010 gcc_assert (src == stack_pointer_rtx);
30011 reg = REGNO (dest);
30012 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30013 reg + 0x90, reg);
30014 }
30015 handled_one = true;
30016 break;
30017
30018 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30019 to get correct dwarf information for shrink-wrap. We should not
30020 emit unwind information for it because these notes are used either for
30021 pretend arguments or to adjust sp and restore registers from the
30022 stack. */
30023 case REG_CFA_DEF_CFA:
30024 case REG_CFA_ADJUST_CFA:
30025 case REG_CFA_RESTORE:
30026 return;
30027
30028 case REG_CFA_EXPRESSION:
30029 case REG_CFA_OFFSET:
30030 /* ??? Only handling here what we actually emit. */
30031 gcc_unreachable ();
30032
30033 default:
30034 break;
30035 }
30036 }
30037 if (handled_one)
30038 return;
30039 pat = PATTERN (insn);
30040 found:
30041
30042 switch (GET_CODE (pat))
30043 {
30044 case SET:
30045 arm_unwind_emit_set (out_file, pat);
30046 break;
30047
30048 case SEQUENCE:
30049 /* Store multiple. */
30050 arm_unwind_emit_sequence (out_file, pat);
30051 break;
30052
30053 default:
30054 abort();
30055 }
30056 }
30057
30058
30059 /* Output a reference from a function exception table to the type_info
30060 object X. The EABI specifies that the symbol should be relocated by
30061 an R_ARM_TARGET2 relocation. */
30062
30063 static bool
30064 arm_output_ttype (rtx x)
30065 {
30066 fputs ("\t.word\t", asm_out_file);
30067 output_addr_const (asm_out_file, x);
30068 /* Use special relocations for symbol references. */
30069 if (!CONST_INT_P (x))
30070 fputs ("(TARGET2)", asm_out_file);
30071 fputc ('\n', asm_out_file);
30072
30073 return TRUE;
30074 }
30075
30076 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30077
30078 static void
30079 arm_asm_emit_except_personality (rtx personality)
30080 {
30081 fputs ("\t.personality\t", asm_out_file);
30082 output_addr_const (asm_out_file, personality);
30083 fputc ('\n', asm_out_file);
30084 }
30085 #endif /* ARM_UNWIND_INFO */
30086
30087 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30088
30089 static void
30090 arm_asm_init_sections (void)
30091 {
30092 #if ARM_UNWIND_INFO
30093 exception_section = get_unnamed_section (0, output_section_asm_op,
30094 "\t.handlerdata");
30095 #endif /* ARM_UNWIND_INFO */
30096
30097 #ifdef OBJECT_FORMAT_ELF
30098 if (target_pure_code)
30099 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30100 #endif
30101 }
30102
30103 /* Output unwind directives for the start/end of a function. */
30104
30105 void
30106 arm_output_fn_unwind (FILE * f, bool prologue)
30107 {
30108 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30109 return;
30110
30111 if (prologue)
30112 fputs ("\t.fnstart\n", f);
30113 else
30114 {
30115 /* If this function will never be unwound, then mark it as such.
30116 The same condition is used in arm_unwind_emit to suppress
30117 the frame annotations. */
30118 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30119 && (TREE_NOTHROW (current_function_decl)
30120 || crtl->all_throwers_are_sibcalls))
30121 fputs("\t.cantunwind\n", f);
30122
30123 fputs ("\t.fnend\n", f);
30124 }
30125 }
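/* Editorial example: with EABI unwind info a function body is bracketed by

     .fnstart
       ...function code and unwind annotations...
     .fnend

   and additionally receives .cantunwind just before .fnend when no unwind
   tables were requested and the function can never throw.  */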
30126
30127 static bool
30128 arm_emit_tls_decoration (FILE *fp, rtx x)
30129 {
30130 enum tls_reloc reloc;
30131 rtx val;
30132
30133 val = XVECEXP (x, 0, 0);
30134 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30135
30136 output_addr_const (fp, val);
30137
30138 switch (reloc)
30139 {
30140 case TLS_GD32:
30141 fputs ("(tlsgd)", fp);
30142 break;
30143 case TLS_GD32_FDPIC:
30144 fputs ("(tlsgd_fdpic)", fp);
30145 break;
30146 case TLS_LDM32:
30147 fputs ("(tlsldm)", fp);
30148 break;
30149 case TLS_LDM32_FDPIC:
30150 fputs ("(tlsldm_fdpic)", fp);
30151 break;
30152 case TLS_LDO32:
30153 fputs ("(tlsldo)", fp);
30154 break;
30155 case TLS_IE32:
30156 fputs ("(gottpoff)", fp);
30157 break;
30158 case TLS_IE32_FDPIC:
30159 fputs ("(gottpoff_fdpic)", fp);
30160 break;
30161 case TLS_LE32:
30162 fputs ("(tpoff)", fp);
30163 break;
30164 case TLS_DESCSEQ:
30165 fputs ("(tlsdesc)", fp);
30166 break;
30167 default:
30168 gcc_unreachable ();
30169 }
30170
30171 switch (reloc)
30172 {
30173 case TLS_GD32:
30174 case TLS_LDM32:
30175 case TLS_IE32:
30176 case TLS_DESCSEQ:
30177 fputs (" + (. - ", fp);
30178 output_addr_const (fp, XVECEXP (x, 0, 2));
30179 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30180 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30181 output_addr_const (fp, XVECEXP (x, 0, 3));
30182 fputc (')', fp);
30183 break;
30184 default:
30185 break;
30186 }
30187
30188 return TRUE;
30189 }
30190
30191 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30192
30193 static void
30194 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30195 {
30196 gcc_assert (size == 4);
30197 fputs ("\t.word\t", file);
30198 output_addr_const (file, x);
30199 fputs ("(tlsldo)", file);
30200 }
30201
30202 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30203
30204 static bool
30205 arm_output_addr_const_extra (FILE *fp, rtx x)
30206 {
30207 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30208 return arm_emit_tls_decoration (fp, x);
30209 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30210 {
30211 char label[256];
30212 int labelno = INTVAL (XVECEXP (x, 0, 0));
30213
30214 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30215 assemble_name_raw (fp, label);
30216
30217 return TRUE;
30218 }
30219 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30220 {
30221 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30222 if (GOT_PCREL)
30223 fputs ("+.", fp);
30224 fputs ("-(", fp);
30225 output_addr_const (fp, XVECEXP (x, 0, 0));
30226 fputc (')', fp);
30227 return TRUE;
30228 }
30229 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30230 {
30231 output_addr_const (fp, XVECEXP (x, 0, 0));
30232 if (GOT_PCREL)
30233 fputs ("+.", fp);
30234 fputs ("-(", fp);
30235 output_addr_const (fp, XVECEXP (x, 0, 1));
30236 fputc (')', fp);
30237 return TRUE;
30238 }
30239 else if (GET_CODE (x) == CONST_VECTOR)
30240 return arm_emit_vector_const (fp, x);
30241
30242 return FALSE;
30243 }
30244
30245 /* Output assembly for a shift instruction.
30246 SET_FLAGS determines how the instruction modifies the condition codes.
30247 0 - Do not set condition codes.
30248 1 - Set condition codes.
30249 2 - Use smallest instruction. */
30250 const char *
30251 arm_output_shift(rtx * operands, int set_flags)
30252 {
30253 char pattern[100];
30254 static const char flag_chars[3] = {'?', '.', '!'};
30255 const char *shift;
30256 HOST_WIDE_INT val;
30257 char c;
30258
30259 c = flag_chars[set_flags];
30260 shift = shift_op(operands[3], &val);
30261 if (shift)
30262 {
30263 if (val != -1)
30264 operands[2] = GEN_INT(val);
30265 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30266 }
30267 else
30268 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30269
30270 output_asm_insn (pattern, operands);
30271 return "";
30272 }
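/* Editorial sketch: for a left shift by a constant with SET_FLAGS == 0 the
   routine above builds a template such as "lsl%?\t%0, %1, %2", which after
   operand substitution prints e.g.

     lsl     r0, r1, #2

   With SET_FLAGS == 1 the '.' flag character is used instead, selecting the
   flag-setting form of the instruction.  */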
30273
30274 /* Output assembly for a WMMX immediate shift instruction. */
30275 const char *
30276 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30277 {
30278 int shift = INTVAL (operands[2]);
30279 char templ[50];
30280 machine_mode opmode = GET_MODE (operands[0]);
30281
30282 gcc_assert (shift >= 0);
30283
30284 /* Handle the case where the shift value is > 63 (for the D qualifier),
30285 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
30286 if (((opmode == V4HImode) && (shift > 15))
30287 || ((opmode == V2SImode) && (shift > 31))
30288 || ((opmode == DImode) && (shift > 63)))
30289 {
30290 if (wror_or_wsra)
30291 {
30292 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30293 output_asm_insn (templ, operands);
30294 if (opmode == DImode)
30295 {
30296 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30297 output_asm_insn (templ, operands);
30298 }
30299 }
30300 else
30301 {
30302 /* The destination register will contain all zeros. */
30303 sprintf (templ, "wzero\t%%0");
30304 output_asm_insn (templ, operands);
30305 }
30306 return "";
30307 }
30308
30309 if ((opmode == DImode) && (shift > 32))
30310 {
30311 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30312 output_asm_insn (templ, operands);
30313 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30314 output_asm_insn (templ, operands);
30315 }
30316 else
30317 {
30318 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30319 output_asm_insn (templ, operands);
30320 }
30321 return "";
30322 }
30323
30324 /* Output assembly for a WMMX tinsr instruction. */
30325 const char *
30326 arm_output_iwmmxt_tinsr (rtx *operands)
30327 {
30328 int mask = INTVAL (operands[3]);
30329 int i;
30330 char templ[50];
30331 int units = mode_nunits[GET_MODE (operands[0])];
30332 gcc_assert ((mask & (mask - 1)) == 0);
30333 for (i = 0; i < units; ++i)
30334 {
30335 if ((mask & 0x01) == 1)
30336 {
30337 break;
30338 }
30339 mask >>= 1;
30340 }
30341 gcc_assert (i < units);
30342 {
30343 switch (GET_MODE (operands[0]))
30344 {
30345 case E_V8QImode:
30346 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30347 break;
30348 case E_V4HImode:
30349 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30350 break;
30351 case E_V2SImode:
30352 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30353 break;
30354 default:
30355 gcc_unreachable ();
30356 break;
30357 }
30358 output_asm_insn (templ, operands);
30359 }
30360 return "";
30361 }
30362
30363 /* Output a Thumb-1 casesi dispatch sequence. */
30364 const char *
30365 thumb1_output_casesi (rtx *operands)
30366 {
30367 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30368
30369 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30370
30371 switch (GET_MODE(diff_vec))
30372 {
30373 case E_QImode:
30374 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30375 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30376 case E_HImode:
30377 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30378 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30379 case E_SImode:
30380 return "bl\t%___gnu_thumb1_case_si";
30381 default:
30382 gcc_unreachable ();
30383 }
30384 }
30385
30386 /* Output a Thumb-2 casesi instruction. */
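/* For a QImode (byte-offset) dispatch table the emitted sequence is roughly:
	cmp	rINDEX, rMAX
	bhi	.Ldefault
	tbb	[pc, rINDEX]
   where the register and label names are illustrative only.  */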
30387 const char *
30388 thumb2_output_casesi (rtx *operands)
30389 {
30390 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30391
30392 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30393
30394 output_asm_insn ("cmp\t%0, %1", operands);
30395 output_asm_insn ("bhi\t%l3", operands);
30396 switch (GET_MODE(diff_vec))
30397 {
30398 case E_QImode:
30399 return "tbb\t[%|pc, %0]";
30400 case E_HImode:
30401 return "tbh\t[%|pc, %0, lsl #1]";
30402 case E_SImode:
30403 if (flag_pic)
30404 {
30405 output_asm_insn ("adr\t%4, %l2", operands);
30406 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30407 output_asm_insn ("add\t%4, %4, %5", operands);
30408 return "bx\t%4";
30409 }
30410 else
30411 {
30412 output_asm_insn ("adr\t%4, %l2", operands);
30413 return "ldr\t%|pc, [%4, %0, lsl #2]";
30414 }
30415 default:
30416 gcc_unreachable ();
30417 }
30418 }
30419
30420 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30421 per-core tuning structs. */
30422 static int
30423 arm_issue_rate (void)
30424 {
30425 return current_tune->issue_rate;
30426 }
30427
30428 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30429 static int
30430 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30431 {
30432 if (DEBUG_INSN_P (insn))
30433 return more;
30434
30435 rtx_code code = GET_CODE (PATTERN (insn));
30436 if (code == USE || code == CLOBBER)
30437 return more;
30438
30439 if (get_attr_type (insn) == TYPE_NO_INSN)
30440 return more;
30441
30442 return more - 1;
30443 }
30444
30445 /* Return how many instructions the scheduler should look ahead to choose
30446 the best one. */
30447 static int
30448 arm_first_cycle_multipass_dfa_lookahead (void)
30449 {
30450 int issue_rate = arm_issue_rate ();
30451
30452 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30453 }
30454
30455 /* Enable modeling of L2 auto-prefetcher. */
30456 static int
30457 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30458 {
30459 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30460 }
30461
30462 const char *
30463 arm_mangle_type (const_tree type)
30464 {
30465 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30466 has to be mangled as if it is in the "std" namespace. */
30467 if (TARGET_AAPCS_BASED
30468 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30469 return "St9__va_list";
30470
30471 /* Half-precision floating point types. */
30472 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30473 {
30474 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30475 return NULL;
30476 if (TYPE_MODE (type) == BFmode)
30477 return "u6__bf16";
30478 else
30479 return "Dh";
30480 }
30481
30482 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30483 built-in type. */
30484 if (TYPE_NAME (type) != NULL)
30485 return arm_mangle_builtin_type (type);
30486
30487 /* Use the default mangling. */
30488 return NULL;
30489 }
30490
30491 /* Order of allocation of core registers for Thumb: this allocation is
30492 written over the corresponding initial entries of the array
30493 initialized with REG_ALLOC_ORDER. We allocate all low registers
30494 first. Saving and restoring a low register is usually cheaper than
30495 using a call-clobbered high register. */
30496
30497 static const int thumb_core_reg_alloc_order[] =
30498 {
30499 3, 2, 1, 0, 4, 5, 6, 7,
30500 12, 14, 8, 9, 10, 11
30501 };
30502
30503 /* Adjust register allocation order when compiling for Thumb. */
30504
30505 void
30506 arm_order_regs_for_local_alloc (void)
30507 {
30508 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30509 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30510 if (TARGET_THUMB)
30511 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30512 sizeof (thumb_core_reg_alloc_order));
30513 }
30514
30515 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30516
30517 bool
30518 arm_frame_pointer_required (void)
30519 {
30520 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30521 return true;
30522
30523 /* If the function receives nonlocal gotos, it needs to save the frame
30524 pointer in the nonlocal_goto_save_area object. */
30525 if (cfun->has_nonlocal_label)
30526 return true;
30527
30528 /* The frame pointer is required for non-leaf APCS frames. */
30529 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30530 return true;
30531
30532 /* If we are probing the stack in the prologue, we will have a faulting
30533 instruction prior to the stack adjustment and this requires a frame
30534 pointer if we want to catch the exception using the EABI unwinder. */
30535 if (!IS_INTERRUPT (arm_current_func_type ())
30536 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30537 || flag_stack_clash_protection)
30538 && arm_except_unwind_info (&global_options) == UI_TARGET
30539 && cfun->can_throw_non_call_exceptions)
30540 {
30541 HOST_WIDE_INT size = get_frame_size ();
30542
30543 /* That's irrelevant if there is no stack adjustment. */
30544 if (size <= 0)
30545 return false;
30546
30547 /* That's relevant only if there is a stack probe. */
30548 if (crtl->is_leaf && !cfun->calls_alloca)
30549 {
30550 /* We don't have the final size of the frame yet, so allow for a worst-case adjustment. */
30551 size += 32 * UNITS_PER_WORD;
30552 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30553 return true;
30554 }
30555 else
30556 return true;
30557 }
30558
30559 return false;
30560 }
30561
30562 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30563 All modes except THUMB1 have conditional execution.
30564 If we have conditional arithmetic, return false before reload to
30565 enable some ifcvt transformations. */
30566 static bool
30567 arm_have_conditional_execution (void)
30568 {
30569 bool has_cond_exec, enable_ifcvt_trans;
30570
30571 /* Only THUMB1 cannot support conditional execution. */
30572 has_cond_exec = !TARGET_THUMB1;
30573
30574 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30575 before reload. */
30576 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30577
30578 return has_cond_exec && !enable_ifcvt_trans;
30579 }
30580
30581 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30582 static HOST_WIDE_INT
30583 arm_vector_alignment (const_tree type)
30584 {
30585 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30586
30587 if (TARGET_AAPCS_BASED)
30588 align = MIN (align, 64);
30589
30590 return align;
30591 }
30592
30593 static unsigned int
30594 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30595 {
30596 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30597 {
30598 modes->safe_push (V16QImode);
30599 modes->safe_push (V8QImode);
30600 }
30601 return 0;
30602 }
30603
30604 static bool
30605 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30606 {
30607 /* Vectors which aren't in packed structures will not be less aligned than
30608 the natural alignment of their element type, so this is safe. */
30609 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30610 return !is_packed;
30611
30612 return default_builtin_vector_alignment_reachable (type, is_packed);
30613 }
30614
30615 static bool
30616 arm_builtin_support_vector_misalignment (machine_mode mode,
30617 const_tree type, int misalignment,
30618 bool is_packed)
30619 {
30620 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30621 {
30622 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30623
30624 if (is_packed)
30625 return align == 1;
30626
30627 /* If the misalignment is unknown, we should be able to handle the access
30628 so long as it is not to a member of a packed data structure. */
30629 if (misalignment == -1)
30630 return true;
30631
30632 /* Return true if the misalignment is a multiple of the natural alignment
30633 of the vector's element type. This is probably always going to be
30634 true in practice, since we've already established that this isn't a
30635 packed access. */
30636 return ((misalignment % align) == 0);
30637 }
30638
30639 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30640 is_packed);
30641 }
30642
30643 static void
30644 arm_conditional_register_usage (void)
30645 {
30646 int regno;
30647
30648 if (TARGET_THUMB1 && optimize_size)
30649 {
30650 /* When optimizing for size on Thumb-1, it's better not
30651 to use the HI regs, because of the overhead of
30652 stacking them. */
30653 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30654 fixed_regs[regno] = call_used_regs[regno] = 1;
30655 }
30656
30657 /* The link register can be clobbered by any branch insn,
30658 but we have no way to track that at present, so mark
30659 it as unavailable. */
30660 if (TARGET_THUMB1)
30661 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30662
30663 if (TARGET_32BIT && TARGET_VFP_BASE)
30664 {
30665 /* VFPv3 registers are disabled when earlier VFP
30666 versions are selected due to the definition of
30667 LAST_VFP_REGNUM. */
30668 for (regno = FIRST_VFP_REGNUM;
30669 regno <= LAST_VFP_REGNUM; ++ regno)
30670 {
30671 fixed_regs[regno] = 0;
30672 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30673 || regno >= FIRST_VFP_REGNUM + 32;
30674 }
30675 if (TARGET_HAVE_MVE)
30676 fixed_regs[VPR_REGNUM] = 0;
30677 }
30678
30679 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30680 {
30681 regno = FIRST_IWMMXT_GR_REGNUM;
30682 /* The 2002/10/09 revision of the XScale ABI has wCG0
30683 and wCG1 as call-preserved registers. The 2002/11/21
30684 revision changed this so that all wCG registers are
30685 scratch registers. */
30686 for (regno = FIRST_IWMMXT_GR_REGNUM;
30687 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30688 fixed_regs[regno] = 0;
30689 /* The XScale ABI has wR0 - wR9 as scratch registers,
30690 the rest as call-preserved registers. */
30691 for (regno = FIRST_IWMMXT_REGNUM;
30692 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30693 {
30694 fixed_regs[regno] = 0;
30695 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30696 }
30697 }
30698
30699 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30700 {
30701 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30702 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30703 }
30704 else if (TARGET_APCS_STACK)
30705 {
30706 fixed_regs[10] = 1;
30707 call_used_regs[10] = 1;
30708 }
30709 /* -mcaller-super-interworking reserves r11 for calls to
30710 _interwork_r11_call_via_rN(). Making the register global
30711 is an easy way of ensuring that it remains valid for all
30712 calls. */
30713 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30714 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30715 {
30716 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30717 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30718 if (TARGET_CALLER_INTERWORKING)
30719 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30720 }
30721
30722 /* The Q and GE bits are only accessed via special ACLE patterns. */
30723 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30724 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30725
30726 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30727 }
30728
30729 static reg_class_t
30730 arm_preferred_rename_class (reg_class_t rclass)
30731 {
30732 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30733 using GENERAL_REGS. During the register rename pass we therefore prefer
30734 LO_REGS, which can reduce code size. */
30735 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30736 return LO_REGS;
30737 else
30738 return NO_REGS;
30739 }
30740
30741 /* Compute the attribute "length" of insn "*push_multi".
30742 So this function MUST be kept in sync with that insn pattern. */
30743 int
30744 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30745 {
30746 int i, regno, hi_reg;
30747 int num_saves = XVECLEN (parallel_op, 0);
30748
30749 /* ARM mode. */
30750 if (TARGET_ARM)
30751 return 4;
30752 /* Thumb1 mode. */
30753 if (TARGET_THUMB1)
30754 return 2;
30755
30756 /* Thumb2 mode. */
30757 regno = REGNO (first_op);
30758 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30759 list fits in 8 bits. Normally this means that all registers in the list must
30760 be LO_REGS, that is R0-R7. If any HI_REGS register is used, we must use the
30761 32-bit encoding. The one exception is PUSH, where LR (a HI_REGS register)
30762 can still be used with the 16-bit encoding. */
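/* For example, "push {r0-r7, lr}" can use the 16-bit encoding, whereas
   "push {r0, r8}" requires the 32-bit encoding.  */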
30763 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30764 for (i = 1; i < num_saves && !hi_reg; i++)
30765 {
30766 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30767 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30768 }
30769
30770 if (!hi_reg)
30771 return 2;
30772 return 4;
30773 }
30774
30775 /* Compute the attribute "length" of insn. Currently, this function is used
30776 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30777 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30778 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30779 true if OPERANDS contains an insn that explicitly updates the base register. */
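/* For example, "pop {r0-r3, pc}" fits the 16-bit encoding, whereas popping
   any other high register, or using LDM with a high base register, forces
   the 32-bit encoding.  */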
30780
30781 int
30782 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30783 {
30784 /* ARM mode. */
30785 if (TARGET_ARM)
30786 return 4;
30787 /* Thumb1 mode. */
30788 if (TARGET_THUMB1)
30789 return 2;
30790
30791 rtx parallel_op = operands[0];
30792 /* Start at the index of the last element of the PARALLEL. */
30793 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30794 /* Initialize REGNO to the base register. */
30795 unsigned regno = REGNO (operands[1]);
30796 /* Skip return and write back pattern.
30797 We only need register pop pattern for later analysis. */
30798 unsigned first_indx = 0;
30799 first_indx += return_pc ? 1 : 0;
30800 first_indx += write_back_p ? 1 : 0;
30801
30802 /* A pop operation can be done through LDM or POP. If the base register is SP
30803 and it is written back, then the LDM is an alias of POP. */
30804 bool pop_p = (regno == SP_REGNUM && write_back_p);
30805 bool ldm_p = !pop_p;
30806
30807 /* Check base register for LDM. */
30808 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30809 return 4;
30810
30811 /* Check each register in the list. */
30812 for (; indx >= first_indx; indx--)
30813 {
30814 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30815 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30816 comment in arm_attr_length_push_multi. */
30817 if (REGNO_REG_CLASS (regno) == HI_REGS
30818 && (regno != PC_REGNUM || ldm_p))
30819 return 4;
30820 }
30821
30822 return 2;
30823 }
30824
30825 /* Compute the number of instructions emitted by output_move_double. */
30826 int
30827 arm_count_output_move_double_insns (rtx *operands)
30828 {
30829 int count;
30830 rtx ops[2];
30831 /* output_move_double may modify the operands array, so call it
30832 here on a copy of the array. */
30833 ops[0] = operands[0];
30834 ops[1] = operands[1];
30835 output_move_double (ops, false, &count);
30836 return count;
30837 }
30838
30839 /* Same as above, but operands are a register/memory pair in SImode.
30840 Assumes operands has the base register in position 0 and memory in position
30841 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30842 int
30843 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30844 {
30845 int count;
30846 rtx ops[2];
30847 int regnum, memnum;
30848 if (load)
30849 regnum = 0, memnum = 1;
30850 else
30851 regnum = 1, memnum = 0;
30852 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30853 ops[memnum] = adjust_address (operands[2], DImode, 0);
30854 output_move_double (ops, false, &count);
30855 return count;
30856 }
30857
30858
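/* If OPERAND is a CONST_DOUBLE whose value is 2^-N for some N in [0, 31],
   return N, otherwise return 0.  This is used for the fractional-bits
   operand of the fixed-point VCVT conversion patterns.  */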
30859 int
30860 vfp3_const_double_for_fract_bits (rtx operand)
30861 {
30862 REAL_VALUE_TYPE r0;
30863
30864 if (!CONST_DOUBLE_P (operand))
30865 return 0;
30866
30867 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30868 if (exact_real_inverse (DFmode, &r0)
30869 && !REAL_VALUE_NEGATIVE (r0))
30870 {
30871 if (exact_real_truncate (DFmode, &r0))
30872 {
30873 HOST_WIDE_INT value = real_to_integer (&r0);
30874 value = value & 0xffffffff;
30875 if ((value != 0) && ( (value & (value - 1)) == 0))
30876 {
30877 int ret = exact_log2 (value);
30878 gcc_assert (IN_RANGE (ret, 0, 31));
30879 return ret;
30880 }
30881 }
30882 }
30883 return 0;
30884 }
30885
30886 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30887 log2 is in [1, 32], return that log2. Otherwise return -1.
30888 This is used in the patterns for vcvt.s32.f32 floating-point to
30889 fixed-point conversions. */
30890
30891 int
30892 vfp3_const_double_for_bits (rtx x)
30893 {
30894 const REAL_VALUE_TYPE *r;
30895
30896 if (!CONST_DOUBLE_P (x))
30897 return -1;
30898
30899 r = CONST_DOUBLE_REAL_VALUE (x);
30900
30901 if (REAL_VALUE_NEGATIVE (*r)
30902 || REAL_VALUE_ISNAN (*r)
30903 || REAL_VALUE_ISINF (*r)
30904 || !real_isinteger (r, SFmode))
30905 return -1;
30906
30907 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30908
30909 /* The exact_log2 above will have returned -1 if this is
30910 not an exact log2. */
30911 if (!IN_RANGE (hwint, 1, 32))
30912 return -1;
30913
30914 return hwint;
30915 }
30916
30917 \f
30918 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30919
30920 static void
30921 arm_pre_atomic_barrier (enum memmodel model)
30922 {
30923 if (need_atomic_barrier_p (model, true))
30924 emit_insn (gen_memory_barrier ());
30925 }
30926
30927 static void
30928 arm_post_atomic_barrier (enum memmodel model)
30929 {
30930 if (need_atomic_barrier_p (model, false))
30931 emit_insn (gen_memory_barrier ());
30932 }
30933
30934 /* Emit the load-exclusive and store-exclusive instructions.
30935 Use acquire and release versions if necessary. */
30936
30937 static void
30938 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30939 {
30940 rtx (*gen) (rtx, rtx);
30941
30942 if (acq)
30943 {
30944 switch (mode)
30945 {
30946 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30947 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30948 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30949 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30950 default:
30951 gcc_unreachable ();
30952 }
30953 }
30954 else
30955 {
30956 switch (mode)
30957 {
30958 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30959 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30960 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30961 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30962 default:
30963 gcc_unreachable ();
30964 }
30965 }
30966
30967 emit_insn (gen (rval, mem));
30968 }
30969
30970 static void
30971 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30972 rtx mem, bool rel)
30973 {
30974 rtx (*gen) (rtx, rtx, rtx);
30975
30976 if (rel)
30977 {
30978 switch (mode)
30979 {
30980 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30981 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30982 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30983 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30984 default:
30985 gcc_unreachable ();
30986 }
30987 }
30988 else
30989 {
30990 switch (mode)
30991 {
30992 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30993 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30994 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30995 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30996 default:
30997 gcc_unreachable ();
30998 }
30999 }
31000
31001 emit_insn (gen (bval, rval, mem));
31002 }
31003
31004 /* Mark the previous jump instruction as unlikely. */
31005
31006 static void
31007 emit_unlikely_jump (rtx insn)
31008 {
31009 rtx_insn *jump = emit_jump_insn (insn);
31010 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31011 }
31012
31013 /* Expand a compare and swap pattern. */
31014
31015 void
31016 arm_expand_compare_and_swap (rtx operands[])
31017 {
31018 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31019 machine_mode mode, cmp_mode;
31020
31021 bval = operands[0];
31022 rval = operands[1];
31023 mem = operands[2];
31024 oldval = operands[3];
31025 newval = operands[4];
31026 is_weak = operands[5];
31027 mod_s = operands[6];
31028 mod_f = operands[7];
31029 mode = GET_MODE (mem);
31030
31031 /* Normally the succ memory model must be stronger than fail, but in the
31032 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31033 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31034
31035 if (TARGET_HAVE_LDACQ
31036 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31037 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31038 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31039
31040 switch (mode)
31041 {
31042 case E_QImode:
31043 case E_HImode:
31044 /* For narrow modes, we're going to perform the comparison in SImode,
31045 so do the zero-extension now. */
31046 rval = gen_reg_rtx (SImode);
31047 oldval = convert_modes (SImode, mode, oldval, true);
31048 /* FALLTHRU */
31049
31050 case E_SImode:
31051 /* Force the value into a register if needed. We waited until after
31052 the zero-extension above to do this properly. */
31053 if (!arm_add_operand (oldval, SImode))
31054 oldval = force_reg (SImode, oldval);
31055 break;
31056
31057 case E_DImode:
31058 if (!cmpdi_operand (oldval, mode))
31059 oldval = force_reg (mode, oldval);
31060 break;
31061
31062 default:
31063 gcc_unreachable ();
31064 }
31065
31066 if (TARGET_THUMB1)
31067 cmp_mode = E_SImode;
31068 else
31069 cmp_mode = CC_Zmode;
31070
31071 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31072 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31073 oldval, newval, is_weak, mod_s, mod_f));
31074
31075 if (mode == QImode || mode == HImode)
31076 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31077
31078 /* In all cases, we arrange for success to be signaled by Z set.
31079 This arrangement allows for the boolean result to be used directly
31080 in a subsequent branch, post optimization. For Thumb-1 targets, the
31081 boolean negation of the result is also stored in bval, because the Thumb-1
31082 backend lacks dependency tracking for the CC flag: flag-setting is not
31083 represented at the RTL level. */
31084 if (TARGET_THUMB1)
31085 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31086 else
31087 {
31088 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31089 emit_insn (gen_rtx_SET (bval, x));
31090 }
31091 }
31092
31093 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31094 another memory store between the load-exclusive and store-exclusive can
31095 reset the monitor from Exclusive to Open state. This means we must wait
31096 until after reload to split the pattern, lest we get a register spill in
31097 the middle of the atomic sequence. Success of the compare and swap is
31098 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31099 for Thumb-1 targets (ie. negation of the boolean value returned by
31100 atomic_compare_and_swapmode standard pattern in operand 0). */
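/* The split sequence for a 32-bit strong compare-and-swap is roughly:

	.Lretry:
	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	.Ldone
	strex	neg_bval, newval, [mem]
	cmp	neg_bval, #0
	bne	.Lretry
	.Ldone:

   with acquire/release variants of LDREX/STREX or explicit barriers added
   as the memory model requires; the label names are illustrative only.  */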
31101
31102 void
31103 arm_split_compare_and_swap (rtx operands[])
31104 {
31105 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31106 machine_mode mode;
31107 enum memmodel mod_s, mod_f;
31108 bool is_weak;
31109 rtx_code_label *label1, *label2;
31110 rtx x, cond;
31111
31112 rval = operands[1];
31113 mem = operands[2];
31114 oldval = operands[3];
31115 newval = operands[4];
31116 is_weak = (operands[5] != const0_rtx);
31117 mod_s_rtx = operands[6];
31118 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31119 mod_f = memmodel_from_int (INTVAL (operands[7]));
31120 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31121 mode = GET_MODE (mem);
31122
31123 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31124
31125 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31126 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31127
31128 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31129 a full barrier is emitted after the store-release. */
31130 if (is_armv8_sync)
31131 use_acquire = false;
31132
31133 /* Checks whether a barrier is needed and emits one accordingly. */
31134 if (!(use_acquire || use_release))
31135 arm_pre_atomic_barrier (mod_s);
31136
31137 label1 = NULL;
31138 if (!is_weak)
31139 {
31140 label1 = gen_label_rtx ();
31141 emit_label (label1);
31142 }
31143 label2 = gen_label_rtx ();
31144
31145 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31146
31147 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31148 as required to communicate with arm_expand_compare_and_swap. */
31149 if (TARGET_32BIT)
31150 {
31151 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31152 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31153 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31154 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31155 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31156 }
31157 else
31158 {
31159 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31160 if (thumb1_cmpneg_operand (oldval, SImode))
31161 {
31162 rtx src = rval;
31163 if (!satisfies_constraint_L (oldval))
31164 {
31165 gcc_assert (satisfies_constraint_J (oldval));
31166
31167 /* For such immediates, ADDS needs the source and destination regs
31168 to be the same.
31169
31170 Normally this would be handled by RA, but this is all happening
31171 after RA. */
31172 emit_move_insn (neg_bval, rval);
31173 src = neg_bval;
31174 }
31175
31176 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31177 label2, cond));
31178 }
31179 else
31180 {
31181 emit_move_insn (neg_bval, const1_rtx);
31182 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31183 }
31184 }
31185
31186 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31187
31188 /* Weak or strong, we want EQ to be true for success, so that we
31189 match the flags that we got from the compare above. */
31190 if (TARGET_32BIT)
31191 {
31192 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31193 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31194 emit_insn (gen_rtx_SET (cond, x));
31195 }
31196
31197 if (!is_weak)
31198 {
31199 /* Z is set to boolean value of !neg_bval, as required to communicate
31200 with arm_expand_compare_and_swap. */
31201 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31202 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31203 }
31204
31205 if (!is_mm_relaxed (mod_f))
31206 emit_label (label2);
31207
31208 /* Checks whether a barrier is needed and emits one accordingly. */
31209 if (is_armv8_sync
31210 || !(use_acquire || use_release))
31211 arm_post_atomic_barrier (mod_s);
31212
31213 if (is_mm_relaxed (mod_f))
31214 emit_label (label2);
31215 }
31216
31217 /* Split an atomic operation pattern. Operation is given by CODE and is one
31218 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31219 operation). Operation is performed on the content at MEM and on VALUE
31220 following the memory model MODEL_RTX. The content at MEM before and after
31221 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31222 success of the operation is returned in COND. Using a scratch register or
31223 an operand register for these determines what result is returned for that
31224 pattern. */
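/* The split sequence is roughly:

	.Lretry:
	ldrex	old_out, [mem]
	<op>	new_out, old_out, value
	strex	cond, new_out, [mem]
	cmp	cond, #0
	bne	.Lretry

   with barriers or acquire/release variants added as MODEL_RTX requires;
   the label name is illustrative only.  */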
31225
31226 void
31227 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31228 rtx value, rtx model_rtx, rtx cond)
31229 {
31230 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31231 machine_mode mode = GET_MODE (mem);
31232 machine_mode wmode = (mode == DImode ? DImode : SImode);
31233 rtx_code_label *label;
31234 bool all_low_regs, bind_old_new;
31235 rtx x;
31236
31237 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31238
31239 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31240 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31241
31242 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31243 a full barrier is emitted after the store-release. */
31244 if (is_armv8_sync)
31245 use_acquire = false;
31246
31247 /* Checks whether a barrier is needed and emits one accordingly. */
31248 if (!(use_acquire || use_release))
31249 arm_pre_atomic_barrier (model);
31250
31251 label = gen_label_rtx ();
31252 emit_label (label);
31253
31254 if (new_out)
31255 new_out = gen_lowpart (wmode, new_out);
31256 if (old_out)
31257 old_out = gen_lowpart (wmode, old_out);
31258 else
31259 old_out = new_out;
31260 value = simplify_gen_subreg (wmode, value, mode, 0);
31261
31262 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31263
31264 /* Does the operation require destination and first operand to use the same
31265 register? This is decided by register constraints of relevant insn
31266 patterns in thumb1.md. */
31267 gcc_assert (!new_out || REG_P (new_out));
31268 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31269 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31270 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31271 bind_old_new =
31272 (TARGET_THUMB1
31273 && code != SET
31274 && code != MINUS
31275 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31276
31277 /* We want to return the old value while putting the result of the operation
31278 in the same register as the old value so copy the old value over to the
31279 destination register and use that register for the operation. */
31280 if (old_out && bind_old_new)
31281 {
31282 emit_move_insn (new_out, old_out);
31283 old_out = new_out;
31284 }
31285
31286 switch (code)
31287 {
31288 case SET:
31289 new_out = value;
31290 break;
31291
31292 case NOT:
31293 x = gen_rtx_AND (wmode, old_out, value);
31294 emit_insn (gen_rtx_SET (new_out, x));
31295 x = gen_rtx_NOT (wmode, new_out);
31296 emit_insn (gen_rtx_SET (new_out, x));
31297 break;
31298
31299 case MINUS:
31300 if (CONST_INT_P (value))
31301 {
31302 value = gen_int_mode (-INTVAL (value), wmode);
31303 code = PLUS;
31304 }
31305 /* FALLTHRU */
31306
31307 case PLUS:
31308 if (mode == DImode)
31309 {
31310 /* DImode plus/minus need to clobber flags. */
31311 /* The adddi3 and subdi3 patterns are incorrectly written so that
31312 they require matching operands, even when we could easily support
31313 three operands. Thankfully, this can be fixed up post-splitting,
31314 as the individual add+adc patterns do accept three operands and
31315 post-reload cprop can make these moves go away. */
31316 emit_move_insn (new_out, old_out);
31317 if (code == PLUS)
31318 x = gen_adddi3 (new_out, new_out, value);
31319 else
31320 x = gen_subdi3 (new_out, new_out, value);
31321 emit_insn (x);
31322 break;
31323 }
31324 /* FALLTHRU */
31325
31326 default:
31327 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31328 emit_insn (gen_rtx_SET (new_out, x));
31329 break;
31330 }
31331
31332 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31333 use_release);
31334
31335 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31336 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31337
31338 /* Checks whether a barrier is needed and emits one accordingly. */
31339 if (is_armv8_sync
31340 || !(use_acquire || use_release))
31341 arm_post_atomic_barrier (model);
31342 }
31343 \f
31344 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31345 opt_machine_mode
31346 arm_mode_to_pred_mode (machine_mode mode)
31347 {
31348 switch (GET_MODE_NUNITS (mode))
31349 {
31350 case 16: return V16BImode;
31351 case 8: return V8BImode;
31352 case 4: return V4BImode;
31353 }
31354 return opt_machine_mode ();
31355 }
31356
31357 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31358 If CAN_INVERT, store either the result or its inverse in TARGET
31359 and return true if TARGET contains the inverse. If !CAN_INVERT,
31360 always store the result in TARGET, never its inverse.
31361
31362 Note that the handling of floating-point comparisons is not
31363 IEEE compliant. */
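/* For example, on Neon an UNLT comparison is emitted as GE, with the result
   then inverted (or the inversion left to the caller when CAN_INVERT).  */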
31364
31365 bool
31366 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31367 bool can_invert)
31368 {
31369 machine_mode cmp_result_mode = GET_MODE (target);
31370 machine_mode cmp_mode = GET_MODE (op0);
31371
31372 bool inverted;
31373
31374 /* MVE supports more comparisons than Neon. */
31375 if (TARGET_HAVE_MVE)
31376 inverted = false;
31377 else
31378 switch (code)
31379 {
31380 /* For these we need to compute the inverse of the requested
31381 comparison. */
31382 case UNORDERED:
31383 case UNLT:
31384 case UNLE:
31385 case UNGT:
31386 case UNGE:
31387 case UNEQ:
31388 case NE:
31389 code = reverse_condition_maybe_unordered (code);
31390 if (!can_invert)
31391 {
31392 /* Recursively emit the inverted comparison into a temporary
31393 and then store its inverse in TARGET. This avoids reusing
31394 TARGET (which for integer NE could be one of the inputs). */
31395 rtx tmp = gen_reg_rtx (cmp_result_mode);
31396 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31397 gcc_unreachable ();
31398 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31399 return false;
31400 }
31401 inverted = true;
31402 break;
31403
31404 default:
31405 inverted = false;
31406 break;
31407 }
31408
31409 switch (code)
31410 {
31411 /* These are natively supported by Neon for zero comparisons, but otherwise
31412 require the operands to be swapped. For MVE, we can only compare
31413 registers. */
31414 case LE:
31415 case LT:
31416 if (!TARGET_HAVE_MVE)
31417 if (op1 != CONST0_RTX (cmp_mode))
31418 {
31419 code = swap_condition (code);
31420 std::swap (op0, op1);
31421 }
31422 /* Fall through. */
31423
31424 /* These are natively supported by Neon for both register and zero
31425 operands. MVE supports registers only. */
31426 case EQ:
31427 case GE:
31428 case GT:
31429 case NE:
31430 if (TARGET_HAVE_MVE)
31431 {
31432 switch (GET_MODE_CLASS (cmp_mode))
31433 {
31434 case MODE_VECTOR_INT:
31435 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31436 op0, force_reg (cmp_mode, op1)));
31437 break;
31438 case MODE_VECTOR_FLOAT:
31439 if (TARGET_HAVE_MVE_FLOAT)
31440 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31441 op0, force_reg (cmp_mode, op1)));
31442 else
31443 gcc_unreachable ();
31444 break;
31445 default:
31446 gcc_unreachable ();
31447 }
31448 }
31449 else
31450 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31451 return inverted;
31452
31453 /* These are natively supported for register operands only.
31454 Comparisons with zero aren't useful and should be folded
31455 or canonicalized by target-independent code. */
31456 case GEU:
31457 case GTU:
31458 if (TARGET_HAVE_MVE)
31459 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31460 op0, force_reg (cmp_mode, op1)));
31461 else
31462 emit_insn (gen_neon_vc (code, cmp_mode, target,
31463 op0, force_reg (cmp_mode, op1)));
31464 return inverted;
31465
31466 /* These require the operands to be swapped and likewise do not
31467 support comparisons with zero. */
31468 case LEU:
31469 case LTU:
31470 if (TARGET_HAVE_MVE)
31471 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31472 force_reg (cmp_mode, op1), op0));
31473 else
31474 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31475 target, force_reg (cmp_mode, op1), op0));
31476 return inverted;
31477
31478 /* These need a combination of two comparisons. */
31479 case LTGT:
31480 case ORDERED:
31481 {
31482 /* Operands are LTGT iff (a > b || a < b).
31483 Operands are ORDERED iff (a > b || a <= b). */
31484 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31485 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31486 rtx_code alt_code = (code == LTGT ? LT : LE);
31487 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31488 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31489 gcc_unreachable ();
31490 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31491 gt_res, alt_res)));
31492 return inverted;
31493 }
31494
31495 default:
31496 gcc_unreachable ();
31497 }
31498 }
31499
31500 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31501 CMP_RESULT_MODE is the mode of the comparison result. */
31502
31503 void
31504 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31505 {
31506 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31507 arm_expand_vector_compare, and another one here. */
31508 rtx mask;
31509
31510 if (TARGET_HAVE_MVE)
31511 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31512 else
31513 mask = gen_reg_rtx (cmp_result_mode);
31514
31515 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31516 operands[4], operands[5], true);
31517 if (inverted)
31518 std::swap (operands[1], operands[2]);
31519 if (TARGET_NEON)
31520 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31521 mask, operands[1], operands[2]));
31522 else
31523 {
31524 machine_mode cmp_mode = GET_MODE (operands[0]);
31525
31526 switch (GET_MODE_CLASS (cmp_mode))
31527 {
31528 case MODE_VECTOR_INT:
31529 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31530 operands[1], operands[2], mask));
31531 break;
31532 case MODE_VECTOR_FLOAT:
31533 if (TARGET_HAVE_MVE_FLOAT)
31534 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31535 operands[1], operands[2], mask));
31536 else
31537 gcc_unreachable ();
31538 break;
31539 default:
31540 gcc_unreachable ();
31541 }
31542 }
31543 }
31544 \f
31545 #define MAX_VECT_LEN 16
31546
31547 struct expand_vec_perm_d
31548 {
31549 rtx target, op0, op1;
31550 vec_perm_indices perm;
31551 machine_mode vmode;
31552 bool one_vector_p;
31553 bool testing_p;
31554 };
31555
31556 /* Generate a variable permutation. */
31557
31558 static void
31559 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31560 {
31561 machine_mode vmode = GET_MODE (target);
31562 bool one_vector_p = rtx_equal_p (op0, op1);
31563
31564 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31565 gcc_checking_assert (GET_MODE (op0) == vmode);
31566 gcc_checking_assert (GET_MODE (op1) == vmode);
31567 gcc_checking_assert (GET_MODE (sel) == vmode);
31568 gcc_checking_assert (TARGET_NEON);
31569
31570 if (one_vector_p)
31571 {
31572 if (vmode == V8QImode)
31573 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31574 else
31575 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31576 }
31577 else
31578 {
31579 rtx pair;
31580
31581 if (vmode == V8QImode)
31582 {
31583 pair = gen_reg_rtx (V16QImode);
31584 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31585 pair = gen_lowpart (TImode, pair);
31586 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31587 }
31588 else
31589 {
31590 pair = gen_reg_rtx (OImode);
31591 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31592 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31593 }
31594 }
31595 }
31596
31597 void
31598 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31599 {
31600 machine_mode vmode = GET_MODE (target);
31601 unsigned int nelt = GET_MODE_NUNITS (vmode);
31602 bool one_vector_p = rtx_equal_p (op0, op1);
31603 rtx mask;
31604
31605 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31606 numbering of elements for big-endian, we must reverse the order. */
31607 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31608
31609 /* The VTBL instruction does not use a modulo index, so we must take care
31610 of that ourselves. */
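/* For example, with a single V8QImode input the selector is ANDed with 7,
   so out-of-range indices wrap as VEC_PERM_EXPR requires instead of
   producing the zeros that VTBL would give.  */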
31611 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31612 mask = gen_const_vec_duplicate (vmode, mask);
31613 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31614
31615 arm_expand_vec_perm_1 (target, op0, op1, sel);
31616 }
31617
31618 /* Map lane ordering between architectural lane order, and GCC lane order,
31619 taking into account ABI. See comment above output_move_neon for details. */
31620
31621 static int
31622 neon_endian_lane_map (machine_mode mode, int lane)
31623 {
31624 if (BYTES_BIG_ENDIAN)
31625 {
31626 int nelems = GET_MODE_NUNITS (mode);
31627 /* Reverse lane order. */
31628 lane = (nelems - 1 - lane);
31629 /* Reverse D register order, to match ABI. */
31630 if (GET_MODE_SIZE (mode) == 16)
31631 lane = lane ^ (nelems / 2);
31632 }
31633 return lane;
31634 }
31635
31636 /* Some permutations index into pairs of vectors, this is a helper function
31637 to map indexes into those pairs of vectors. */
31638
31639 static int
31640 neon_pair_endian_lane_map (machine_mode mode, int lane)
31641 {
31642 int nelem = GET_MODE_NUNITS (mode);
31643 if (BYTES_BIG_ENDIAN)
31644 lane =
31645 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31646 return lane;
31647 }
31648
31649 /* Generate or test for an insn that supports a constant permutation. */
31650
31651 /* Recognize patterns for the VUZP insns. */
31652
31653 static bool
31654 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31655 {
31656 unsigned int i, odd, mask, nelt = d->perm.length ();
31657 rtx out0, out1, in0, in1;
31658 int first_elem;
31659 int swap_nelt;
31660
31661 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31662 return false;
31663
31664 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31665 big endian pattern on 64 bit vectors, so we correct for that. */
31666 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31667 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31668
31669 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31670
31671 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31672 odd = 0;
31673 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31674 odd = 1;
31675 else
31676 return false;
31677 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31678
31679 for (i = 0; i < nelt; i++)
31680 {
31681 unsigned elt =
31682 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31683 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31684 return false;
31685 }
31686
31687 /* Success! */
31688 if (d->testing_p)
31689 return true;
31690
31691 in0 = d->op0;
31692 in1 = d->op1;
31693 if (swap_nelt != 0)
31694 std::swap (in0, in1);
31695
31696 out0 = d->target;
31697 out1 = gen_reg_rtx (d->vmode);
31698 if (odd)
31699 std::swap (out0, out1);
31700
31701 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31702 return true;
31703 }
31704
31705 /* Recognize patterns for the VZIP insns. */
31706
31707 static bool
31708 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31709 {
31710 unsigned int i, high, mask, nelt = d->perm.length ();
31711 rtx out0, out1, in0, in1;
31712 int first_elem;
31713 bool is_swapped;
31714
31715 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31716 return false;
31717
31718 is_swapped = BYTES_BIG_ENDIAN;
31719
31720 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31721
31722 high = nelt / 2;
31723 if (first_elem == neon_endian_lane_map (d->vmode, high))
31724 ;
31725 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31726 high = 0;
31727 else
31728 return false;
31729 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31730
31731 for (i = 0; i < nelt / 2; i++)
31732 {
31733 unsigned elt =
31734 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31735 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31736 != elt)
31737 return false;
31738 elt =
31739 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31740 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31741 != elt)
31742 return false;
31743 }
31744
31745 /* Success! */
31746 if (d->testing_p)
31747 return true;
31748
31749 in0 = d->op0;
31750 in1 = d->op1;
31751 if (is_swapped)
31752 std::swap (in0, in1);
31753
31754 out0 = d->target;
31755 out1 = gen_reg_rtx (d->vmode);
31756 if (high)
31757 std::swap (out0, out1);
31758
31759 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31760 return true;
31761 }
31762
31763 /* Recognize patterns for the VREV insns. */
31764 static bool
31765 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31766 {
31767 unsigned int i, j, diff, nelt = d->perm.length ();
31768 rtx (*gen) (machine_mode, rtx, rtx);
31769
31770 if (!d->one_vector_p)
31771 return false;
31772
31773 diff = d->perm[0];
31774 switch (diff)
31775 {
31776 case 7:
31777 switch (d->vmode)
31778 {
31779 case E_V16QImode:
31780 case E_V8QImode:
31781 gen = gen_neon_vrev64;
31782 break;
31783 default:
31784 return false;
31785 }
31786 break;
31787 case 3:
31788 switch (d->vmode)
31789 {
31790 case E_V16QImode:
31791 case E_V8QImode:
31792 gen = gen_neon_vrev32;
31793 break;
31794 case E_V8HImode:
31795 case E_V4HImode:
31796 case E_V8HFmode:
31797 case E_V4HFmode:
31798 gen = gen_neon_vrev64;
31799 break;
31800 default:
31801 return false;
31802 }
31803 break;
31804 case 1:
31805 switch (d->vmode)
31806 {
31807 case E_V16QImode:
31808 case E_V8QImode:
31809 gen = gen_neon_vrev16;
31810 break;
31811 case E_V8HImode:
31812 case E_V4HImode:
31813 gen = gen_neon_vrev32;
31814 break;
31815 case E_V4SImode:
31816 case E_V2SImode:
31817 case E_V4SFmode:
31818 case E_V2SFmode:
31819 gen = gen_neon_vrev64;
31820 break;
31821 default:
31822 return false;
31823 }
31824 break;
31825 default:
31826 return false;
31827 }
31828
31829 for (i = 0; i < nelt ; i += diff + 1)
31830 for (j = 0; j <= diff; j += 1)
31831 {
31832 /* This is guaranteed to be true as the value of diff
31833 is 7, 3, 1 and we should have enough elements in the
31834 queue to generate this. Getting a vector mask with a
31835 value of diff other than these values implies that
31836 something is wrong by the time we get here. */
31837 gcc_assert (i + j < nelt);
31838 if (d->perm[i + j] != i + diff - j)
31839 return false;
31840 }
31841
31842 /* Success! */
31843 if (d->testing_p)
31844 return true;
31845
31846 emit_insn (gen (d->vmode, d->target, d->op0));
31847 return true;
31848 }
31849
31850 /* Recognize patterns for the VTRN insns. */
31851
31852 static bool
31853 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31854 {
31855 unsigned int i, odd, mask, nelt = d->perm.length ();
31856 rtx out0, out1, in0, in1;
31857
31858 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31859 return false;
31860
31861 /* Note that these are little-endian tests. Adjust for big-endian later. */
31862 if (d->perm[0] == 0)
31863 odd = 0;
31864 else if (d->perm[0] == 1)
31865 odd = 1;
31866 else
31867 return false;
31868 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31869
31870 for (i = 0; i < nelt; i += 2)
31871 {
31872 if (d->perm[i] != i + odd)
31873 return false;
31874 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31875 return false;
31876 }
31877
31878 /* Success! */
31879 if (d->testing_p)
31880 return true;
31881
31882 in0 = d->op0;
31883 in1 = d->op1;
31884 if (BYTES_BIG_ENDIAN)
31885 {
31886 std::swap (in0, in1);
31887 odd = !odd;
31888 }
31889
31890 out0 = d->target;
31891 out1 = gen_reg_rtx (d->vmode);
31892 if (odd)
31893 std::swap (out0, out1);
31894
31895 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31896 return true;
31897 }
31898
31899 /* Recognize patterns for the VEXT insns. */
31900
31901 static bool
31902 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31903 {
31904 unsigned int i, nelt = d->perm.length ();
31905 rtx offset;
31906
31907 unsigned int location;
31908
31909 unsigned int next = d->perm[0] + 1;
31910
31911 /* TODO: Handle GCC's numbering of elements for big-endian. */
31912 if (BYTES_BIG_ENDIAN)
31913 return false;
31914
31915 /* Check if the extracted indexes are increasing by one. */
31916 for (i = 1; i < nelt; next++, i++)
31917 {
31918 /* If we hit the most significant element of the 2nd vector in
31919 the previous iteration, no need to test further. */
31920 if (next == 2 * nelt)
31921 return false;
31922
31923 /* If we are operating on only one vector: it could be a
31924 rotation. If there are only two elements of size < 64, let
31925 arm_evpc_neon_vrev catch it. */
31926 if (d->one_vector_p && (next == nelt))
31927 {
31928 if ((nelt == 2) && (d->vmode != V2DImode))
31929 return false;
31930 else
31931 next = 0;
31932 }
31933
31934 if (d->perm[i] != next)
31935 return false;
31936 }
31937
31938 location = d->perm[0];
31939
31940 /* Success! */
31941 if (d->testing_p)
31942 return true;
31943
31944 offset = GEN_INT (location);
31945
31946 if (d->vmode == E_DImode)
31947 return false;
31948
31949 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31950 return true;
31951 }
31952
31953 /* The NEON VTBL instruction is a fully variable permutation that's even
31954 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31955 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31956 can do slightly better by expanding this as a constant where we don't
31957 have to apply a mask. */
31958
31959 static bool
31960 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31961 {
31962 rtx rperm[MAX_VECT_LEN], sel;
31963 machine_mode vmode = d->vmode;
31964 unsigned int i, nelt = d->perm.length ();
31965
31966 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31967 numbering of elements for big-endian, we must reverse the order. */
31968 if (BYTES_BIG_ENDIAN)
31969 return false;
31970
31971 if (d->testing_p)
31972 return true;
31973
31974 /* Generic code will try constant permutation twice. Once with the
31975 original mode and again with the elements lowered to QImode.
31976 So wait and don't do the selector expansion ourselves. */
31977 if (vmode != V8QImode && vmode != V16QImode)
31978 return false;
31979
31980 for (i = 0; i < nelt; ++i)
31981 rperm[i] = GEN_INT (d->perm[i]);
31982 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31983 sel = force_reg (vmode, sel);
31984
31985 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31986 return true;
31987 }
31988
31989 static bool
31990 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31991 {
31992 /* Check if the input mask matches vext before reordering the
31993 operands. */
31994 if (TARGET_NEON)
31995 if (arm_evpc_neon_vext (d))
31996 return true;
31997
31998 /* The pattern matching functions above are written to look for a small
31999 number to begin the sequence (0, 1, N/2). If we begin with an index
32000 from the second operand, we can swap the operands. */
32001 unsigned int nelt = d->perm.length ();
32002 if (d->perm[0] >= nelt)
32003 {
32004 d->perm.rotate_inputs (1);
32005 std::swap (d->op0, d->op1);
32006 }
32007
32008 if (TARGET_NEON)
32009 {
32010 if (arm_evpc_neon_vuzp (d))
32011 return true;
32012 if (arm_evpc_neon_vzip (d))
32013 return true;
32014 if (arm_evpc_neon_vrev (d))
32015 return true;
32016 if (arm_evpc_neon_vtrn (d))
32017 return true;
32018 return arm_evpc_neon_vtbl (d);
32019 }
32020 return false;
32021 }
32022
32023 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32024
32025 static bool
32026 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32027 rtx target, rtx op0, rtx op1,
32028 const vec_perm_indices &sel)
32029 {
32030 if (vmode != op_mode)
32031 return false;
32032
32033 struct expand_vec_perm_d d;
32034 int i, nelt, which;
32035
32036 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32037 return false;
32038
32039 d.target = target;
32040 if (op0)
32041 {
32042 rtx nop0 = force_reg (vmode, op0);
32043 if (op0 == op1)
32044 op1 = nop0;
32045 op0 = nop0;
32046 }
32047 if (op1)
32048 op1 = force_reg (vmode, op1);
32049 d.op0 = op0;
32050 d.op1 = op1;
32051
32052 d.vmode = vmode;
32053 gcc_assert (VECTOR_MODE_P (d.vmode));
32054 d.testing_p = !target;
32055
32056 nelt = GET_MODE_NUNITS (d.vmode);
32057 for (i = which = 0; i < nelt; ++i)
32058 {
32059 int ei = sel[i] & (2 * nelt - 1);
32060 which |= (ei < nelt ? 1 : 2);
32061 }
32062
32063 switch (which)
32064 {
32065 default:
32066 gcc_unreachable();
32067
32068 case 3:
32069 d.one_vector_p = false;
32070 if (d.testing_p || !rtx_equal_p (op0, op1))
32071 break;
32072
32073 /* The elements of PERM do not suggest that only the first operand
32074 is used, but both operands are identical. Allow easier matching
32075 of the permutation by folding the permutation into the single
32076 input vector. */
32077 /* FALLTHRU */
32078 case 2:
32079 d.op0 = op1;
32080 d.one_vector_p = true;
32081 break;
32082
32083 case 1:
32084 d.op1 = op0;
32085 d.one_vector_p = true;
32086 break;
32087 }
32088
32089 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32090
32091 if (!d.testing_p)
32092 return arm_expand_vec_perm_const_1 (&d);
32093
32094 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32095 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32096 if (!d.one_vector_p)
32097 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32098
32099 start_sequence ();
32100 bool ret = arm_expand_vec_perm_const_1 (&d);
32101 end_sequence ();
32102
32103 return ret;
32104 }
32105
32106 bool
32107 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32108 {
32109 /* If we are soft float, all auto-increment forms are OK provided we
32110 either have LDRD or the mode fits in a single word. */
32111 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32112 return true;
32113
32114 switch (code)
32115 {
32116 /* Post increment and Pre Decrement are supported for all
32117 instruction forms except for vector forms. */
32118 case ARM_POST_INC:
32119 case ARM_PRE_DEC:
32120 if (VECTOR_MODE_P (mode))
32121 {
32122 if (code != ARM_PRE_DEC)
32123 return true;
32124 else
32125 return false;
32126 }
32127
32128 return true;
32129
32130 case ARM_POST_DEC:
32131 case ARM_PRE_INC:
32132 /* Without LDRD and mode size greater than
32133 word size, there is no point in auto-incrementing
32134 because ldm and stm will not have these forms. */
32135 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32136 return false;
32137
32138 /* Vector and floating point modes do not support
32139 these auto increment forms. */
32140 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32141 return false;
32142
32143 return true;
32144
32145 default:
32146 return false;
32147
32148 }
32149
32150 return false;
32151 }
32152
32153 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32154 on ARM, since we know that shifts by negative amounts are no-ops.
32155 Additionally, the default expansion code is not available or suitable
32156 for post-reload insn splits (this can occur when the register allocator
32157 chooses not to do a shift in NEON).
32158
32159 This function is used in both initial expand and post-reload splits, and
32160 handles all kinds of 64-bit shifts.
32161
32162 Input requirements:
32163 - It is safe for the input and output to be the same register, but
32164 early-clobber rules apply for the shift amount and scratch registers.
32165 - Shift by register requires both scratch registers. In all other cases
32166 the scratch registers may be NULL.
32167 - Ashiftrt by a register also clobbers the CC register. */
32168 void
32169 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32170 rtx amount, rtx scratch1, rtx scratch2)
32171 {
32172 rtx out_high = gen_highpart (SImode, out);
32173 rtx out_low = gen_lowpart (SImode, out);
32174 rtx in_high = gen_highpart (SImode, in);
32175 rtx in_low = gen_lowpart (SImode, in);
32176
32177 /* Terminology:
32178 in = the register pair containing the input value.
32179 out = the destination register pair.
32180 up = the high- or low-part of each pair.
32181 down = the opposite part to "up".
32182 In a shift, we can consider bits to shift from "up"-stream to
32183 "down"-stream, so in a left-shift "up" is the low-part and "down"
32184 is the high-part of each register pair. */
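
/* As a worked example (editorial sketch), a logical right shift of a
   DImode value by a constant 10 is expanded, per the code below, as:

     out_low  = in_low >> 10;
     out_low |= in_high << 22;        (the 32 - 10 reverse shift)
     out_high = in_high >> 10;

   so each output word is assembled from at most two single-word shifts.  */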
32185
32186 rtx out_up = code == ASHIFT ? out_low : out_high;
32187 rtx out_down = code == ASHIFT ? out_high : out_low;
32188 rtx in_up = code == ASHIFT ? in_low : in_high;
32189 rtx in_down = code == ASHIFT ? in_high : in_low;
32190
32191 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32192 gcc_assert (out
32193 && (REG_P (out) || SUBREG_P (out))
32194 && GET_MODE (out) == DImode);
32195 gcc_assert (in
32196 && (REG_P (in) || SUBREG_P (in))
32197 && GET_MODE (in) == DImode);
32198 gcc_assert (amount
32199 && (((REG_P (amount) || SUBREG_P (amount))
32200 && GET_MODE (amount) == SImode)
32201 || CONST_INT_P (amount)));
32202 gcc_assert (scratch1 == NULL
32203 || (GET_CODE (scratch1) == SCRATCH)
32204 || (GET_MODE (scratch1) == SImode
32205 && REG_P (scratch1)));
32206 gcc_assert (scratch2 == NULL
32207 || (GET_CODE (scratch2) == SCRATCH)
32208 || (GET_MODE (scratch2) == SImode
32209 && REG_P (scratch2)));
32210 gcc_assert (!REG_P (out) || !REG_P (amount)
32211 || !HARD_REGISTER_P (out)
32212 || (REGNO (out) != REGNO (amount)
32213 && REGNO (out) + 1 != REGNO (amount)));
32214
32215 /* Macros to make following code more readable. */
32216 #define SUB_32(DEST,SRC) \
32217 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32218 #define RSB_32(DEST,SRC) \
32219 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32220 #define SUB_S_32(DEST,SRC) \
32221 gen_addsi3_compare0 ((DEST), (SRC), \
32222 GEN_INT (-32))
32223 #define SET(DEST,SRC) \
32224 gen_rtx_SET ((DEST), (SRC))
32225 #define SHIFT(CODE,SRC,AMOUNT) \
32226 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32227 #define LSHIFT(CODE,SRC,AMOUNT) \
32228 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32229 SImode, (SRC), (AMOUNT))
32230 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32231 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32232 SImode, (SRC), (AMOUNT))
32233 #define ORR(A,B) \
32234 gen_rtx_IOR (SImode, (A), (B))
32235 #define BRANCH(COND,LABEL) \
32236 gen_arm_cond_branch ((LABEL), \
32237 gen_rtx_ ## COND (CCmode, cc_reg, \
32238 const0_rtx), \
32239 cc_reg)
32240
32241 /* Shifts by register and shifts by constant are handled separately. */
32242 if (CONST_INT_P (amount))
32243 {
32244 /* We have a shift-by-constant. */
32245
32246 /* First, handle out-of-range shift amounts.
32247 In both cases we try to match the result that an ARM instruction in
32248 a shift-by-register would give. This helps reduce execution
32249 differences between optimization levels, but it won't stop other
32250 parts of the compiler doing different things. This is "undefined
32251 behavior", in any case. */
32252 if (INTVAL (amount) <= 0)
32253 emit_insn (gen_movdi (out, in));
32254 else if (INTVAL (amount) >= 64)
32255 {
32256 if (code == ASHIFTRT)
32257 {
32258 rtx const31_rtx = GEN_INT (31);
32259 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32260 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32261 }
32262 else
32263 emit_insn (gen_movdi (out, const0_rtx));
32264 }
32265
32266 /* Now handle valid shifts. */
32267 else if (INTVAL (amount) < 32)
32268 {
32269 /* Shifts by a constant less than 32. */
32270 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32271
32272 /* Clearing the out register in DImode first avoids lots
32273 of spilling and results in less stack usage.
32274 Later this redundant insn is completely removed.
32275 Do that only if "in" and "out" are different registers. */
32276 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32277 emit_insn (SET (out, const0_rtx));
32278 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32279 emit_insn (SET (out_down,
32280 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32281 out_down)));
32282 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32283 }
32284 else
32285 {
32286 /* Shifts by a constant greater than 31. */
32287 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32288
32289 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32290 emit_insn (SET (out, const0_rtx));
32291 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32292 if (code == ASHIFTRT)
32293 emit_insn (gen_ashrsi3 (out_up, in_up,
32294 GEN_INT (31)));
32295 else
32296 emit_insn (SET (out_up, const0_rtx));
32297 }
32298 }
32299 else
32300 {
32301 /* We have a shift-by-register. */
32302 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32303
32304 /* This alternative requires the scratch registers. */
32305 gcc_assert (scratch1 && REG_P (scratch1));
32306 gcc_assert (scratch2 && REG_P (scratch2));
32307
32308 /* We will need the values "amount-32" and "32-amount" later.
32309 Swapping them around now allows the later code to be more general. */
32310 switch (code)
32311 {
32312 case ASHIFT:
32313 emit_insn (SUB_32 (scratch1, amount));
32314 emit_insn (RSB_32 (scratch2, amount));
32315 break;
32316 case ASHIFTRT:
32317 emit_insn (RSB_32 (scratch1, amount));
32318 /* Also set CC = amount > 32. */
32319 emit_insn (SUB_S_32 (scratch2, amount));
32320 break;
32321 case LSHIFTRT:
32322 emit_insn (RSB_32 (scratch1, amount));
32323 emit_insn (SUB_32 (scratch2, amount));
32324 break;
32325 default:
32326 gcc_unreachable ();
32327 }
32328
32329 /* Emit code like this:
32330
32331 arithmetic-left:
32332 out_down = in_down << amount;
32333 out_down = (in_up << (amount - 32)) | out_down;
32334 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32335 out_up = in_up << amount;
32336
32337 arithmetic-right:
32338 out_down = in_down >> amount;
32339 out_down = (in_up << (32 - amount)) | out_down;
32340 if (amount < 32)
32341 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32342 out_up = in_up << amount;
32343
32344 logical-right:
32345 out_down = in_down >> amount;
32346 out_down = (in_up << (32 - amount)) | out_down;
32347 if (amount < 32)
32348 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32349 out_up = in_up << amount;
32350
32351 The ARM and Thumb2 variants are the same but implemented slightly
32352 differently. If this were only called during expand we could just
32353 use the Thumb2 case and let combine do the right thing, but this
32354 can also be called from post-reload splitters. */
32355
32356 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32357
32358 if (!TARGET_THUMB2)
32359 {
32360 /* Emit code for ARM mode. */
32361 emit_insn (SET (out_down,
32362 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32363 if (code == ASHIFTRT)
32364 {
32365 rtx_code_label *done_label = gen_label_rtx ();
32366 emit_jump_insn (BRANCH (LT, done_label));
32367 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32368 out_down)));
32369 emit_label (done_label);
32370 }
32371 else
32372 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32373 out_down)));
32374 }
32375 else
32376 {
32377 /* Emit code for Thumb2 mode.
32378 Thumb2 can't do shift and or in one insn. */
32379 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32380 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32381
32382 if (code == ASHIFTRT)
32383 {
32384 rtx_code_label *done_label = gen_label_rtx ();
32385 emit_jump_insn (BRANCH (LT, done_label));
32386 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32387 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32388 emit_label (done_label);
32389 }
32390 else
32391 {
32392 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32393 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32394 }
32395 }
32396
32397 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32398 }
32399
32400 #undef SUB_32
32401 #undef RSB_32
32402 #undef SUB_S_32
32403 #undef SET
32404 #undef SHIFT
32405 #undef LSHIFT
32406 #undef REV_LSHIFT
32407 #undef ORR
32408 #undef BRANCH
32409 }
32410
32411 /* Returns true if the pattern is a valid symbolic address, which is either a
32412 symbol_ref or (symbol_ref + addend).
32413
32414 According to the ARM ELF ABI, the initial addend of REL-type relocations
32415 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32416 literal field of the instruction as a 16-bit signed value in the range
32417 -32768 <= A < 32768.
32418
32419 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32420 unsigned range of 0 <= A < 256 as described in the AAELF32
32421 relocation handling documentation: REL-type relocations are encoded
32422 as unsigned in this case. */
32423
32424 bool
32425 arm_valid_symbolic_address_p (rtx addr)
32426 {
32427 rtx xop0, xop1 = NULL_RTX;
32428 rtx tmp = addr;
32429
32430 if (target_word_relocations)
32431 return false;
32432
32433 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32434 return true;
32435
32436 /* (const (plus: symbol_ref const_int)) */
32437 if (GET_CODE (addr) == CONST)
32438 tmp = XEXP (addr, 0);
32439
32440 if (GET_CODE (tmp) == PLUS)
32441 {
32442 xop0 = XEXP (tmp, 0);
32443 xop1 = XEXP (tmp, 1);
32444
32445 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32446 {
32447 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32448 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32449 else
32450 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32451 }
32452 }
32453
32454 return false;
32455 }
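
/* For illustration (editorial sketch): with MOVW/MOVT available,
   (symbol_ref "x") and (const (plus (symbol_ref "x") (const_int 4)))
   are accepted, while an addend such as 0x10000 is rejected because it
   does not fit the signed 16-bit REL addend range described above.
   On Thumb-1 without MOVT the accepted addend range shrinks to 0..255.  */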
32456
32457 /* Returns true if *COMPARISON is a valid comparison operation, and
32458 puts the operands into a form that is valid. */
32459 bool
32460 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32461 {
32462 enum rtx_code code = GET_CODE (*comparison);
32463 int code_int;
32464 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32465 ? GET_MODE (*op2) : GET_MODE (*op1);
32466
32467 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32468
32469 if (code == UNEQ || code == LTGT)
32470 return false;
32471
32472 code_int = (int)code;
32473 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32474 PUT_CODE (*comparison, (enum rtx_code)code_int);
32475
32476 switch (mode)
32477 {
32478 case E_SImode:
32479 if (!arm_add_operand (*op1, mode))
32480 *op1 = force_reg (mode, *op1);
32481 if (!arm_add_operand (*op2, mode))
32482 *op2 = force_reg (mode, *op2);
32483 return true;
32484
32485 case E_DImode:
32486 /* gen_compare_reg() will sort out any invalid operands. */
32487 return true;
32488
32489 case E_HFmode:
32490 if (!TARGET_VFP_FP16INST)
32491 break;
32492 /* FP16 comparisons are done in SF mode. */
32493 mode = SFmode;
32494 *op1 = convert_to_mode (mode, *op1, 1);
32495 *op2 = convert_to_mode (mode, *op2, 1);
32496 /* Fall through. */
32497 case E_SFmode:
32498 case E_DFmode:
32499 if (!vfp_compare_operand (*op1, mode))
32500 *op1 = force_reg (mode, *op1);
32501 if (!vfp_compare_operand (*op2, mode))
32502 *op2 = force_reg (mode, *op2);
32503 return true;
32504 default:
32505 break;
32506 }
32507
32508 return false;
32509
32510 }
32511
32512 /* Maximum number of instructions to set block of memory. */
32513 static int
32514 arm_block_set_max_insns (void)
32515 {
32516 if (optimize_function_for_size_p (cfun))
32517 return 4;
32518 else
32519 return current_tune->max_insns_inline_memset;
32520 }
32521
32522 /* Return TRUE if it's profitable to set a block of memory in the
32523 non-vectorized case. VAL is the value to set the memory
32524 with. LENGTH is the number of bytes to set. ALIGN is the
32525 alignment of the destination memory in bytes. UNALIGNED_P
32526 is TRUE if we can only set the memory with instructions
32527 meeting alignment requirements. USE_STRD_P is TRUE if we
32528 can use strd to set the memory. */
32529 static bool
32530 arm_block_set_non_vect_profit_p (rtx val,
32531 unsigned HOST_WIDE_INT length,
32532 unsigned HOST_WIDE_INT align,
32533 bool unaligned_p, bool use_strd_p)
32534 {
32535 int num = 0;
32536 /* For a leftover of 0-7 bytes, we can set the memory block using
32537 strb/strh/str with the minimum number of instructions. */
32538 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32539
32540 if (unaligned_p)
32541 {
32542 num = arm_const_inline_cost (SET, val);
32543 num += length / align + length % align;
32544 }
32545 else if (use_strd_p)
32546 {
32547 num = arm_const_double_inline_cost (val);
32548 num += (length >> 3) + leftover[length & 7];
32549 }
32550 else
32551 {
32552 num = arm_const_inline_cost (SET, val);
32553 num += (length >> 2) + leftover[length & 3];
32554 }
32555
32556 /* We may be able to combine the last STRH/STRB pair into a single STR
32557 by shifting one byte back. */
32558 if (unaligned_access && length > 3 && (length & 3) == 3)
32559 num--;
32560
32561 return (num <= arm_block_set_max_insns ());
32562 }
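
/* Worked example (editorial sketch): for a 15-byte aligned memset without
   strd, the store count is (15 >> 2) + leftover[15 & 3] = 3 + 2, plus the
   cost of materialising the constant; if unaligned access is available the
   final STRH/STRB pair is counted as a single STR, saving one instruction.
   The result is then compared against arm_block_set_max_insns ().  */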
32563
32564 /* Return TRUE if it's profitable to set a block of memory in the
32565 vectorized case. LENGTH is the number of bytes to set.
32566 ALIGN is the alignment of the destination memory in bytes.
32567 MODE is the vector mode used to set the memory. */
32568 static bool
32569 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32570 unsigned HOST_WIDE_INT align,
32571 machine_mode mode)
32572 {
32573 int num;
32574 bool unaligned_p = ((align & 3) != 0);
32575 unsigned int nelt = GET_MODE_NUNITS (mode);
32576
32577 /* Instruction loading constant value. */
32578 num = 1;
32579 /* Instructions storing the memory. */
32580 num += (length + nelt - 1) / nelt;
32581 /* Instructions adjusting the address expression. We only need to
32582 adjust the address expression if it's 4-byte aligned and the
32583 leftover bytes can only be stored by a misaligned store instruction. */
32584 if (!unaligned_p && (length & 3) != 0)
32585 num++;
32586
32587 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32588 if (!unaligned_p && mode == V16QImode)
32589 num--;
32590
32591 return (num <= arm_block_set_max_insns ());
32592 }
32593
32594 /* Set a block of memory using vectorization instructions for the
32595 unaligned case. We fill the first LENGTH bytes of the memory
32596 area starting from DSTBASE with byte constant VALUE. ALIGN is
32597 the alignment requirement of memory. Return TRUE if succeeded. */
32598 static bool
32599 arm_block_set_unaligned_vect (rtx dstbase,
32600 unsigned HOST_WIDE_INT length,
32601 unsigned HOST_WIDE_INT value,
32602 unsigned HOST_WIDE_INT align)
32603 {
32604 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32605 rtx dst, mem;
32606 rtx val_vec, reg;
32607 rtx (*gen_func) (rtx, rtx);
32608 machine_mode mode;
32609 unsigned HOST_WIDE_INT v = value;
32610 unsigned int offset = 0;
32611 gcc_assert ((align & 0x3) != 0);
32612 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32613 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32614 if (length >= nelt_v16)
32615 {
32616 mode = V16QImode;
32617 gen_func = gen_movmisalignv16qi;
32618 }
32619 else
32620 {
32621 mode = V8QImode;
32622 gen_func = gen_movmisalignv8qi;
32623 }
32624 nelt_mode = GET_MODE_NUNITS (mode);
32625 gcc_assert (length >= nelt_mode);
32626 /* Skip if it isn't profitable. */
32627 if (!arm_block_set_vect_profit_p (length, align, mode))
32628 return false;
32629
32630 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32631 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32632
32633 v = sext_hwi (v, BITS_PER_WORD);
32634
32635 reg = gen_reg_rtx (mode);
32636 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32637 /* Emit instruction loading the constant value. */
32638 emit_move_insn (reg, val_vec);
32639
32640 /* Handle nelt_mode bytes in a vector. */
32641 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32642 {
32643 emit_insn ((*gen_func) (mem, reg));
32644 if (i + 2 * nelt_mode <= length)
32645 {
32646 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32647 offset += nelt_mode;
32648 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32649 }
32650 }
32651
32652 /* If at least nelt_v8 bytes are left over, we must be in
32653 V16QImode. */
32654 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32655
32656 /* Handle (8, 16) bytes leftover. */
32657 if (i + nelt_v8 < length)
32658 {
32659 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32660 offset += length - i;
32661 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32662
32663 /* We are shifting bytes back, set the alignment accordingly. */
32664 if ((length & 1) != 0 && align >= 2)
32665 set_mem_align (mem, BITS_PER_UNIT);
32666
32667 emit_insn (gen_movmisalignv16qi (mem, reg));
32668 }
32669 /* Handle (0, 8] bytes leftover. */
32670 else if (i < length && i + nelt_v8 >= length)
32671 {
32672 if (mode == V16QImode)
32673 reg = gen_lowpart (V8QImode, reg);
32674
32675 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32676 + (nelt_mode - nelt_v8))));
32677 offset += (length - i) + (nelt_mode - nelt_v8);
32678 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32679
32680 /* We are shifting bytes back, set the alignment accordingly. */
32681 if ((length & 1) != 0 && align >= 2)
32682 set_mem_align (mem, BITS_PER_UNIT);
32683
32684 emit_insn (gen_movmisalignv8qi (mem, reg));
32685 }
32686
32687 return true;
32688 }
32689
32690 /* Set a block of memory using vectorization instructions for the
32691 aligned case. We fill the first LENGTH bytes of the memory area
32692 starting from DSTBASE with byte constant VALUE. ALIGN is the
32693 alignment requirement of memory. Return TRUE if succeeded. */
32694 static bool
32695 arm_block_set_aligned_vect (rtx dstbase,
32696 unsigned HOST_WIDE_INT length,
32697 unsigned HOST_WIDE_INT value,
32698 unsigned HOST_WIDE_INT align)
32699 {
32700 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32701 rtx dst, addr, mem;
32702 rtx val_vec, reg;
32703 machine_mode mode;
32704 unsigned int offset = 0;
32705
32706 gcc_assert ((align & 0x3) == 0);
32707 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32708 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32709 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32710 mode = V16QImode;
32711 else
32712 mode = V8QImode;
32713
32714 nelt_mode = GET_MODE_NUNITS (mode);
32715 gcc_assert (length >= nelt_mode);
32716 /* Skip if it isn't profitable. */
32717 if (!arm_block_set_vect_profit_p (length, align, mode))
32718 return false;
32719
32720 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32721
32722 reg = gen_reg_rtx (mode);
32723 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32724 /* Emit instruction loading the constant value. */
32725 emit_move_insn (reg, val_vec);
32726
32727 i = 0;
32728 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32729 if (mode == V16QImode)
32730 {
32731 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32732 emit_insn (gen_movmisalignv16qi (mem, reg));
32733 i += nelt_mode;
32734 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32735 if (i + nelt_v8 < length && i + nelt_v16 > length)
32736 {
32737 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32738 offset += length - nelt_mode;
32739 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32740 /* We are shifting bytes back, set the alignment accordingly. */
32741 if ((length & 0x3) == 0)
32742 set_mem_align (mem, BITS_PER_UNIT * 4);
32743 else if ((length & 0x1) == 0)
32744 set_mem_align (mem, BITS_PER_UNIT * 2);
32745 else
32746 set_mem_align (mem, BITS_PER_UNIT);
32747
32748 emit_insn (gen_movmisalignv16qi (mem, reg));
32749 return true;
32750 }
32751 /* Fall through for bytes leftover. */
32752 mode = V8QImode;
32753 nelt_mode = GET_MODE_NUNITS (mode);
32754 reg = gen_lowpart (V8QImode, reg);
32755 }
32756
32757 /* Handle 8 bytes in a vector. */
32758 for (; (i + nelt_mode <= length); i += nelt_mode)
32759 {
32760 addr = plus_constant (Pmode, dst, i);
32761 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32762 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32763 emit_move_insn (mem, reg);
32764 else
32765 emit_insn (gen_unaligned_storev8qi (mem, reg));
32766 }
32767
32768 /* Handle single word leftover by shifting 4 bytes back. We can
32769 use aligned access for this case. */
32770 if (i + UNITS_PER_WORD == length)
32771 {
32772 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32773 offset += i - UNITS_PER_WORD;
32774 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32775 /* We are shifting 4 bytes back, set the alignment accordingly. */
32776 if (align > UNITS_PER_WORD)
32777 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32778
32779 emit_insn (gen_unaligned_storev8qi (mem, reg));
32780 }
32781 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32782 We have to use unaligned access for this case. */
32783 else if (i < length)
32784 {
32785 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32786 offset += length - nelt_mode;
32787 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32788 /* We are shifting bytes back, set the alignment accordingly. */
32789 if ((length & 1) == 0)
32790 set_mem_align (mem, BITS_PER_UNIT * 2);
32791 else
32792 set_mem_align (mem, BITS_PER_UNIT);
32793
32794 emit_insn (gen_movmisalignv8qi (mem, reg));
32795 }
32796
32797 return true;
32798 }
32799
32800 /* Set a block of memory using plain strh/strb instructions, only
32801 using instructions allowed by ALIGN on the processor. We fill the
32802 first LENGTH bytes of the memory area starting from DSTBASE
32803 with byte constant VALUE. ALIGN is the alignment requirement
32804 of memory. */
32805 static bool
32806 arm_block_set_unaligned_non_vect (rtx dstbase,
32807 unsigned HOST_WIDE_INT length,
32808 unsigned HOST_WIDE_INT value,
32809 unsigned HOST_WIDE_INT align)
32810 {
32811 unsigned int i;
32812 rtx dst, addr, mem;
32813 rtx val_exp, val_reg, reg;
32814 machine_mode mode;
32815 HOST_WIDE_INT v = value;
32816
32817 gcc_assert (align == 1 || align == 2);
32818
32819 if (align == 2)
32820 v |= (value << BITS_PER_UNIT);
32821
32822 v = sext_hwi (v, BITS_PER_WORD);
32823 val_exp = GEN_INT (v);
32824 /* Skip if it isn't profitable. */
32825 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32826 align, true, false))
32827 return false;
32828
32829 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32830 mode = (align == 2 ? HImode : QImode);
32831 val_reg = force_reg (SImode, val_exp);
32832 reg = gen_lowpart (mode, val_reg);
32833
32834 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32835 {
32836 addr = plus_constant (Pmode, dst, i);
32837 mem = adjust_automodify_address (dstbase, mode, addr, i);
32838 emit_move_insn (mem, reg);
32839 }
32840
32841 /* Handle single byte leftover. */
32842 if (i + 1 == length)
32843 {
32844 reg = gen_lowpart (QImode, val_reg);
32845 addr = plus_constant (Pmode, dst, i);
32846 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32847 emit_move_insn (mem, reg);
32848 i++;
32849 }
32850
32851 gcc_assert (i == length);
32852 return true;
32853 }
32854
32855 /* Set a block of memory using plain strd/str/strh/strb instructions,
32856 to permit unaligned copies on processors which support unaligned
32857 semantics for those instructions. We fill the first LENGTH bytes
32858 of the memory area starting from DSTBASE with byte constant VALUE.
32859 ALIGN is the alignment requirement of memory. */
32860 static bool
32861 arm_block_set_aligned_non_vect (rtx dstbase,
32862 unsigned HOST_WIDE_INT length,
32863 unsigned HOST_WIDE_INT value,
32864 unsigned HOST_WIDE_INT align)
32865 {
32866 unsigned int i;
32867 rtx dst, addr, mem;
32868 rtx val_exp, val_reg, reg;
32869 unsigned HOST_WIDE_INT v;
32870 bool use_strd_p;
32871
32872 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32873 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32874
32875 v = (value | (value << 8) | (value << 16) | (value << 24));
32876 if (length < UNITS_PER_WORD)
32877 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32878
32879 if (use_strd_p)
32880 v |= (v << BITS_PER_WORD);
32881 else
32882 v = sext_hwi (v, BITS_PER_WORD);
32883
32884 val_exp = GEN_INT (v);
32885 /* Skip if it isn't profitable. */
32886 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32887 align, false, use_strd_p))
32888 {
32889 if (!use_strd_p)
32890 return false;
32891
32892 /* Try without strd. */
32893 v = (v >> BITS_PER_WORD);
32894 v = sext_hwi (v, BITS_PER_WORD);
32895 val_exp = GEN_INT (v);
32896 use_strd_p = false;
32897 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32898 align, false, use_strd_p))
32899 return false;
32900 }
32901
32902 i = 0;
32903 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32904 /* Handle double words using strd if possible. */
32905 if (use_strd_p)
32906 {
32907 val_reg = force_reg (DImode, val_exp);
32908 reg = val_reg;
32909 for (; (i + 8 <= length); i += 8)
32910 {
32911 addr = plus_constant (Pmode, dst, i);
32912 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32913 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32914 emit_move_insn (mem, reg);
32915 else
32916 emit_insn (gen_unaligned_storedi (mem, reg));
32917 }
32918 }
32919 else
32920 val_reg = force_reg (SImode, val_exp);
32921
32922 /* Handle words. */
32923 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32924 for (; (i + 4 <= length); i += 4)
32925 {
32926 addr = plus_constant (Pmode, dst, i);
32927 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32928 if ((align & 3) == 0)
32929 emit_move_insn (mem, reg);
32930 else
32931 emit_insn (gen_unaligned_storesi (mem, reg));
32932 }
32933
32934 /* Merge last pair of STRH and STRB into a STR if possible. */
32935 if (unaligned_access && i > 0 && (i + 3) == length)
32936 {
32937 addr = plus_constant (Pmode, dst, i - 1);
32938 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32939 /* We are shifting one byte back, set the alignment accordingly. */
32940 if ((align & 1) == 0)
32941 set_mem_align (mem, BITS_PER_UNIT);
32942
32943 /* Most likely this is an unaligned access, and we can't tell at
32944 compilation time. */
32945 emit_insn (gen_unaligned_storesi (mem, reg));
32946 return true;
32947 }
32948
32949 /* Handle half word leftover. */
32950 if (i + 2 <= length)
32951 {
32952 reg = gen_lowpart (HImode, val_reg);
32953 addr = plus_constant (Pmode, dst, i);
32954 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32955 if ((align & 1) == 0)
32956 emit_move_insn (mem, reg);
32957 else
32958 emit_insn (gen_unaligned_storehi (mem, reg));
32959
32960 i += 2;
32961 }
32962
32963 /* Handle single byte leftover. */
32964 if (i + 1 == length)
32965 {
32966 reg = gen_lowpart (QImode, val_reg);
32967 addr = plus_constant (Pmode, dst, i);
32968 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32969 emit_move_insn (mem, reg);
32970 }
32971
32972 return true;
32973 }
32974
32975 /* Set a block of memory using vectorization instructions for both
32976 aligned and unaligned cases. We fill the first LENGTH bytes of
32977 the memory area starting from DSTBASE with byte constant VALUE.
32978 ALIGN is the alignment requirement of memory. */
32979 static bool
32980 arm_block_set_vect (rtx dstbase,
32981 unsigned HOST_WIDE_INT length,
32982 unsigned HOST_WIDE_INT value,
32983 unsigned HOST_WIDE_INT align)
32984 {
32985 /* Check whether we need to use unaligned store instruction. */
32986 if (((align & 3) != 0 || (length & 3) != 0)
32987 /* Check whether unaligned store instruction is available. */
32988 && (!unaligned_access || BYTES_BIG_ENDIAN))
32989 return false;
32990
32991 if ((align & 3) == 0)
32992 return arm_block_set_aligned_vect (dstbase, length, value, align);
32993 else
32994 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32995 }
32996
32997 /* Expand a block set (memset-style) operation. First we try to do it
32998 using vectorization instructions, then fall back to ARM unaligned access
32999 and double-word stores if profitable. OPERANDS[0] is the destination,
33000 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33001 initialize the memory with, OPERANDS[3] is the known alignment of the
33002 destination. */
33003 bool
33004 arm_gen_setmem (rtx *operands)
33005 {
33006 rtx dstbase = operands[0];
33007 unsigned HOST_WIDE_INT length;
33008 unsigned HOST_WIDE_INT value;
33009 unsigned HOST_WIDE_INT align;
33010
33011 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33012 return false;
33013
33014 length = UINTVAL (operands[1]);
33015 if (length > 64)
33016 return false;
33017
33018 value = (UINTVAL (operands[2]) & 0xFF);
33019 align = UINTVAL (operands[3]);
33020 if (TARGET_NEON && length >= 8
33021 && current_tune->string_ops_prefer_neon
33022 && arm_block_set_vect (dstbase, length, value, align))
33023 return true;
33024
33025 if (!unaligned_access && (align & 3) != 0)
33026 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33027
33028 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33029 }
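
/* Usage sketch (editorial addition): for a call such as
   memset (p, 0xAB, 7) with a word-aligned destination and no NEON, the
   expander typically materialises 0xABABABAB in a register, emits one STR
   for the first word and, when unaligned access is permitted, a second
   (overlapping) STR for the last three bytes. The exact sequence depends
   on the target flags and tuning parameters.  */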
33030
33031
33032 static bool
33033 arm_macro_fusion_p (void)
33034 {
33035 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33036 }
33037
33038 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33039 for MOVW / MOVT macro fusion. */
33040
33041 static bool
33042 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33043 {
33044 /* We are trying to fuse
33045 movw imm / movt imm
33046 instructions as a group that gets scheduled together. */
33047
33048 rtx set_dest = SET_DEST (curr_set);
33049
33050 if (GET_MODE (set_dest) != SImode)
33051 return false;
33052
33053 /* We are trying to match:
33054 prev (movw) == (set (reg r0) (const_int imm16))
33055 curr (movt) == (set (zero_extract (reg r0)
33056 (const_int 16)
33057 (const_int 16))
33058 (const_int imm16_1))
33059 or
33060 prev (movw) == (set (reg r1)
33061 (high (symbol_ref ("SYM"))))
33062 curr (movt) == (set (reg r0)
33063 (lo_sum (reg r1)
33064 (symbol_ref ("SYM")))) */
33065
33066 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33067 {
33068 if (CONST_INT_P (SET_SRC (curr_set))
33069 && CONST_INT_P (SET_SRC (prev_set))
33070 && REG_P (XEXP (set_dest, 0))
33071 && REG_P (SET_DEST (prev_set))
33072 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33073 return true;
33074
33075 }
33076 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33077 && REG_P (SET_DEST (curr_set))
33078 && REG_P (SET_DEST (prev_set))
33079 && GET_CODE (SET_SRC (prev_set)) == HIGH
33080 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33081 return true;
33082
33083 return false;
33084 }
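
/* For example (editorial sketch), the pair

     movw  r0, #:lower16:sym
     movt  r0, #:upper16:sym

   matches the second pattern above (HIGH followed by LO_SUM on the same
   register) and is kept adjacent by the scheduler when FUSE_MOVW_MOVT is
   enabled in the tuning structure.  */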
33085
33086 static bool
33087 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33088 {
33089 rtx prev_set = single_set (prev);
33090 rtx curr_set = single_set (curr);
33091
33092 if (!prev_set
33093 || !curr_set)
33094 return false;
33095
33096 if (any_condjump_p (curr))
33097 return false;
33098
33099 if (!arm_macro_fusion_p ())
33100 return false;
33101
33102 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33103 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33104 return true;
33105
33106 return false;
33107 }
33108
33109 /* Return true iff the instruction fusion described by OP is enabled. */
33110 bool
33111 arm_fusion_enabled_p (tune_params::fuse_ops op)
33112 {
33113 return current_tune->fusible_ops & op;
33114 }
33115
33116 /* Return TRUE if the return address signing mechanism is enabled. */
33117 bool
33118 arm_current_function_pac_enabled_p (void)
33119 {
33120 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33121 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33122 && !crtl->is_leaf));
33123 }
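
/* Editorial note: aarch_ra_sign_scope is derived from the branch-protection
   option; assuming the spelling mirrors the documented -mbranch-protection
   values, "pac-ret" signs only non-leaf functions (AARCH_FUNCTION_NON_LEAF)
   while "pac-ret+leaf" signs every function (AARCH_FUNCTION_ALL), which is
   exactly the distinction tested above.  */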
33124
33125 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33126 static bool
33127 aarch_bti_enabled ()
33128 {
33129 return false;
33130 }
33131
33132 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33133 scheduled for speculative execution. Reject the long-running division
33134 and square-root instructions. */
33135
33136 static bool
33137 arm_sched_can_speculate_insn (rtx_insn *insn)
33138 {
33139 switch (get_attr_type (insn))
33140 {
33141 case TYPE_SDIV:
33142 case TYPE_UDIV:
33143 case TYPE_FDIVS:
33144 case TYPE_FDIVD:
33145 case TYPE_FSQRTS:
33146 case TYPE_FSQRTD:
33147 case TYPE_NEON_FP_SQRT_S:
33148 case TYPE_NEON_FP_SQRT_D:
33149 case TYPE_NEON_FP_SQRT_S_Q:
33150 case TYPE_NEON_FP_SQRT_D_Q:
33151 case TYPE_NEON_FP_DIV_S:
33152 case TYPE_NEON_FP_DIV_D:
33153 case TYPE_NEON_FP_DIV_S_Q:
33154 case TYPE_NEON_FP_DIV_D_Q:
33155 return false;
33156 default:
33157 return true;
33158 }
33159 }
33160
33161 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33162
33163 static unsigned HOST_WIDE_INT
33164 arm_asan_shadow_offset (void)
33165 {
33166 return HOST_WIDE_INT_1U << 29;
33167 }
33168
33169
33170 /* This is a temporary fix for PR60655. Ideally we need
33171 to handle most of these cases in the generic part, but
33172 currently we reject (minus (..) (sym_ref)). We try to
33173 ameliorate the case of (minus (sym_ref1) (sym_ref2))
33174 where both symbols are in the same section. */
33175
33176 static bool
33177 arm_const_not_ok_for_debug_p (rtx p)
33178 {
33179 tree decl_op0 = NULL;
33180 tree decl_op1 = NULL;
33181
33182 if (GET_CODE (p) == UNSPEC)
33183 return true;
33184 if (GET_CODE (p) == MINUS)
33185 {
33186 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33187 {
33188 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33189 if (decl_op1
33190 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33191 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33192 {
33193 if ((VAR_P (decl_op1)
33194 || TREE_CODE (decl_op1) == CONST_DECL)
33195 && (VAR_P (decl_op0)
33196 || TREE_CODE (decl_op0) == CONST_DECL))
33197 return (get_variable_section (decl_op1, false)
33198 != get_variable_section (decl_op0, false));
33199
33200 if (TREE_CODE (decl_op1) == LABEL_DECL
33201 && TREE_CODE (decl_op0) == LABEL_DECL)
33202 return (DECL_CONTEXT (decl_op1)
33203 != DECL_CONTEXT (decl_op0));
33204 }
33205
33206 return true;
33207 }
33208 }
33209
33210 return false;
33211 }
33212
33213 /* Return TRUE if X is a reference to a value in a constant pool. */
33214 extern bool
33215 arm_is_constant_pool_ref (rtx x)
33216 {
33217 return (MEM_P (x)
33218 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33219 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33220 }
33221
33222 /* Remember the last target of arm_set_current_function. */
33223 static GTY(()) tree arm_previous_fndecl;
33224
33225 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33226
33227 void
33228 save_restore_target_globals (tree new_tree)
33229 {
33230 /* If we have a previous state, use it. */
33231 if (TREE_TARGET_GLOBALS (new_tree))
33232 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33233 else if (new_tree == target_option_default_node)
33234 restore_target_globals (&default_target_globals);
33235 else
33236 {
33237 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33238 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33239 }
33240
33241 arm_option_params_internal ();
33242 }
33243
33244 /* Invalidate arm_previous_fndecl. */
33245
33246 void
33247 arm_reset_previous_fndecl (void)
33248 {
33249 arm_previous_fndecl = NULL_TREE;
33250 }
33251
33252 /* Establish appropriate back-end context for processing the function
33253 FNDECL. The argument might be NULL to indicate processing at top
33254 level, outside of any function scope. */
33255
33256 static void
33257 arm_set_current_function (tree fndecl)
33258 {
33259 if (!fndecl || fndecl == arm_previous_fndecl)
33260 return;
33261
33262 tree old_tree = (arm_previous_fndecl
33263 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33264 : NULL_TREE);
33265
33266 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33267
33268 /* If current function has no attributes but previous one did,
33269 use the default node. */
33270 if (! new_tree && old_tree)
33271 new_tree = target_option_default_node;
33272
33273 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
33274 pop to the default have been handled by save_restore_target_globals
33275 from arm_pragma_target_parse. */
33276 if (old_tree == new_tree)
33277 return;
33278
33279 arm_previous_fndecl = fndecl;
33280
33281 /* First set the target options. */
33282 cl_target_option_restore (&global_options, &global_options_set,
33283 TREE_TARGET_OPTION (new_tree));
33284
33285 save_restore_target_globals (new_tree);
33286
33287 arm_override_options_after_change_1 (&global_options, &global_options_set);
33288 }
33289
33290 /* Implement TARGET_OPTION_PRINT. */
33291
33292 static void
33293 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33294 {
33295 int flags = ptr->x_target_flags;
33296 const char *fpu_name;
33297
33298 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33299 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33300
33301 fprintf (file, "%*sselected isa %s\n", indent, "",
33302 TARGET_THUMB2_P (flags) ? "thumb2" :
33303 TARGET_THUMB_P (flags) ? "thumb1" :
33304 "arm");
33305
33306 if (ptr->x_arm_arch_string)
33307 fprintf (file, "%*sselected architecture %s\n", indent, "",
33308 ptr->x_arm_arch_string);
33309
33310 if (ptr->x_arm_cpu_string)
33311 fprintf (file, "%*sselected CPU %s\n", indent, "",
33312 ptr->x_arm_cpu_string);
33313
33314 if (ptr->x_arm_tune_string)
33315 fprintf (file, "%*sselected tune %s\n", indent, "",
33316 ptr->x_arm_tune_string);
33317
33318 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33319 }
33320
33321 /* Hook to determine if one function can safely inline another. */
33322
33323 static bool
33324 arm_can_inline_p (tree caller, tree callee)
33325 {
33326 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33327 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33328 bool can_inline = true;
33329
33330 struct cl_target_option *caller_opts
33331 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33332 : target_option_default_node);
33333
33334 struct cl_target_option *callee_opts
33335 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33336 : target_option_default_node);
33337
33338 if (callee_opts == caller_opts)
33339 return true;
33340
33341 /* Callee's ISA features should be a subset of the caller's. */
33342 struct arm_build_target caller_target;
33343 struct arm_build_target callee_target;
33344 caller_target.isa = sbitmap_alloc (isa_num_bits);
33345 callee_target.isa = sbitmap_alloc (isa_num_bits);
33346
33347 arm_configure_build_target (&caller_target, caller_opts, false);
33348 arm_configure_build_target (&callee_target, callee_opts, false);
33349 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33350 can_inline = false;
33351
33352 sbitmap_free (caller_target.isa);
33353 sbitmap_free (callee_target.isa);
33354
33355 /* OK to inline between different modes.
33356 Functions with mode-specific instructions, e.g. using asm,
33357 must be explicitly protected with noinline. */
33358 return can_inline;
33359 }
33360
33361 /* Hook to fix function's alignment affected by target attribute. */
33362
33363 static void
33364 arm_relayout_function (tree fndecl)
33365 {
33366 if (DECL_USER_ALIGN (fndecl))
33367 return;
33368
33369 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33370
33371 if (!callee_tree)
33372 callee_tree = target_option_default_node;
33373
33374 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33375 SET_DECL_ALIGN
33376 (fndecl,
33377 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33378 }
33379
33380 /* Inner function to process attribute((target(...))); it takes an argument
33381 and sets the current options from that argument. If we have a list,
33382 recursively go over the list. */
33383
33384 static bool
33385 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33386 {
33387 if (TREE_CODE (args) == TREE_LIST)
33388 {
33389 bool ret = true;
33390
33391 for (; args; args = TREE_CHAIN (args))
33392 if (TREE_VALUE (args)
33393 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33394 ret = false;
33395 return ret;
33396 }
33397
33398 else if (TREE_CODE (args) != STRING_CST)
33399 {
33400 error ("attribute %<target%> argument not a string");
33401 return false;
33402 }
33403
33404 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33405 char *q;
33406
33407 while ((q = strtok (argstr, ",")) != NULL)
33408 {
33409 argstr = NULL;
33410 if (!strcmp (q, "thumb"))
33411 {
33412 opts->x_target_flags |= MASK_THUMB;
33413 if (TARGET_FDPIC && !arm_arch_thumb2)
33414 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33415 }
33416
33417 else if (!strcmp (q, "arm"))
33418 opts->x_target_flags &= ~MASK_THUMB;
33419
33420 else if (!strcmp (q, "general-regs-only"))
33421 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33422
33423 else if (startswith (q, "fpu="))
33424 {
33425 int fpu_index;
33426 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33427 &fpu_index, CL_TARGET))
33428 {
33429 error ("invalid fpu for target attribute or pragma %qs", q);
33430 return false;
33431 }
33432 if (fpu_index == TARGET_FPU_auto)
33433 {
33434 /* This doesn't really make sense until we support
33435 general dynamic selection of the architecture and all
33436 sub-features. */
33437 sorry ("auto fpu selection not currently permitted here");
33438 return false;
33439 }
33440 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33441 }
33442 else if (startswith (q, "arch="))
33443 {
33444 char *arch = q + 5;
33445 const arch_option *arm_selected_arch
33446 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33447
33448 if (!arm_selected_arch)
33449 {
33450 error ("invalid architecture for target attribute or pragma %qs",
33451 q);
33452 return false;
33453 }
33454
33455 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33456 }
33457 else if (q[0] == '+')
33458 {
33459 opts->x_arm_arch_string
33460 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33461 }
33462 else
33463 {
33464 error ("unknown target attribute or pragma %qs", q);
33465 return false;
33466 }
33467 }
33468
33469 return true;
33470 }
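
/* Usage sketch (editorial addition): the strings accepted here are the
   comma-separated items of a target attribute or pragma, for example

     __attribute__ ((target ("thumb,fpu=vfpv3-d16")))
     void f (void);

     #pragma GCC target ("arch=armv7-a")

   Items beginning with '+' are appended to the current architecture string
   as extensions (for example "+fp", where the architecture defines it).  */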
33471
33472 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33473
33474 tree
33475 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33476 struct gcc_options *opts_set)
33477 {
33478 struct cl_target_option cl_opts;
33479
33480 if (!arm_valid_target_attribute_rec (args, opts))
33481 return NULL_TREE;
33482
33483 cl_target_option_save (&cl_opts, opts, opts_set);
33484 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33485 arm_option_check_internal (opts);
33486 /* Do any overrides, such as global options arch=xxx.
33487 We do this since arm_active_target was overridden. */
33488 arm_option_reconfigure_globals ();
33489 arm_options_perform_arch_sanity_checks ();
33490 arm_option_override_internal (opts, opts_set);
33491
33492 return build_target_option_node (opts, opts_set);
33493 }
33494
33495 static void
33496 add_attribute (const char * mode, tree *attributes)
33497 {
33498 size_t len = strlen (mode);
33499 tree value = build_string (len, mode);
33500
33501 TREE_TYPE (value) = build_array_type (char_type_node,
33502 build_index_type (size_int (len)));
33503
33504 *attributes = tree_cons (get_identifier ("target"),
33505 build_tree_list (NULL_TREE, value),
33506 *attributes);
33507 }
33508
33509 /* For testing. Insert thumb and arm modes alternately on functions. */
33510
33511 static void
33512 arm_insert_attributes (tree fndecl, tree * attributes)
33513 {
33514 const char *mode;
33515
33516 if (! TARGET_FLIP_THUMB)
33517 return;
33518
33519 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33520 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33521 return;
33522
33523 /* Nested definitions must inherit mode. */
33524 if (current_function_decl)
33525 {
33526 mode = TARGET_THUMB ? "thumb" : "arm";
33527 add_attribute (mode, attributes);
33528 return;
33529 }
33530
33531 /* If there is already a setting don't change it. */
33532 if (lookup_attribute ("target", *attributes) != NULL)
33533 return;
33534
33535 mode = thumb_flipper ? "thumb" : "arm";
33536 add_attribute (mode, attributes);
33537
33538 thumb_flipper = !thumb_flipper;
33539 }
33540
33541 /* Hook to validate attribute((target("string"))). */
33542
33543 static bool
33544 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33545 tree args, int ARG_UNUSED (flags))
33546 {
33547 bool ret = true;
33548 struct gcc_options func_options, func_options_set;
33549 tree cur_tree, new_optimize;
33550 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33551
33552 /* Get the optimization options of the current function. */
33553 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33554
33555 /* If the function changed the optimization levels as well as setting target
33556 options, start with the optimizations specified. */
33557 if (!func_optimize)
33558 func_optimize = optimization_default_node;
33559
33560 /* Init func_options. */
33561 memset (&func_options, 0, sizeof (func_options));
33562 init_options_struct (&func_options, NULL);
33563 lang_hooks.init_options_struct (&func_options);
33564 memset (&func_options_set, 0, sizeof (func_options_set));
33565
33566 /* Initialize func_options to the defaults. */
33567 cl_optimization_restore (&func_options, &func_options_set,
33568 TREE_OPTIMIZATION (func_optimize));
33569
33570 cl_target_option_restore (&func_options, &func_options_set,
33571 TREE_TARGET_OPTION (target_option_default_node));
33572
33573 /* Set func_options flags with new target mode. */
33574 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33575 &func_options_set);
33576
33577 if (cur_tree == NULL_TREE)
33578 ret = false;
33579
33580 new_optimize = build_optimization_node (&func_options, &func_options_set);
33581
33582 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33583
33584 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33585
33586 return ret;
33587 }
33588
33589 /* Match an ISA feature bitmap to a named FPU. We always use the
33590 first entry that exactly matches the feature set, so that we
33591 effectively canonicalize the FPU name for the assembler. */
33592 static const char*
33593 arm_identify_fpu_from_isa (sbitmap isa)
33594 {
33595 auto_sbitmap fpubits (isa_num_bits);
33596 auto_sbitmap cand_fpubits (isa_num_bits);
33597
33598 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33599
33600 /* If there are no ISA feature bits relating to the FPU, we must be
33601 doing soft-float. */
33602 if (bitmap_empty_p (fpubits))
33603 return "softvfp";
33604
33605 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33606 {
33607 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33608 if (bitmap_equal_p (fpubits, cand_fpubits))
33609 return all_fpus[i].name;
33610 }
33611 /* We must find an entry, or things have gone wrong. */
33612 gcc_unreachable ();
33613 }
33614
33615 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33616 by the function fndecl. */
33617 void
33618 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33619 {
33620 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33621
33622 struct cl_target_option *targ_options;
33623 if (target_parts)
33624 targ_options = TREE_TARGET_OPTION (target_parts);
33625 else
33626 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33627 gcc_assert (targ_options);
33628
33629 arm_print_asm_arch_directives (stream, targ_options);
33630
33631 fprintf (stream, "\t.syntax unified\n");
33632
33633 if (TARGET_THUMB)
33634 {
33635 if (is_called_in_ARM_mode (decl)
33636 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33637 && cfun->is_thunk))
33638 fprintf (stream, "\t.code 32\n");
33639 else if (TARGET_THUMB1)
33640 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33641 else
33642 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33643 }
33644 else
33645 fprintf (stream, "\t.arm\n");
33646
33647 if (TARGET_POKE_FUNCTION_NAME)
33648 arm_poke_function_name (stream, (const char *) name);
33649 }
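
/* Illustrative output (editorial sketch) for a Thumb-2 function: the
   directives emitted here look like

     .syntax unified
     .thumb
     .thumb_func

   preceded by the .arch/.fpu directives that arm_print_asm_arch_directives
   chooses for the function's target options, and followed by the usual
   function label.  */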
33650
33651 /* If MEM is in the form of [base+offset], extract the two parts
33652 of the address and store them in BASE and OFFSET; otherwise return
33653 false after clearing BASE and OFFSET. */
33654
33655 static bool
33656 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33657 {
33658 rtx addr;
33659
33660 gcc_assert (MEM_P (mem));
33661
33662 addr = XEXP (mem, 0);
33663
33664 /* Strip off const from addresses like (const (addr)). */
33665 if (GET_CODE (addr) == CONST)
33666 addr = XEXP (addr, 0);
33667
33668 if (REG_P (addr))
33669 {
33670 *base = addr;
33671 *offset = const0_rtx;
33672 return true;
33673 }
33674
33675 if (GET_CODE (addr) == PLUS
33676 && GET_CODE (XEXP (addr, 0)) == REG
33677 && CONST_INT_P (XEXP (addr, 1)))
33678 {
33679 *base = XEXP (addr, 0);
33680 *offset = XEXP (addr, 1);
33681 return true;
33682 }
33683
33684 *base = NULL_RTX;
33685 *offset = NULL_RTX;
33686
33687 return false;
33688 }
33689
33690 /* If INSN is a load or store whose address is in the form [base+offset],
33691 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
33692 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33693 otherwise return FALSE. */
33694
33695 static bool
33696 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33697 {
33698 rtx x, dest, src;
33699
33700 gcc_assert (INSN_P (insn));
33701 x = PATTERN (insn);
33702 if (GET_CODE (x) != SET)
33703 return false;
33704
33705 src = SET_SRC (x);
33706 dest = SET_DEST (x);
33707 if (REG_P (src) && MEM_P (dest))
33708 {
33709 *is_load = false;
33710 extract_base_offset_in_addr (dest, base, offset);
33711 }
33712 else if (MEM_P (src) && REG_P (dest))
33713 {
33714 *is_load = true;
33715 extract_base_offset_in_addr (src, base, offset);
33716 }
33717 else
33718 return false;
33719
33720 return (*base != NULL_RTX && *offset != NULL_RTX);
33721 }
33722
33723 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33724
33725 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33726 and PRI are only calculated for these instructions. For other instructions,
33727 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33728 instruction fusion can be supported by returning different priorities.
33729
33730 It's important that irrelevant instructions get the largest FUSION_PRI. */
33731
33732 static void
33733 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33734 int *fusion_pri, int *pri)
33735 {
33736 int tmp, off_val;
33737 bool is_load;
33738 rtx base, offset;
33739
33740 gcc_assert (INSN_P (insn));
33741
33742 tmp = max_pri - 1;
33743 if (!fusion_load_store (insn, &base, &offset, &is_load))
33744 {
33745 *pri = tmp;
33746 *fusion_pri = tmp;
33747 return;
33748 }
33749
33750 /* Load goes first. */
33751 if (is_load)
33752 *fusion_pri = tmp - 1;
33753 else
33754 *fusion_pri = tmp - 2;
33755
33756 tmp /= 2;
33757
33758 /* INSN with smaller base register goes first. */
33759 tmp -= ((REGNO (base) & 0xff) << 20);
33760
33761 /* INSN with smaller offset goes first. */
33762 off_val = (int)(INTVAL (offset));
33763 if (off_val >= 0)
33764 tmp -= (off_val & 0xfffff);
33765 else
33766 tmp += ((- off_val) & 0xfffff);
33767
33768 *pri = tmp;
33769 return;
33770 }
33771
33772
33773 /* Construct and return a PARALLEL RTX vector with elements numbering the
33774 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33775 the vector - from the perspective of the architecture. This does not
33776 line up with GCC's perspective on lane numbers, so we end up with
33777 different masks depending on our target endian-ness. The diagram
33778 below may help. We must draw the distinction when building masks
33779 which select one half of the vector. An instruction selecting
33780 architectural low-lanes for a big-endian target must be described using
33781 a mask selecting GCC high-lanes.
33782
33783 Big-Endian Little-Endian
33784
33785 GCC 0 1 2 3 3 2 1 0
33786 | x | x | x | x | | x | x | x | x |
33787 Architecture 3 2 1 0 3 2 1 0
33788
33789 Low Mask: { 2, 3 } { 0, 1 }
33790 High Mask: { 0, 1 } { 2, 3 }
33791 */
33792
33793 rtx
33794 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33795 {
33796 int nunits = GET_MODE_NUNITS (mode);
33797 rtvec v = rtvec_alloc (nunits / 2);
33798 int high_base = nunits / 2;
33799 int low_base = 0;
33800 int base;
33801 rtx t1;
33802 int i;
33803
33804 if (BYTES_BIG_ENDIAN)
33805 base = high ? low_base : high_base;
33806 else
33807 base = high ? high_base : low_base;
33808
33809 for (i = 0; i < nunits / 2; i++)
33810 RTVEC_ELT (v, i) = GEN_INT (base + i);
33811
33812 t1 = gen_rtx_PARALLEL (mode, v);
33813 return t1;
33814 }
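
/* Worked example (editorial sketch): for V4SImode with HIGH == true this
   returns (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   High Mask row of the diagram above.  */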
33815
33816 /* Check OP for validity as a PARALLEL RTX vector with elements
33817 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33818 from the perspective of the architecture. See the diagram above
33819 arm_simd_vect_par_cnst_half for more details. */
33820
33821 bool
33822 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33823 bool high)
33824 {
33825 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33826 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33827 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33828 int i = 0;
33829
33830 if (!VECTOR_MODE_P (mode))
33831 return false;
33832
33833 if (count_op != count_ideal)
33834 return false;
33835
33836 for (i = 0; i < count_ideal; i++)
33837 {
33838 rtx elt_op = XVECEXP (op, 0, i);
33839 rtx elt_ideal = XVECEXP (ideal, 0, i);
33840
33841 if (!CONST_INT_P (elt_op)
33842 || INTVAL (elt_ideal) != INTVAL (elt_op))
33843 return false;
33844 }
33845 return true;
33846 }
33847
33848 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33849 in Thumb1. */
33850 static bool
33851 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33852 const_tree)
33853 {
33854 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33855 if (vcall_offset && TARGET_THUMB1)
33856 return false;
33857
33858 /* Otherwise ok. */
33859 return true;
33860 }
33861
33862 /* Generate RTL for a conditional branch with rtx comparison CODE in
33863 mode CC_MODE. The destination of the unlikely conditional branch
33864 is LABEL_REF. */
33865
33866 void
33867 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33868 rtx label_ref)
33869 {
33870 rtx x;
33871 x = gen_rtx_fmt_ee (code, VOIDmode,
33872 gen_rtx_REG (cc_mode, CC_REGNUM),
33873 const0_rtx);
33874
33875 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33876 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33877 pc_rtx);
33878 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33879 }
33880
33881 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33882
33883 For pure-code sections there is no letter code for this attribute, so
33884 output all the section flags numerically when this is needed. */
33885
33886 static bool
33887 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33888 {
33889
33890 if (flags & SECTION_ARM_PURECODE)
33891 {
33892 *num = 0x20000000;
33893
33894 if (!(flags & SECTION_DEBUG))
33895 *num |= 0x2;
33896 if (flags & SECTION_EXCLUDE)
33897 *num |= 0x80000000;
33898 if (flags & SECTION_WRITE)
33899 *num |= 0x1;
33900 if (flags & SECTION_CODE)
33901 *num |= 0x4;
33902 if (flags & SECTION_MERGE)
33903 *num |= 0x10;
33904 if (flags & SECTION_STRINGS)
33905 *num |= 0x20;
33906 if (flags & SECTION_TLS)
33907 *num |= 0x400;
33908 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33909 *num |= 0x200;
33910
33911 return true;
33912 }
33913
33914 return false;
33915 }
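
/* Worked example (editorial sketch): a plain executable pure-code section
   (SECTION_CODE set, not SECTION_DEBUG, nothing else) yields
   0x20000000 | 0x2 | 0x4 = 0x20000006, i.e. SHF_ARM_PURECODE plus the
   usual SHF_ALLOC and SHF_EXECINSTR bits.  */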
33916
33917 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33918
33919 If pure-code is passed as an option, make sure all functions are in
33920 sections that have the SHF_ARM_PURECODE attribute. */
33921
33922 static section *
33923 arm_function_section (tree decl, enum node_frequency freq,
33924 bool startup, bool exit)
33925 {
33926 const char * section_name;
33927 section * sec;
33928
33929 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33930 return default_function_section (decl, freq, startup, exit);
33931
33932 if (!target_pure_code)
33933 return default_function_section (decl, freq, startup, exit);
33934
33935
33936 section_name = DECL_SECTION_NAME (decl);
33937
33938 /* If a function is not in a named section then it falls under the 'default'
33939 text section, also known as '.text'. We can preserve previous behavior as
33940 the default text section already has the SHF_ARM_PURECODE section
33941 attribute. */
33942 if (!section_name)
33943 {
33944 section *default_sec = default_function_section (decl, freq, startup,
33945 exit);
33946
33947 /* If default_sec is not null, then it must be a special section, for
33948 example .text.startup.  We set the pure-code attribute and return the
33949 same section to preserve existing behavior. */
33950 if (default_sec)
33951 default_sec->common.flags |= SECTION_ARM_PURECODE;
33952 return default_sec;
33953 }
33954
33955 /* Otherwise check whether a section has already been created with
33956 'section_name'. */
33957 sec = get_named_section (decl, section_name, 0);
33958 if (!sec)
33959 /* If that is not the case, passing NULL as the section's name to
33960 'get_named_section' will create a section with the declaration's
33961 section name. */
33962 sec = get_named_section (decl, NULL, 0);
33963
33964 /* Set the SHF_ARM_PURECODE attribute. */
33965 sec->common.flags |= SECTION_ARM_PURECODE;
33966
33967 return sec;
33968 }
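/* Illustrative sketch (hypothetical source, not from the original file):
   with -mpure-code, a function placed in a user-named section, e.g.

       __attribute__ ((section (".my_code")))
       void f (void) { }

   gets SHF_ARM_PURECODE added to the ".my_code" section by the hook above,
   while functions without a named section stay in the default text
   sections, which already carry the flag.  */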
33969
33970 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33971
33972 If DECL is a function declaration and pure-code is passed as an option
33973 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
33974 section's name and RELOC indicates whether the declaration's initializer
33975 may contain runtime relocations. */
33976
33977 static unsigned int
33978 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33979 {
33980 unsigned int flags = default_section_type_flags (decl, name, reloc);
33981
33982 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33983 flags |= SECTION_ARM_PURECODE;
33984
33985 return flags;
33986 }
33987
33988 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33989
33990 static void
33991 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33992 rtx op0, rtx op1,
33993 rtx *quot_p, rtx *rem_p)
33994 {
33995 if (mode == SImode)
33996 gcc_assert (!TARGET_IDIV);
33997
33998 scalar_int_mode libval_mode
33999 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34000
34001 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34002 libval_mode, op0, mode, op1, mode);
34003
34004 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34005 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34006 GET_MODE_SIZE (mode));
34007
34008 gcc_assert (quotient);
34009 gcc_assert (remainder);
34010
34011 *quot_p = quotient;
34012 *rem_p = remainder;
34013 }
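/* Illustrative sketch (assumes a little-endian ILP32 target; names are
   hypothetical): for SImode the DImode libcall value is split by the
   subregs above roughly as

       uint64_t packed = result_of_aeabi_idivmod;
       int32_t quotient  = (int32_t) (packed & 0xffffffffu);    subreg byte 0
       int32_t remainder = (int32_t) (packed >> 32);            subreg byte 4

   i.e. the quotient occupies the low half of the returned value and the
   remainder the high half.  */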
34014
34015 /* This function checks for the availability of the coprocessor builtin passed
34016 in BUILTIN for the current target.  Returns true if it is available and
34017 false otherwise.  If a BUILTIN is passed for which this function has not
34018 been implemented, it will abort via gcc_unreachable. */
34019
34020 bool
34021 arm_coproc_builtin_available (enum unspecv builtin)
34022 {
34023 /* None of these builtins are available in Thumb mode if the target only
34024 supports Thumb-1. */
34025 if (TARGET_THUMB1)
34026 return false;
34027
34028 switch (builtin)
34029 {
34030 case VUNSPEC_CDP:
34031 case VUNSPEC_LDC:
34032 case VUNSPEC_LDCL:
34033 case VUNSPEC_STC:
34034 case VUNSPEC_STCL:
34035 case VUNSPEC_MCR:
34036 case VUNSPEC_MRC:
34037 if (arm_arch4)
34038 return true;
34039 break;
34040 case VUNSPEC_CDP2:
34041 case VUNSPEC_LDC2:
34042 case VUNSPEC_LDC2L:
34043 case VUNSPEC_STC2:
34044 case VUNSPEC_STC2L:
34045 case VUNSPEC_MCR2:
34046 case VUNSPEC_MRC2:
34047 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34048 ARMv8-{A,M}. */
34049 if (arm_arch5t)
34050 return true;
34051 break;
34052 case VUNSPEC_MCRR:
34053 case VUNSPEC_MRRC:
34054 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34055 ARMv8-{A,M}. */
34056 if (arm_arch6 || arm_arch5te)
34057 return true;
34058 break;
34059 case VUNSPEC_MCRR2:
34060 case VUNSPEC_MRRC2:
34061 if (arm_arch6)
34062 return true;
34063 break;
34064 default:
34065 gcc_unreachable ();
34066 }
34067 return false;
34068 }
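/* Illustrative example (the ACLE spelling of the intrinsic is an assumption
   here, not taken from this file): a coprocessor access such as

       #include <arm_acle.h>
       void write_cp15 (uint32_t x)
       {
         __arm_mcr (15, 0, x, 7, 5, 0);   maps onto VUNSPEC_MCR
       }

   is only accepted when the checks above pass, e.g. never for a
   Thumb-1-only target, and for plain MCR/MRC only from ARMv4 onwards.  */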
34069
34070 /* This function returns true if OP is a valid memory operand for the ldc and
34071 stc coprocessor instructions and false otherwise. */
34072
34073 bool
34074 arm_coproc_ldc_stc_legitimate_address (rtx op)
34075 {
34076 HOST_WIDE_INT range;
34077 /* Has to be a memory operand. */
34078 if (!MEM_P (op))
34079 return false;
34080
34081 op = XEXP (op, 0);
34082
34083 /* We accept registers. */
34084 if (REG_P (op))
34085 return true;
34086
34087 switch (GET_CODE (op))
34088 {
34089 case PLUS:
34090 {
34091 /* Or registers with an offset. */
34092 if (!REG_P (XEXP (op, 0)))
34093 return false;
34094
34095 op = XEXP (op, 1);
34096
34097 /* The offset must be an immediate though. */
34098 if (!CONST_INT_P (op))
34099 return false;
34100
34101 range = INTVAL (op);
34102
34103 /* Within the range of [-1020,1020]. */
34104 if (!IN_RANGE (range, -1020, 1020))
34105 return false;
34106
34107 /* And a multiple of 4. */
34108 return (range % 4) == 0;
34109 }
34110 case PRE_INC:
34111 case POST_INC:
34112 case PRE_DEC:
34113 case POST_DEC:
34114 return REG_P (XEXP (op, 0));
34115 default:
34116 gcc_unreachable ();
34117 }
34118 return false;
34119 }
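/* Illustrative sketch (standalone C, not from the original file): the
   immediate-offset case above amounts to

       bool
       ldc_stc_offset_ok (HOST_WIDE_INT offset)
       {
         return IN_RANGE (offset, -1020, 1020) && (offset % 4) == 0;
       }

   so e.g. [rN, #1020] and [rN, #-8] are accepted, while [rN, #1021] and
   [rN, #2] are rejected; the pre/post increment and decrement forms only
   require a register base.  */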
34120
34121 /* Return the diagnostic message string if conversion from FROMTYPE to
34122 TOTYPE is not allowed, NULL otherwise. */
34123
34124 static const char *
34125 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34126 {
34127 if (element_mode (fromtype) != element_mode (totype))
34128 {
34129 /* Do not allow conversions to/from BFmode scalar types. */
34130 if (TYPE_MODE (fromtype) == BFmode)
34131 return N_("invalid conversion from type %<bfloat16_t%>");
34132 if (TYPE_MODE (totype) == BFmode)
34133 return N_("invalid conversion to type %<bfloat16_t%>");
34134 }
34135
34136 /* Conversion allowed. */
34137 return NULL;
34138 }
34139
34140 /* Return the diagnostic message string if the unary operation OP is
34141 not permitted on TYPE, NULL otherwise. */
34142
34143 static const char *
34144 arm_invalid_unary_op (int op, const_tree type)
34145 {
34146 /* Reject all single-operand operations on BFmode except for &. */
34147 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34148 return N_("operation not permitted on type %<bfloat16_t%>");
34149
34150 /* Operation allowed. */
34151 return NULL;
34152 }
34153
34154 /* Return the diagnostic message string if the binary operation OP is
34155 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34156
34157 static const char *
34158 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34159 const_tree type2)
34160 {
34161 /* Reject all 2-operand operations on BFmode. */
34162 if (element_mode (type1) == BFmode
34163 || element_mode (type2) == BFmode)
34164 return N_("operation not permitted on type %<bfloat16_t%>");
34165
34166 /* Operation allowed. */
34167 return NULL;
34168 }
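/* Illustrative example (hypothetical source; assumes the target provides
   the bfloat16_t typedef):

       bfloat16_t b;
       bfloat16_t *p = &b;       accepted: & is the one permitted unary op
       float f = (float) b;      rejected: conversion from bfloat16_t
       bfloat16_t s = b + b;     rejected: binary operation on bfloat16_t

   matching the three diagnostic hooks above.  */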
34169
34170 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34171
34172 In VFPv1, VFP registers could only be accessed in the mode they were
34173 set, so subregs would be invalid there. However, we don't support
34174 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34175
34176 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34177 VFP registers in little-endian order. We can't describe that accurately to
34178 GCC, so avoid taking subregs of such values.
34179
34180 The only exception is going from a 128-bit to a 64-bit type. In that
34181 case the data layout happens to be consistent for big-endian, so we
34182 explicitly allow that case. */
34183
34184 static bool
34185 arm_can_change_mode_class (machine_mode from, machine_mode to,
34186 reg_class_t rclass)
34187 {
34188 if (TARGET_BIG_END
34189 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34190 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34191 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34192 && reg_classes_intersect_p (VFP_REGS, rclass))
34193 return false;
34194 return true;
34195 }
34196
34197 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34198 strcpy from constants will be faster. */
34199
34200 static HOST_WIDE_INT
34201 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34202 {
34203 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34204 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34205 return MAX (align, BITS_PER_WORD * factor);
34206 return align;
34207 }
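/* Worked example (illustrative only): when not optimizing for size, a
   string constant such as "abc" is given at least BITS_PER_WORD (32-bit)
   alignment, and 64-bit alignment when tuning for XScale in ARM mode,
   since the factor above is 2 in that case.  */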
34208
34209 /* Emit a speculation barrier on target architectures that do not have
34210 DSB/ISB directly. Such systems probably don't need a barrier
34211 themselves, but if the code is ever run on a later architecture, it
34212 might become a problem. */
34213 void
34214 arm_emit_speculation_barrier_function ()
34215 {
34216 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34217 }
34218
34219 /* Have we recorded an explicit access to the Q bit of APSR? */
34220 bool
34221 arm_q_bit_access (void)
34222 {
34223 if (cfun && cfun->decl)
34224 return lookup_attribute ("acle qbit",
34225 DECL_ATTRIBUTES (cfun->decl));
34226 return true;
34227 }
34228
34229 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34230 bool
34231 arm_ge_bits_access (void)
34232 {
34233 if (cfun && cfun->decl)
34234 return lookup_attribute ("acle gebits",
34235 DECL_ATTRIBUTES (cfun->decl));
34236 return true;
34237 }
34238
34239 /* Return NULL if insn INSN is valid within a low-overhead loop.
34240 Otherwise return a string describing why doloop cannot be applied. */
34241
34242 static const char *
34243 arm_invalid_within_doloop (const rtx_insn *insn)
34244 {
34245 if (!TARGET_HAVE_LOB)
34246 return default_invalid_within_doloop (insn);
34247
34248 if (CALL_P (insn))
34249 return "Function call in the loop.";
34250
34251 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34252 return "LR is used inside loop.";
34253
34254 return NULL;
34255 }
34256
34257 bool
34258 arm_target_insn_ok_for_lob (rtx insn)
34259 {
34260 basic_block bb = BLOCK_FOR_INSN (insn);
34261 /* Make sure the basic block of the target insn is a simple latch
34262 having as its single predecessor and successor the body of the loop
34263 itself.  Only simple loops with a single basic block as the body are
34264 supported for 'low-overhead loops', making sure that the LE target is
34265 above LE itself in the generated code. */
34266
34267 return single_succ_p (bb)
34268 && single_pred_p (bb)
34269 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34270 && contains_no_active_insn_p (bb);
34271 }
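/* Illustrative sketch (Armv8.1-M low-overhead-branch spelling; label names
   are hypothetical): the shape accepted by the two checks above is a
   single-block loop such as

       dls     lr, r0          set up the loop counter
   .Lbody:
       ...                     loop body: no calls, no other use of LR
       le      lr, .Lbody      LE branches backwards to the body

   where the LE target (.Lbody) sits above the LE instruction itself.  */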
34272
34273 #if CHECKING_P
34274 namespace selftest {
34275
34276 /* Scan the static data tables generated by parsecpu.awk looking for
34277 potential issues with the data. We primarily check for
34278 inconsistencies in the option extensions at present (extensions
34279 that duplicate others but aren't marked as aliases). Furthermore,
34280 for correct canonicalization later options must never be a subset
34281 of an earlier option. Any extension should also only specify other
34282 feature bits and never an architecture bit. The architecture is inferred
34283 from the declaration of the extension. */
34284 static void
34285 arm_test_cpu_arch_data (void)
34286 {
34287 const arch_option *arch;
34288 const cpu_option *cpu;
34289 auto_sbitmap target_isa (isa_num_bits);
34290 auto_sbitmap isa1 (isa_num_bits);
34291 auto_sbitmap isa2 (isa_num_bits);
34292
34293 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34294 {
34295 const cpu_arch_extension *ext1, *ext2;
34296
34297 if (arch->common.extensions == NULL)
34298 continue;
34299
34300 arm_initialize_isa (target_isa, arch->common.isa_bits);
34301
34302 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34303 {
34304 if (ext1->alias)
34305 continue;
34306
34307 arm_initialize_isa (isa1, ext1->isa_bits);
34308 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34309 {
34310 if (ext2->alias || ext1->remove != ext2->remove)
34311 continue;
34312
34313 arm_initialize_isa (isa2, ext2->isa_bits);
34314 /* If the option is a subset of the parent option, it doesn't
34315 add anything and so isn't useful. */
34316 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34317
34318 /* If the extension specifies any architectural bits then
34319 disallow it. Extensions should only specify feature bits. */
34320 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34321 }
34322 }
34323 }
34324
34325 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34326 {
34327 const cpu_arch_extension *ext1, *ext2;
34328
34329 if (cpu->common.extensions == NULL)
34330 continue;
34331
34332 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34333
34334 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34335 {
34336 if (ext1->alias)
34337 continue;
34338
34339 arm_initialize_isa (isa1, ext1->isa_bits);
34340 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34341 {
34342 if (ext2->alias || ext1->remove != ext2->remove)
34343 continue;
34344
34345 arm_initialize_isa (isa2, ext2->isa_bits);
34346 /* If the option is a subset of the parent option, it doesn't
34347 add anything and so isn't useful. */
34348 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34349
34350 /* If the extension specifies any architectural bits then
34351 disallow it. Extensions should only specify feature bits. */
34352 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34353 }
34354 }
34355 }
34356 }
34357
34358 /* Scan the static data tables generated by parsecpu.awk looking for
34359 potential issues with the data.  Here we check for consistency between the
34360 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34361 a feature bit that is not defined by any FPU flag. */
34362 static void
34363 arm_test_fpu_data (void)
34364 {
34365 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34366 auto_sbitmap fpubits (isa_num_bits);
34367 auto_sbitmap tmpset (isa_num_bits);
34368
34369 static const enum isa_feature fpu_bitlist_internal[]
34370 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34371 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34372
34373 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34374 {
34375 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34376 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34377 bitmap_clear (isa_all_fpubits_internal);
34378 bitmap_copy (isa_all_fpubits_internal, tmpset);
34379 }
34380
34381 if (!bitmap_empty_p (isa_all_fpubits_internal))
34382 {
34383 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34384 " group that are not defined by any FPU.\n"
34385 " Check your arm-cpus.in.\n");
34386 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34387 }
34388 }
34389
34390 static void
34391 arm_run_selftests (void)
34392 {
34393 arm_test_cpu_arch_data ();
34394 arm_test_fpu_data ();
34395 }
34396 } /* Namespace selftest. */
34397
34398 #undef TARGET_RUN_TARGET_SELFTESTS
34399 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34400 #endif /* CHECKING_P */
34401
34402 /* Implement TARGET_STACK_PROTECT_GUARD.  In the case of a
34403 global-variable-based guard, use the default; otherwise
34404 return a null tree. */
34405 static tree
34406 arm_stack_protect_guard (void)
34407 {
34408 if (arm_stack_protector_guard == SSP_GLOBAL)
34409 return default_stack_protect_guard ();
34410
34411 return NULL_TREE;
34412 }
34413
34414 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34415 Unlike the arm version, we do NOT implement asm flag outputs. */
34416
34417 rtx_insn *
34418 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34419 vec<machine_mode> & /*input_modes*/,
34420 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34421 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34422 {
34423 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34424 if (startswith (constraints[i], "=@cc"))
34425 {
34426 sorry ("%<asm%> flags not supported in thumb1 mode");
34427 break;
34428 }
34429 return NULL;
34430 }
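/* Illustrative example (hypothetical source): an asm flag output uses an
   "=@cc<cond>" constraint, e.g.

       int is_zero;
       __asm__ ("cmp %1, #0" : "=@cceq" (is_zero) : "r" (x));

   which is handled by the Arm/Thumb-2 md_asm_adjust but, per the loop
   above, is diagnosed with "sorry" when compiling for Thumb-1.  */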
34431
34432 /* Generate code to enable conditional branches in functions over 1 MiB.
34433 Parameters are:
34434 operands: the operands list of the asm insn (see arm_cond_branch or
34435 arm_cond_branch_reversed).
34436 pos_label: an index into the operands array; operands[pos_label] is
34437 the asm label of the final jump destination.
34438 dest: a string used to generate the asm label of the intermediate
34439 destination.
34440 branch_format: a string denoting the intermediate branch format, e.g.
34441 "beq", "bne", etc. */
34442
34443 const char *
34444 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34445 const char * branch_format)
34446 {
34447 rtx_code_label * tmp_label = gen_label_rtx ();
34448 char label_buf[256];
34449 char buffer[128];
34450 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34451 CODE_LABEL_NUMBER (tmp_label));
34452 const char *label_ptr = arm_strip_name_encoding (label_buf);
34453 rtx dest_label = operands[pos_label];
34454 operands[pos_label] = tmp_label;
34455
34456 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34457 output_asm_insn (buffer, operands);
34458
34459 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34460 operands[pos_label] = dest_label;
34461 output_asm_insn (buffer, operands);
34462 return "";
34463 }
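/* Illustrative sketch (label and condition are hypothetical): the routine
   above turns an out-of-range conditional branch into a short branch
   around an unconditional one, roughly

       beq     .LCB42          BRANCH_FORMAT plus the internal label
       b       .Lfar_target    unconditional branch to the real destination
   .LCB42:

   with the caller supplying the appropriate condition (normally the
   inverse of the original branch) in BRANCH_FORMAT.  */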
34464
34465 /* If the given mode matches, load from memory to LO_REGS
34466 (i.e. [Rn], Rn <= LO_REGS). */
34467 enum reg_class
34468 arm_mode_base_reg_class (machine_mode mode)
34469 {
34470 if (TARGET_HAVE_MVE
34471 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34472 return LO_REGS;
34473
34474 return MODE_BASE_REG_REG_CLASS (mode);
34475 }
34476
34477 struct gcc_target targetm = TARGET_INITIALIZER;
34478
34479 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34480
34481 opt_machine_mode
34482 arm_get_mask_mode (machine_mode mode)
34483 {
34484 if (TARGET_HAVE_MVE)
34485 return arm_mode_to_pred_mode (mode);
34486
34487 return default_get_mask_mode (mode);
34488 }
34489
34490 #include "gt-arm.h"