1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "input.h"
54 #include "function.h"
55 #include "expr.h"
56 #include "insn-codes.h"
57 #include "optabs.h"
58 #include "diagnostic-core.h"
59 #include "recog.h"
60 #include "predict.h"
61 #include "dominance.h"
62 #include "cfg.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "basic-block.h"
69 #include "hash-map.h"
70 #include "is-a.h"
71 #include "plugin-api.h"
72 #include "ipa-ref.h"
73 #include "cgraph.h"
74 #include "ggc.h"
75 #include "except.h"
76 #include "tm_p.h"
77 #include "target.h"
78 #include "sched-int.h"
79 #include "target-def.h"
80 #include "debug.h"
81 #include "langhooks.h"
82 #include "df.h"
83 #include "intl.h"
84 #include "libfuncs.h"
85 #include "params.h"
86 #include "opts.h"
87 #include "dumpfile.h"
88 #include "gimple-expr.h"
89 #include "builtins.h"
90 #include "tm-constrs.h"
91 #include "rtl-iter.h"
92
93 /* Forward definitions of types. */
94 typedef struct minipool_node Mnode;
95 typedef struct minipool_fixup Mfix;
96
97 void (*arm_lang_output_object_attributes_hook)(void);
98
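/* Used by optimal_immediate_sequence below to return the (up to four)
   immediates chosen when splitting a constant into a short sequence of
   instructions.  */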
99 struct four_ints
100 {
101 int i[4];
102 };
103
104 /* Forward function declarations. */
105 static bool arm_const_not_ok_for_debug_p (rtx);
106 static bool arm_lra_p (void);
107 static bool arm_needs_doubleword_align (machine_mode, const_tree);
108 static int arm_compute_static_chain_stack_bytes (void);
109 static arm_stack_offsets *arm_get_frame_offsets (void);
110 static void arm_add_gc_roots (void);
111 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
112 HOST_WIDE_INT, rtx, rtx, int, int);
113 static unsigned bit_count (unsigned long);
114 static int arm_address_register_rtx_p (rtx, int);
115 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
116 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
117 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
118 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
119 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
120 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
121 inline static int thumb1_index_register_rtx_p (rtx, int);
122 static int thumb_far_jump_used_p (void);
123 static bool thumb_force_lr_save (void);
124 static unsigned arm_size_return_regs (void);
125 static bool arm_assemble_integer (rtx, unsigned int, int);
126 static void arm_print_operand (FILE *, rtx, int);
127 static void arm_print_operand_address (FILE *, rtx);
128 static bool arm_print_operand_punct_valid_p (unsigned char code);
129 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
130 static arm_cc get_arm_condition_code (rtx);
131 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
132 static const char *output_multi_immediate (rtx *, const char *, const char *,
133 int, HOST_WIDE_INT);
134 static const char *shift_op (rtx, HOST_WIDE_INT *);
135 static struct machine_function *arm_init_machine_status (void);
136 static void thumb_exit (FILE *, int);
137 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
138 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
139 static Mnode *add_minipool_forward_ref (Mfix *);
140 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
141 static Mnode *add_minipool_backward_ref (Mfix *);
142 static void assign_minipool_offsets (Mfix *);
143 static void arm_print_value (FILE *, rtx);
144 static void dump_minipool (rtx_insn *);
145 static int arm_barrier_cost (rtx);
146 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
147 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
148 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
149 machine_mode, rtx);
150 static void arm_reorg (void);
151 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
152 static unsigned long arm_compute_save_reg0_reg12_mask (void);
153 static unsigned long arm_compute_save_reg_mask (void);
154 static unsigned long arm_isr_value (tree);
155 static unsigned long arm_compute_func_type (void);
156 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
157 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
158 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
159 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
160 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
161 #endif
162 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
163 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
164 static int arm_comp_type_attributes (const_tree, const_tree);
165 static void arm_set_default_type_attributes (tree);
166 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
167 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
168 static int optimal_immediate_sequence (enum rtx_code code,
169 unsigned HOST_WIDE_INT val,
170 struct four_ints *return_sequence);
171 static int optimal_immediate_sequence_1 (enum rtx_code code,
172 unsigned HOST_WIDE_INT val,
173 struct four_ints *return_sequence,
174 int i);
175 static int arm_get_strip_length (int);
176 static bool arm_function_ok_for_sibcall (tree, tree);
177 static machine_mode arm_promote_function_mode (const_tree,
178 machine_mode, int *,
179 const_tree, int);
180 static bool arm_return_in_memory (const_tree, const_tree);
181 static rtx arm_function_value (const_tree, const_tree, bool);
182 static rtx arm_libcall_value_1 (machine_mode);
183 static rtx arm_libcall_value (machine_mode, const_rtx);
184 static bool arm_function_value_regno_p (const unsigned int);
185 static void arm_internal_label (FILE *, const char *, unsigned long);
186 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
187 tree);
188 static bool arm_have_conditional_execution (void);
189 static bool arm_cannot_force_const_mem (machine_mode, rtx);
190 static bool arm_legitimate_constant_p (machine_mode, rtx);
191 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
192 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
193 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
194 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
195 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
196 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
197 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
198 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
199 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
200 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
201 static void emit_constant_insn (rtx cond, rtx pattern);
202 static rtx_insn *emit_set_insn (rtx, rtx);
203 static rtx emit_multi_reg_push (unsigned long, unsigned long);
204 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
205 tree, bool);
206 static rtx arm_function_arg (cumulative_args_t, machine_mode,
207 const_tree, bool);
208 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
211 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
212 const_tree);
213 static rtx aapcs_libcall_value (machine_mode);
214 static int aapcs_select_return_coproc (const_tree, const_tree);
215
216 #ifdef OBJECT_FORMAT_ELF
217 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
218 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
219 #endif
220 #ifndef ARM_PE
221 static void arm_encode_section_info (tree, rtx, int);
222 #endif
223
224 static void arm_file_end (void);
225 static void arm_file_start (void);
226
227 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
228 tree, int *, int);
229 static bool arm_pass_by_reference (cumulative_args_t,
230 machine_mode, const_tree, bool);
231 static bool arm_promote_prototypes (const_tree);
232 static bool arm_default_short_enums (void);
233 static bool arm_align_anon_bitfield (void);
234 static bool arm_return_in_msb (const_tree);
235 static bool arm_must_pass_in_stack (machine_mode, const_tree);
236 static bool arm_return_in_memory (const_tree, const_tree);
237 #if ARM_UNWIND_INFO
238 static void arm_unwind_emit (FILE *, rtx_insn *);
239 static bool arm_output_ttype (rtx);
240 static void arm_asm_emit_except_personality (rtx);
241 static void arm_asm_init_sections (void);
242 #endif
243 static rtx arm_dwarf_register_span (rtx);
244
245 static tree arm_cxx_guard_type (void);
246 static bool arm_cxx_guard_mask_bit (void);
247 static tree arm_get_cookie_size (tree);
248 static bool arm_cookie_has_size (void);
249 static bool arm_cxx_cdtor_returns_this (void);
250 static bool arm_cxx_key_method_may_be_inline (void);
251 static void arm_cxx_determine_class_data_visibility (tree);
252 static bool arm_cxx_class_data_always_comdat (void);
253 static bool arm_cxx_use_aeabi_atexit (void);
254 static void arm_init_libfuncs (void);
255 static tree arm_build_builtin_va_list (void);
256 static void arm_expand_builtin_va_start (tree, rtx);
257 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
258 static void arm_option_override (void);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
263 static bool arm_output_addr_const_extra (FILE *, rtx);
264 static bool arm_allocate_stack_slots_for_args (void);
265 static bool arm_warn_func_return (tree);
266 static const char *arm_invalid_parameter_type (const_tree t);
267 static const char *arm_invalid_return_type (const_tree t);
268 static tree arm_promoted_type (const_tree t);
269 static tree arm_convert_to_type (tree type, tree expr);
270 static bool arm_scalar_mode_supported_p (machine_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (machine_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
292 static unsigned int arm_autovectorize_vector_sizes (void);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
298 const unsigned char *sel);
299
300 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
301 tree vectype,
302 int misalign ATTRIBUTE_UNUSED);
303 static unsigned arm_add_stmt_cost (void *data, int count,
304 enum vect_cost_for_stmt kind,
305 struct _stmt_vec_info *stmt_info,
306 int misalign,
307 enum vect_cost_model_location where);
308
309 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
310 bool op0_preserve_value);
311 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
312
313 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
314 \f
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table[] =
317 {
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
321 it may lie outside of the 26-bit addressing range of a normal function
322 call. */
323 { "long_call", 0, 0, false, true, true, NULL, false },
324 /* These functions, by contrast, are always known to reside within the
325 26-bit addressing range. */
326 { "short_call", 0, 0, false, true, true, NULL, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
329 false },
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
336 false },
337 #ifdef ARM_PE
338 /* ARM/PE has three new attributes:
339 interfacearm - ?
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
342
343 Microsoft allows multiple declspecs in one __declspec, separating
344 them with spaces. We do NOT support this. Instead, use __declspec
345 multiple times.
346 */
347 { "dllimport", 0, 0, true, false, false, NULL, false },
348 { "dllexport", 0, 0, true, false, false, NULL, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
355 false },
356 #endif
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
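/* A minimal sketch of how the attributes above are typically spelled at the
   source level (illustrative only; the function names are hypothetical):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     double vfp_func (double) __attribute__ ((pcs ("aapcs-vfp")));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void bare_func (void) __attribute__ ((naked));

   "naked" suppresses the normal prologue/epilogue, while "isr"/"interrupt"
   request the special prologue/epilogue set up via arm_handle_isr_attribute.  */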
359 \f
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
365
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
368
369 #undef TARGET_LRA_P
370 #define TARGET_LRA_P arm_lra_p
371
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
374
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
379
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
384
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
391
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
394
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
397
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
400
401 #undef TARGET_OPTION_OVERRIDE
402 #define TARGET_OPTION_OVERRIDE arm_option_override
403
404 #undef TARGET_COMP_TYPE_ATTRIBUTES
405 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
406
407 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
408 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
409
410 #undef TARGET_SCHED_ADJUST_COST
411 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
412
413 #undef TARGET_SCHED_REORDER
414 #define TARGET_SCHED_REORDER arm_sched_reorder
415
416 #undef TARGET_REGISTER_MOVE_COST
417 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
418
419 #undef TARGET_MEMORY_MOVE_COST
420 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
421
422 #undef TARGET_ENCODE_SECTION_INFO
423 #ifdef ARM_PE
424 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
425 #else
426 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
427 #endif
428
429 #undef TARGET_STRIP_NAME_ENCODING
430 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
431
432 #undef TARGET_ASM_INTERNAL_LABEL
433 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
434
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
437
438 #undef TARGET_FUNCTION_VALUE
439 #define TARGET_FUNCTION_VALUE arm_function_value
440
441 #undef TARGET_LIBCALL_VALUE
442 #define TARGET_LIBCALL_VALUE arm_libcall_value
443
444 #undef TARGET_FUNCTION_VALUE_REGNO_P
445 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
446
447 #undef TARGET_ASM_OUTPUT_MI_THUNK
448 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
451
452 #undef TARGET_RTX_COSTS
453 #define TARGET_RTX_COSTS arm_rtx_costs
454 #undef TARGET_ADDRESS_COST
455 #define TARGET_ADDRESS_COST arm_address_cost
456
457 #undef TARGET_SHIFT_TRUNCATION_MASK
458 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
459 #undef TARGET_VECTOR_MODE_SUPPORTED_P
460 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
461 #undef TARGET_ARRAY_MODE_SUPPORTED_P
462 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
463 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
464 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
465 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
466 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
467 arm_autovectorize_vector_sizes
468
469 #undef TARGET_MACHINE_DEPENDENT_REORG
470 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
471
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS arm_init_builtins
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
476 #undef TARGET_BUILTIN_DECL
477 #define TARGET_BUILTIN_DECL arm_builtin_decl
478
479 #undef TARGET_INIT_LIBFUNCS
480 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
481
482 #undef TARGET_PROMOTE_FUNCTION_MODE
483 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
486 #undef TARGET_PASS_BY_REFERENCE
487 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
488 #undef TARGET_ARG_PARTIAL_BYTES
489 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
490 #undef TARGET_FUNCTION_ARG
491 #define TARGET_FUNCTION_ARG arm_function_arg
492 #undef TARGET_FUNCTION_ARG_ADVANCE
493 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
494 #undef TARGET_FUNCTION_ARG_BOUNDARY
495 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
496
497 #undef TARGET_SETUP_INCOMING_VARARGS
498 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
499
500 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
501 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
502
503 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
504 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
505 #undef TARGET_TRAMPOLINE_INIT
506 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
507 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
508 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
509
510 #undef TARGET_WARN_FUNC_RETURN
511 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
512
513 #undef TARGET_DEFAULT_SHORT_ENUMS
514 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
515
516 #undef TARGET_ALIGN_ANON_BITFIELD
517 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
518
519 #undef TARGET_NARROW_VOLATILE_BITFIELD
520 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
521
522 #undef TARGET_CXX_GUARD_TYPE
523 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
524
525 #undef TARGET_CXX_GUARD_MASK_BIT
526 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
527
528 #undef TARGET_CXX_GET_COOKIE_SIZE
529 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
530
531 #undef TARGET_CXX_COOKIE_HAS_SIZE
532 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
533
534 #undef TARGET_CXX_CDTOR_RETURNS_THIS
535 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
536
537 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
538 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
539
540 #undef TARGET_CXX_USE_AEABI_ATEXIT
541 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
542
543 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
544 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
545 arm_cxx_determine_class_data_visibility
546
547 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
548 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
549
550 #undef TARGET_RETURN_IN_MSB
551 #define TARGET_RETURN_IN_MSB arm_return_in_msb
552
553 #undef TARGET_RETURN_IN_MEMORY
554 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
555
556 #undef TARGET_MUST_PASS_IN_STACK
557 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
558
559 #if ARM_UNWIND_INFO
560 #undef TARGET_ASM_UNWIND_EMIT
561 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
562
563 /* EABI unwinding tables use a different format for the typeinfo tables. */
564 #undef TARGET_ASM_TTYPE
565 #define TARGET_ASM_TTYPE arm_output_ttype
566
567 #undef TARGET_ARM_EABI_UNWINDER
568 #define TARGET_ARM_EABI_UNWINDER true
569
570 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
571 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
572
573 #undef TARGET_ASM_INIT_SECTIONS
574 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
575 #endif /* ARM_UNWIND_INFO */
576
577 #undef TARGET_DWARF_REGISTER_SPAN
578 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
579
580 #undef TARGET_CANNOT_COPY_INSN_P
581 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
582
583 #ifdef HAVE_AS_TLS
584 #undef TARGET_HAVE_TLS
585 #define TARGET_HAVE_TLS true
586 #endif
587
588 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
589 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
590
591 #undef TARGET_LEGITIMATE_CONSTANT_P
592 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
593
594 #undef TARGET_CANNOT_FORCE_CONST_MEM
595 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
596
597 #undef TARGET_MAX_ANCHOR_OFFSET
598 #define TARGET_MAX_ANCHOR_OFFSET 4095
599
600 /* The minimum is set such that the block for a particular anchor
601 spans offsets -4088 through +4095, i.e. 4088 + 1 + 4095 = 8184 bytes,
602 which is divisible by eight, ensuring natural spacing of anchors. */
603 #undef TARGET_MIN_ANCHOR_OFFSET
604 #define TARGET_MIN_ANCHOR_OFFSET -4088
605
606 #undef TARGET_SCHED_ISSUE_RATE
607 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
608
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE arm_mangle_type
611
612 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
613 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
614
615 #undef TARGET_BUILD_BUILTIN_VA_LIST
616 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
617 #undef TARGET_EXPAND_BUILTIN_VA_START
618 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
619 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
620 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
621
622 #ifdef HAVE_AS_TLS
623 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
624 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
625 #endif
626
627 #undef TARGET_LEGITIMATE_ADDRESS_P
628 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
629
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
632
633 #undef TARGET_INVALID_PARAMETER_TYPE
634 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
635
636 #undef TARGET_INVALID_RETURN_TYPE
637 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
638
639 #undef TARGET_PROMOTED_TYPE
640 #define TARGET_PROMOTED_TYPE arm_promoted_type
641
642 #undef TARGET_CONVERT_TO_TYPE
643 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
644
645 #undef TARGET_SCALAR_MODE_SUPPORTED_P
646 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
647
648 #undef TARGET_FRAME_POINTER_REQUIRED
649 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
650
651 #undef TARGET_CAN_ELIMINATE
652 #define TARGET_CAN_ELIMINATE arm_can_eliminate
653
654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
655 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
656
657 #undef TARGET_CLASS_LIKELY_SPILLED_P
658 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
659
660 #undef TARGET_VECTORIZE_BUILTINS
661 #define TARGET_VECTORIZE_BUILTINS
662
663 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
664 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
665 arm_builtin_vectorized_function
666
667 #undef TARGET_VECTOR_ALIGNMENT
668 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
669
670 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
671 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
672 arm_vector_alignment_reachable
673
674 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
675 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
676 arm_builtin_support_vector_misalignment
677
678 #undef TARGET_PREFERRED_RENAME_CLASS
679 #define TARGET_PREFERRED_RENAME_CLASS \
680 arm_preferred_rename_class
681
682 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
683 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
684 arm_vectorize_vec_perm_const_ok
685
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
688 arm_builtin_vectorization_cost
689 #undef TARGET_VECTORIZE_ADD_STMT_COST
690 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
691
692 #undef TARGET_CANONICALIZE_COMPARISON
693 #define TARGET_CANONICALIZE_COMPARISON \
694 arm_canonicalize_comparison
695
696 #undef TARGET_ASAN_SHADOW_OFFSET
697 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
698
699 #undef MAX_INSN_PER_IT_BLOCK
700 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
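/* A minimal illustration (not generated code): an IT block conditionalises
   up to four following Thumb-2 instructions, e.g.

       itte  ne
       addne r0, r0, #1
       movne r1, #0
       moveq r0, #0

   When arm_restrict_it is set (ARMv8 deprecates the more general forms),
   only one conditional instruction per IT block is emitted, hence the
   1-vs-4 limit above.  */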
701
702 #undef TARGET_CAN_USE_DOLOOP_P
703 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
704
705 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
706 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
707
708 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
709 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
710
711 #undef TARGET_SCHED_FUSION_PRIORITY
712 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
713
714 struct gcc_target targetm = TARGET_INITIALIZER;
715 \f
716 /* Obstack for minipool constant handling. */
717 static struct obstack minipool_obstack;
718 static char * minipool_startobj;
719
720 /* The maximum number of insns skipped which
721 will be conditionalised if possible. */
722 static int max_insns_skipped = 5;
723
724 extern FILE * asm_out_file;
725
726 /* True if we are currently building a constant table. */
727 int making_const_table;
728
729 /* The processor for which instructions should be scheduled. */
730 enum processor_type arm_tune = arm_none;
731
732 /* The current tuning set. */
733 const struct tune_params *current_tune;
734
735 /* Which floating point hardware to schedule for. */
736 int arm_fpu_attr;
737
739 /* Which floating point hardware to use. */
739 const struct arm_fpu_desc *arm_fpu_desc;
740
741 /* Used for Thumb call_via trampolines. */
742 rtx thumb_call_via_label[14];
743 static int thumb_call_reg_needed;
744
745 /* The bits in this mask specify which
746 instructions we are allowed to generate. */
747 unsigned long insn_flags = 0;
748
749 /* The bits in this mask specify which instruction scheduling options should
750 be used. */
751 unsigned long tune_flags = 0;
752
753 /* The highest ARM architecture version supported by the
754 target. */
755 enum base_architecture arm_base_arch = BASE_ARCH_0;
756
757 /* The following are used in the arm.md file as equivalents to bits
758 in the above two flag variables. */
759
760 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
761 int arm_arch3m = 0;
762
763 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
764 int arm_arch4 = 0;
765
766 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
767 int arm_arch4t = 0;
768
769 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
770 int arm_arch5 = 0;
771
772 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
773 int arm_arch5e = 0;
774
775 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
776 int arm_arch6 = 0;
777
778 /* Nonzero if this chip supports the ARM 6K extensions. */
779 int arm_arch6k = 0;
780
781 /* Nonzero if instructions present in ARMv6-M can be used. */
782 int arm_arch6m = 0;
783
784 /* Nonzero if this chip supports the ARM 7 extensions. */
785 int arm_arch7 = 0;
786
787 /* Nonzero if instructions not present in the 'M' profile can be used. */
788 int arm_arch_notm = 0;
789
790 /* Nonzero if instructions present in ARMv7E-M can be used. */
791 int arm_arch7em = 0;
792
793 /* Nonzero if instructions present in ARMv8 can be used. */
794 int arm_arch8 = 0;
795
796 /* Nonzero if this chip can benefit from load scheduling. */
797 int arm_ld_sched = 0;
798
799 /* Nonzero if this chip is a StrongARM. */
800 int arm_tune_strongarm = 0;
801
802 /* Nonzero if this chip supports Intel Wireless MMX technology. */
803 int arm_arch_iwmmxt = 0;
804
805 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
806 int arm_arch_iwmmxt2 = 0;
807
808 /* Nonzero if this chip is an XScale. */
809 int arm_arch_xscale = 0;
810
811 /* Nonzero if tuning for XScale. */
812 int arm_tune_xscale = 0;
813
814 /* Nonzero if we want to tune for stores that access the write-buffer.
815 This typically means an ARM6 or ARM7 with MMU or MPU. */
816 int arm_tune_wbuf = 0;
817
818 /* Nonzero if tuning for Cortex-A9. */
819 int arm_tune_cortex_a9 = 0;
820
821 /* Nonzero if generating Thumb instructions. */
822 int thumb_code = 0;
823
824 /* Nonzero if generating Thumb-1 instructions. */
825 int thumb1_code = 0;
826
827 /* Nonzero if we should define __THUMB_INTERWORK__ in the
828 preprocessor.
829 XXX This is a bit of a hack; it's intended to help work around
830 problems in GLD, which doesn't understand that armv5t code is
831 interworking clean. */
832 int arm_cpp_interwork = 0;
833
834 /* Nonzero if chip supports Thumb 2. */
835 int arm_arch_thumb2;
836
837 /* Nonzero if chip supports integer division instruction. */
838 int arm_arch_arm_hwdiv;
839 int arm_arch_thumb_hwdiv;
840
841 /* Nonzero if we should use Neon to handle 64-bit operations rather
842 than core registers. */
843 int prefer_neon_for_64bits = 0;
844
845 /* Nonzero if we shouldn't use literal pools. */
846 bool arm_disable_literal_pool = false;
847
848 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
849 we must report the mode of the memory reference from
850 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
851 machine_mode output_memory_reference_mode;
852
853 /* The register number to be used for the PIC offset register. */
854 unsigned arm_pic_register = INVALID_REGNUM;
855
856 enum arm_pcs arm_pcs_default;
857
858 /* For an explanation of these variables, see final_prescan_insn below. */
859 int arm_ccfsm_state;
860 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
861 enum arm_cond_code arm_current_cc;
862
863 rtx arm_target_insn;
864 int arm_target_label;
865 /* The number of conditionally executed insns, including the current insn. */
866 int arm_condexec_count = 0;
867 /* A bitmask specifying the patterns for the IT block.
868 Zero means do not output an IT block before this insn. */
869 int arm_condexec_mask = 0;
870 /* The number of bits used in arm_condexec_mask. */
871 int arm_condexec_masklen = 0;
872
873 /* Nonzero if chip supports the ARMv8 CRC instructions. */
874 int arm_arch_crc = 0;
875
876 /* Nonzero if the core has a very small, high-latency multiply unit. */
877 int arm_m_profile_small_mul = 0;
878
879 /* The condition codes of the ARM, and the inverse function. */
880 static const char * const arm_condition_codes[] =
881 {
882 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
883 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
884 };
885
886 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
887 int arm_regs_in_sequence[] =
888 {
889 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
890 };
891
892 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
893 #define streq(string1, string2) (strcmp (string1, string2) == 0)
894
895 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
896 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
897 | (1 << PIC_OFFSET_TABLE_REGNUM)))
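/* In other words, the low registers r0-r7 (mask 0xff), minus whichever of
   the Thumb hard frame pointer, stack pointer, program counter and PIC
   register fall within that range; presumably the pool from which Thumb-2
   work registers are drawn.  */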
898 \f
899 /* Initialization code. */
900
901 struct processors
902 {
903 const char *const name;
904 enum processor_type core;
905 const char *arch;
906 enum base_architecture base_arch;
907 const unsigned long flags;
908 const struct tune_params *const tune;
909 };
910
911
912 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
913 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
914 prefetch_slots, \
915 l1_size, \
916 l1_line_size
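/* These macros simply supply the three prefetch-related tune_params fields
   (number of prefetch slots, L1 cache size and L1 cache line size).  For
   example, a hypothetical ARM_PREFETCH_BENEFICIAL (4, 32, 64) expands to
   "4, 32, 64", while ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1",
   i.e. no useful prefetch information.  */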
917
918 /* arm generic vectorizer costs. */
919 static const
920 struct cpu_vec_costs arm_default_vec_cost = {
921 1, /* scalar_stmt_cost. */
922 1, /* scalar_load_cost. */
923 1, /* scalar_store_cost. */
924 1, /* vec_stmt_cost. */
925 1, /* vec_to_scalar_cost. */
926 1, /* scalar_to_vec_cost. */
927 1, /* vec_align_load_cost. */
928 1, /* vec_unalign_load_cost. */
929 1, /* vec_unalign_store_cost. */
930 1, /* vec_store_cost. */
931 3, /* cond_taken_branch_cost. */
932 1, /* cond_not_taken_branch_cost. */
933 };
934
935 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
936 #include "aarch-cost-tables.h"
937
938
939
940 const struct cpu_cost_table cortexa9_extra_costs =
941 {
942 /* ALU */
943 {
944 0, /* arith. */
945 0, /* logical. */
946 0, /* shift. */
947 COSTS_N_INSNS (1), /* shift_reg. */
948 COSTS_N_INSNS (1), /* arith_shift. */
949 COSTS_N_INSNS (2), /* arith_shift_reg. */
950 0, /* log_shift. */
951 COSTS_N_INSNS (1), /* log_shift_reg. */
952 COSTS_N_INSNS (1), /* extend. */
953 COSTS_N_INSNS (2), /* extend_arith. */
954 COSTS_N_INSNS (1), /* bfi. */
955 COSTS_N_INSNS (1), /* bfx. */
956 0, /* clz. */
957 0, /* rev. */
958 0, /* non_exec. */
959 true /* non_exec_costs_exec. */
960 },
961 {
962 /* MULT SImode */
963 {
964 COSTS_N_INSNS (3), /* simple. */
965 COSTS_N_INSNS (3), /* flag_setting. */
966 COSTS_N_INSNS (2), /* extend. */
967 COSTS_N_INSNS (3), /* add. */
968 COSTS_N_INSNS (2), /* extend_add. */
969 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
970 },
971 /* MULT DImode */
972 {
973 0, /* simple (N/A). */
974 0, /* flag_setting (N/A). */
975 COSTS_N_INSNS (4), /* extend. */
976 0, /* add (N/A). */
977 COSTS_N_INSNS (4), /* extend_add. */
978 0 /* idiv (N/A). */
979 }
980 },
981 /* LD/ST */
982 {
983 COSTS_N_INSNS (2), /* load. */
984 COSTS_N_INSNS (2), /* load_sign_extend. */
985 COSTS_N_INSNS (2), /* ldrd. */
986 COSTS_N_INSNS (2), /* ldm_1st. */
987 1, /* ldm_regs_per_insn_1st. */
988 2, /* ldm_regs_per_insn_subsequent. */
989 COSTS_N_INSNS (5), /* loadf. */
990 COSTS_N_INSNS (5), /* loadd. */
991 COSTS_N_INSNS (1), /* load_unaligned. */
992 COSTS_N_INSNS (2), /* store. */
993 COSTS_N_INSNS (2), /* strd. */
994 COSTS_N_INSNS (2), /* stm_1st. */
995 1, /* stm_regs_per_insn_1st. */
996 2, /* stm_regs_per_insn_subsequent. */
997 COSTS_N_INSNS (1), /* storef. */
998 COSTS_N_INSNS (1), /* stored. */
999 COSTS_N_INSNS (1) /* store_unaligned. */
1000 },
1001 {
1002 /* FP SFmode */
1003 {
1004 COSTS_N_INSNS (14), /* div. */
1005 COSTS_N_INSNS (4), /* mult. */
1006 COSTS_N_INSNS (7), /* mult_addsub. */
1007 COSTS_N_INSNS (30), /* fma. */
1008 COSTS_N_INSNS (3), /* addsub. */
1009 COSTS_N_INSNS (1), /* fpconst. */
1010 COSTS_N_INSNS (1), /* neg. */
1011 COSTS_N_INSNS (3), /* compare. */
1012 COSTS_N_INSNS (3), /* widen. */
1013 COSTS_N_INSNS (3), /* narrow. */
1014 COSTS_N_INSNS (3), /* toint. */
1015 COSTS_N_INSNS (3), /* fromint. */
1016 COSTS_N_INSNS (3) /* roundint. */
1017 },
1018 /* FP DFmode */
1019 {
1020 COSTS_N_INSNS (24), /* div. */
1021 COSTS_N_INSNS (5), /* mult. */
1022 COSTS_N_INSNS (8), /* mult_addsub. */
1023 COSTS_N_INSNS (30), /* fma. */
1024 COSTS_N_INSNS (3), /* addsub. */
1025 COSTS_N_INSNS (1), /* fpconst. */
1026 COSTS_N_INSNS (1), /* neg. */
1027 COSTS_N_INSNS (3), /* compare. */
1028 COSTS_N_INSNS (3), /* widen. */
1029 COSTS_N_INSNS (3), /* narrow. */
1030 COSTS_N_INSNS (3), /* toint. */
1031 COSTS_N_INSNS (3), /* fromint. */
1032 COSTS_N_INSNS (3) /* roundint. */
1033 }
1034 },
1035 /* Vector */
1036 {
1037 COSTS_N_INSNS (1) /* alu. */
1038 }
1039 };
1040
1041 const struct cpu_cost_table cortexa8_extra_costs =
1042 {
1043 /* ALU */
1044 {
1045 0, /* arith. */
1046 0, /* logical. */
1047 COSTS_N_INSNS (1), /* shift. */
1048 0, /* shift_reg. */
1049 COSTS_N_INSNS (1), /* arith_shift. */
1050 0, /* arith_shift_reg. */
1051 COSTS_N_INSNS (1), /* log_shift. */
1052 0, /* log_shift_reg. */
1053 0, /* extend. */
1054 0, /* extend_arith. */
1055 0, /* bfi. */
1056 0, /* bfx. */
1057 0, /* clz. */
1058 0, /* rev. */
1059 0, /* non_exec. */
1060 true /* non_exec_costs_exec. */
1061 },
1062 {
1063 /* MULT SImode */
1064 {
1065 COSTS_N_INSNS (1), /* simple. */
1066 COSTS_N_INSNS (1), /* flag_setting. */
1067 COSTS_N_INSNS (1), /* extend. */
1068 COSTS_N_INSNS (1), /* add. */
1069 COSTS_N_INSNS (1), /* extend_add. */
1070 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1071 },
1072 /* MULT DImode */
1073 {
1074 0, /* simple (N/A). */
1075 0, /* flag_setting (N/A). */
1076 COSTS_N_INSNS (2), /* extend. */
1077 0, /* add (N/A). */
1078 COSTS_N_INSNS (2), /* extend_add. */
1079 0 /* idiv (N/A). */
1080 }
1081 },
1082 /* LD/ST */
1083 {
1084 COSTS_N_INSNS (1), /* load. */
1085 COSTS_N_INSNS (1), /* load_sign_extend. */
1086 COSTS_N_INSNS (1), /* ldrd. */
1087 COSTS_N_INSNS (1), /* ldm_1st. */
1088 1, /* ldm_regs_per_insn_1st. */
1089 2, /* ldm_regs_per_insn_subsequent. */
1090 COSTS_N_INSNS (1), /* loadf. */
1091 COSTS_N_INSNS (1), /* loadd. */
1092 COSTS_N_INSNS (1), /* load_unaligned. */
1093 COSTS_N_INSNS (1), /* store. */
1094 COSTS_N_INSNS (1), /* strd. */
1095 COSTS_N_INSNS (1), /* stm_1st. */
1096 1, /* stm_regs_per_insn_1st. */
1097 2, /* stm_regs_per_insn_subsequent. */
1098 COSTS_N_INSNS (1), /* storef. */
1099 COSTS_N_INSNS (1), /* stored. */
1100 COSTS_N_INSNS (1) /* store_unaligned. */
1101 },
1102 {
1103 /* FP SFmode */
1104 {
1105 COSTS_N_INSNS (36), /* div. */
1106 COSTS_N_INSNS (11), /* mult. */
1107 COSTS_N_INSNS (20), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (9), /* addsub. */
1110 COSTS_N_INSNS (3), /* fpconst. */
1111 COSTS_N_INSNS (3), /* neg. */
1112 COSTS_N_INSNS (6), /* compare. */
1113 COSTS_N_INSNS (4), /* widen. */
1114 COSTS_N_INSNS (4), /* narrow. */
1115 COSTS_N_INSNS (8), /* toint. */
1116 COSTS_N_INSNS (8), /* fromint. */
1117 COSTS_N_INSNS (8) /* roundint. */
1118 },
1119 /* FP DFmode */
1120 {
1121 COSTS_N_INSNS (64), /* div. */
1122 COSTS_N_INSNS (16), /* mult. */
1123 COSTS_N_INSNS (25), /* mult_addsub. */
1124 COSTS_N_INSNS (30), /* fma. */
1125 COSTS_N_INSNS (9), /* addsub. */
1126 COSTS_N_INSNS (3), /* fpconst. */
1127 COSTS_N_INSNS (3), /* neg. */
1128 COSTS_N_INSNS (6), /* compare. */
1129 COSTS_N_INSNS (6), /* widen. */
1130 COSTS_N_INSNS (6), /* narrow. */
1131 COSTS_N_INSNS (8), /* toint. */
1132 COSTS_N_INSNS (8), /* fromint. */
1133 COSTS_N_INSNS (8) /* roundint. */
1134 }
1135 },
1136 /* Vector */
1137 {
1138 COSTS_N_INSNS (1) /* alu. */
1139 }
1140 };
1141
1142 const struct cpu_cost_table cortexa5_extra_costs =
1143 {
1144 /* ALU */
1145 {
1146 0, /* arith. */
1147 0, /* logical. */
1148 COSTS_N_INSNS (1), /* shift. */
1149 COSTS_N_INSNS (1), /* shift_reg. */
1150 COSTS_N_INSNS (1), /* arith_shift. */
1151 COSTS_N_INSNS (1), /* arith_shift_reg. */
1152 COSTS_N_INSNS (1), /* log_shift. */
1153 COSTS_N_INSNS (1), /* log_shift_reg. */
1154 COSTS_N_INSNS (1), /* extend. */
1155 COSTS_N_INSNS (1), /* extend_arith. */
1156 COSTS_N_INSNS (1), /* bfi. */
1157 COSTS_N_INSNS (1), /* bfx. */
1158 COSTS_N_INSNS (1), /* clz. */
1159 COSTS_N_INSNS (1), /* rev. */
1160 0, /* non_exec. */
1161 true /* non_exec_costs_exec. */
1162 },
1163
1164 {
1165 /* MULT SImode */
1166 {
1167 0, /* simple. */
1168 COSTS_N_INSNS (1), /* flag_setting. */
1169 COSTS_N_INSNS (1), /* extend. */
1170 COSTS_N_INSNS (1), /* add. */
1171 COSTS_N_INSNS (1), /* extend_add. */
1172 COSTS_N_INSNS (7) /* idiv. */
1173 },
1174 /* MULT DImode */
1175 {
1176 0, /* simple (N/A). */
1177 0, /* flag_setting (N/A). */
1178 COSTS_N_INSNS (1), /* extend. */
1179 0, /* add. */
1180 COSTS_N_INSNS (2), /* extend_add. */
1181 0 /* idiv (N/A). */
1182 }
1183 },
1184 /* LD/ST */
1185 {
1186 COSTS_N_INSNS (1), /* load. */
1187 COSTS_N_INSNS (1), /* load_sign_extend. */
1188 COSTS_N_INSNS (6), /* ldrd. */
1189 COSTS_N_INSNS (1), /* ldm_1st. */
1190 1, /* ldm_regs_per_insn_1st. */
1191 2, /* ldm_regs_per_insn_subsequent. */
1192 COSTS_N_INSNS (2), /* loadf. */
1193 COSTS_N_INSNS (4), /* loadd. */
1194 COSTS_N_INSNS (1), /* load_unaligned. */
1195 COSTS_N_INSNS (1), /* store. */
1196 COSTS_N_INSNS (3), /* strd. */
1197 COSTS_N_INSNS (1), /* stm_1st. */
1198 1, /* stm_regs_per_insn_1st. */
1199 2, /* stm_regs_per_insn_subsequent. */
1200 COSTS_N_INSNS (2), /* storef. */
1201 COSTS_N_INSNS (2), /* stored. */
1202 COSTS_N_INSNS (1) /* store_unaligned. */
1203 },
1204 {
1205 /* FP SFmode */
1206 {
1207 COSTS_N_INSNS (15), /* div. */
1208 COSTS_N_INSNS (3), /* mult. */
1209 COSTS_N_INSNS (7), /* mult_addsub. */
1210 COSTS_N_INSNS (7), /* fma. */
1211 COSTS_N_INSNS (3), /* addsub. */
1212 COSTS_N_INSNS (3), /* fpconst. */
1213 COSTS_N_INSNS (3), /* neg. */
1214 COSTS_N_INSNS (3), /* compare. */
1215 COSTS_N_INSNS (3), /* widen. */
1216 COSTS_N_INSNS (3), /* narrow. */
1217 COSTS_N_INSNS (3), /* toint. */
1218 COSTS_N_INSNS (3), /* fromint. */
1219 COSTS_N_INSNS (3) /* roundint. */
1220 },
1221 /* FP DFmode */
1222 {
1223 COSTS_N_INSNS (30), /* div. */
1224 COSTS_N_INSNS (6), /* mult. */
1225 COSTS_N_INSNS (10), /* mult_addsub. */
1226 COSTS_N_INSNS (7), /* fma. */
1227 COSTS_N_INSNS (3), /* addsub. */
1228 COSTS_N_INSNS (3), /* fpconst. */
1229 COSTS_N_INSNS (3), /* neg. */
1230 COSTS_N_INSNS (3), /* compare. */
1231 COSTS_N_INSNS (3), /* widen. */
1232 COSTS_N_INSNS (3), /* narrow. */
1233 COSTS_N_INSNS (3), /* toint. */
1234 COSTS_N_INSNS (3), /* fromint. */
1235 COSTS_N_INSNS (3) /* roundint. */
1236 }
1237 },
1238 /* Vector */
1239 {
1240 COSTS_N_INSNS (1) /* alu. */
1241 }
1242 };
1243
1244
1245 const struct cpu_cost_table cortexa7_extra_costs =
1246 {
1247 /* ALU */
1248 {
1249 0, /* arith. */
1250 0, /* logical. */
1251 COSTS_N_INSNS (1), /* shift. */
1252 COSTS_N_INSNS (1), /* shift_reg. */
1253 COSTS_N_INSNS (1), /* arith_shift. */
1254 COSTS_N_INSNS (1), /* arith_shift_reg. */
1255 COSTS_N_INSNS (1), /* log_shift. */
1256 COSTS_N_INSNS (1), /* log_shift_reg. */
1257 COSTS_N_INSNS (1), /* extend. */
1258 COSTS_N_INSNS (1), /* extend_arith. */
1259 COSTS_N_INSNS (1), /* bfi. */
1260 COSTS_N_INSNS (1), /* bfx. */
1261 COSTS_N_INSNS (1), /* clz. */
1262 COSTS_N_INSNS (1), /* rev. */
1263 0, /* non_exec. */
1264 true /* non_exec_costs_exec. */
1265 },
1266
1267 {
1268 /* MULT SImode */
1269 {
1270 0, /* simple. */
1271 COSTS_N_INSNS (1), /* flag_setting. */
1272 COSTS_N_INSNS (1), /* extend. */
1273 COSTS_N_INSNS (1), /* add. */
1274 COSTS_N_INSNS (1), /* extend_add. */
1275 COSTS_N_INSNS (7) /* idiv. */
1276 },
1277 /* MULT DImode */
1278 {
1279 0, /* simple (N/A). */
1280 0, /* flag_setting (N/A). */
1281 COSTS_N_INSNS (1), /* extend. */
1282 0, /* add. */
1283 COSTS_N_INSNS (2), /* extend_add. */
1284 0 /* idiv (N/A). */
1285 }
1286 },
1287 /* LD/ST */
1288 {
1289 COSTS_N_INSNS (1), /* load. */
1290 COSTS_N_INSNS (1), /* load_sign_extend. */
1291 COSTS_N_INSNS (3), /* ldrd. */
1292 COSTS_N_INSNS (1), /* ldm_1st. */
1293 1, /* ldm_regs_per_insn_1st. */
1294 2, /* ldm_regs_per_insn_subsequent. */
1295 COSTS_N_INSNS (2), /* loadf. */
1296 COSTS_N_INSNS (2), /* loadd. */
1297 COSTS_N_INSNS (1), /* load_unaligned. */
1298 COSTS_N_INSNS (1), /* store. */
1299 COSTS_N_INSNS (3), /* strd. */
1300 COSTS_N_INSNS (1), /* stm_1st. */
1301 1, /* stm_regs_per_insn_1st. */
1302 2, /* stm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* storef. */
1304 COSTS_N_INSNS (2), /* stored. */
1305 COSTS_N_INSNS (1) /* store_unaligned. */
1306 },
1307 {
1308 /* FP SFmode */
1309 {
1310 COSTS_N_INSNS (15), /* div. */
1311 COSTS_N_INSNS (3), /* mult. */
1312 COSTS_N_INSNS (7), /* mult_addsub. */
1313 COSTS_N_INSNS (7), /* fma. */
1314 COSTS_N_INSNS (3), /* addsub. */
1315 COSTS_N_INSNS (3), /* fpconst. */
1316 COSTS_N_INSNS (3), /* neg. */
1317 COSTS_N_INSNS (3), /* compare. */
1318 COSTS_N_INSNS (3), /* widen. */
1319 COSTS_N_INSNS (3), /* narrow. */
1320 COSTS_N_INSNS (3), /* toint. */
1321 COSTS_N_INSNS (3), /* fromint. */
1322 COSTS_N_INSNS (3) /* roundint. */
1323 },
1324 /* FP DFmode */
1325 {
1326 COSTS_N_INSNS (30), /* div. */
1327 COSTS_N_INSNS (6), /* mult. */
1328 COSTS_N_INSNS (10), /* mult_addsub. */
1329 COSTS_N_INSNS (7), /* fma. */
1330 COSTS_N_INSNS (3), /* addsub. */
1331 COSTS_N_INSNS (3), /* fpconst. */
1332 COSTS_N_INSNS (3), /* neg. */
1333 COSTS_N_INSNS (3), /* compare. */
1334 COSTS_N_INSNS (3), /* widen. */
1335 COSTS_N_INSNS (3), /* narrow. */
1336 COSTS_N_INSNS (3), /* toint. */
1337 COSTS_N_INSNS (3), /* fromint. */
1338 COSTS_N_INSNS (3) /* roundint. */
1339 }
1340 },
1341 /* Vector */
1342 {
1343 COSTS_N_INSNS (1) /* alu. */
1344 }
1345 };
1346
1347 const struct cpu_cost_table cortexa12_extra_costs =
1348 {
1349 /* ALU */
1350 {
1351 0, /* arith. */
1352 0, /* logical. */
1353 0, /* shift. */
1354 COSTS_N_INSNS (1), /* shift_reg. */
1355 COSTS_N_INSNS (1), /* arith_shift. */
1356 COSTS_N_INSNS (1), /* arith_shift_reg. */
1357 COSTS_N_INSNS (1), /* log_shift. */
1358 COSTS_N_INSNS (1), /* log_shift_reg. */
1359 0, /* extend. */
1360 COSTS_N_INSNS (1), /* extend_arith. */
1361 0, /* bfi. */
1362 COSTS_N_INSNS (1), /* bfx. */
1363 COSTS_N_INSNS (1), /* clz. */
1364 COSTS_N_INSNS (1), /* rev. */
1365 0, /* non_exec. */
1366 true /* non_exec_costs_exec. */
1367 },
1368 /* MULT SImode */
1369 {
1370 {
1371 COSTS_N_INSNS (2), /* simple. */
1372 COSTS_N_INSNS (3), /* flag_setting. */
1373 COSTS_N_INSNS (2), /* extend. */
1374 COSTS_N_INSNS (3), /* add. */
1375 COSTS_N_INSNS (2), /* extend_add. */
1376 COSTS_N_INSNS (18) /* idiv. */
1377 },
1378 /* MULT DImode */
1379 {
1380 0, /* simple (N/A). */
1381 0, /* flag_setting (N/A). */
1382 COSTS_N_INSNS (3), /* extend. */
1383 0, /* add (N/A). */
1384 COSTS_N_INSNS (3), /* extend_add. */
1385 0 /* idiv (N/A). */
1386 }
1387 },
1388 /* LD/ST */
1389 {
1390 COSTS_N_INSNS (3), /* load. */
1391 COSTS_N_INSNS (3), /* load_sign_extend. */
1392 COSTS_N_INSNS (3), /* ldrd. */
1393 COSTS_N_INSNS (3), /* ldm_1st. */
1394 1, /* ldm_regs_per_insn_1st. */
1395 2, /* ldm_regs_per_insn_subsequent. */
1396 COSTS_N_INSNS (3), /* loadf. */
1397 COSTS_N_INSNS (3), /* loadd. */
1398 0, /* load_unaligned. */
1399 0, /* store. */
1400 0, /* strd. */
1401 0, /* stm_1st. */
1402 1, /* stm_regs_per_insn_1st. */
1403 2, /* stm_regs_per_insn_subsequent. */
1404 COSTS_N_INSNS (2), /* storef. */
1405 COSTS_N_INSNS (2), /* stored. */
1406 0 /* store_unaligned. */
1407 },
1408 {
1409 /* FP SFmode */
1410 {
1411 COSTS_N_INSNS (17), /* div. */
1412 COSTS_N_INSNS (4), /* mult. */
1413 COSTS_N_INSNS (8), /* mult_addsub. */
1414 COSTS_N_INSNS (8), /* fma. */
1415 COSTS_N_INSNS (4), /* addsub. */
1416 COSTS_N_INSNS (2), /* fpconst. */
1417 COSTS_N_INSNS (2), /* neg. */
1418 COSTS_N_INSNS (2), /* compare. */
1419 COSTS_N_INSNS (4), /* widen. */
1420 COSTS_N_INSNS (4), /* narrow. */
1421 COSTS_N_INSNS (4), /* toint. */
1422 COSTS_N_INSNS (4), /* fromint. */
1423 COSTS_N_INSNS (4) /* roundint. */
1424 },
1425 /* FP DFmode */
1426 {
1427 COSTS_N_INSNS (31), /* div. */
1428 COSTS_N_INSNS (4), /* mult. */
1429 COSTS_N_INSNS (8), /* mult_addsub. */
1430 COSTS_N_INSNS (8), /* fma. */
1431 COSTS_N_INSNS (4), /* addsub. */
1432 COSTS_N_INSNS (2), /* fpconst. */
1433 COSTS_N_INSNS (2), /* neg. */
1434 COSTS_N_INSNS (2), /* compare. */
1435 COSTS_N_INSNS (4), /* widen. */
1436 COSTS_N_INSNS (4), /* narrow. */
1437 COSTS_N_INSNS (4), /* toint. */
1438 COSTS_N_INSNS (4), /* fromint. */
1439 COSTS_N_INSNS (4) /* roundint. */
1440 }
1441 },
1442 /* Vector */
1443 {
1444 COSTS_N_INSNS (1) /* alu. */
1445 }
1446 };
1447
1448 const struct cpu_cost_table cortexa15_extra_costs =
1449 {
1450 /* ALU */
1451 {
1452 0, /* arith. */
1453 0, /* logical. */
1454 0, /* shift. */
1455 0, /* shift_reg. */
1456 COSTS_N_INSNS (1), /* arith_shift. */
1457 COSTS_N_INSNS (1), /* arith_shift_reg. */
1458 COSTS_N_INSNS (1), /* log_shift. */
1459 COSTS_N_INSNS (1), /* log_shift_reg. */
1460 0, /* extend. */
1461 COSTS_N_INSNS (1), /* extend_arith. */
1462 COSTS_N_INSNS (1), /* bfi. */
1463 0, /* bfx. */
1464 0, /* clz. */
1465 0, /* rev. */
1466 0, /* non_exec. */
1467 true /* non_exec_costs_exec. */
1468 },
1469 /* MULT SImode */
1470 {
1471 {
1472 COSTS_N_INSNS (2), /* simple. */
1473 COSTS_N_INSNS (3), /* flag_setting. */
1474 COSTS_N_INSNS (2), /* extend. */
1475 COSTS_N_INSNS (2), /* add. */
1476 COSTS_N_INSNS (2), /* extend_add. */
1477 COSTS_N_INSNS (18) /* idiv. */
1478 },
1479 /* MULT DImode */
1480 {
1481 0, /* simple (N/A). */
1482 0, /* flag_setting (N/A). */
1483 COSTS_N_INSNS (3), /* extend. */
1484 0, /* add (N/A). */
1485 COSTS_N_INSNS (3), /* extend_add. */
1486 0 /* idiv (N/A). */
1487 }
1488 },
1489 /* LD/ST */
1490 {
1491 COSTS_N_INSNS (3), /* load. */
1492 COSTS_N_INSNS (3), /* load_sign_extend. */
1493 COSTS_N_INSNS (3), /* ldrd. */
1494 COSTS_N_INSNS (4), /* ldm_1st. */
1495 1, /* ldm_regs_per_insn_1st. */
1496 2, /* ldm_regs_per_insn_subsequent. */
1497 COSTS_N_INSNS (4), /* loadf. */
1498 COSTS_N_INSNS (4), /* loadd. */
1499 0, /* load_unaligned. */
1500 0, /* store. */
1501 0, /* strd. */
1502 COSTS_N_INSNS (1), /* stm_1st. */
1503 1, /* stm_regs_per_insn_1st. */
1504 2, /* stm_regs_per_insn_subsequent. */
1505 0, /* storef. */
1506 0, /* stored. */
1507 0 /* store_unaligned. */
1508 },
1509 {
1510 /* FP SFmode */
1511 {
1512 COSTS_N_INSNS (17), /* div. */
1513 COSTS_N_INSNS (4), /* mult. */
1514 COSTS_N_INSNS (8), /* mult_addsub. */
1515 COSTS_N_INSNS (8), /* fma. */
1516 COSTS_N_INSNS (4), /* addsub. */
1517 COSTS_N_INSNS (2), /* fpconst. */
1518 COSTS_N_INSNS (2), /* neg. */
1519 COSTS_N_INSNS (5), /* compare. */
1520 COSTS_N_INSNS (4), /* widen. */
1521 COSTS_N_INSNS (4), /* narrow. */
1522 COSTS_N_INSNS (4), /* toint. */
1523 COSTS_N_INSNS (4), /* fromint. */
1524 COSTS_N_INSNS (4) /* roundint. */
1525 },
1526 /* FP DFmode */
1527 {
1528 COSTS_N_INSNS (31), /* div. */
1529 COSTS_N_INSNS (4), /* mult. */
1530 COSTS_N_INSNS (8), /* mult_addsub. */
1531 COSTS_N_INSNS (8), /* fma. */
1532 COSTS_N_INSNS (4), /* addsub. */
1533 COSTS_N_INSNS (2), /* fpconst. */
1534 COSTS_N_INSNS (2), /* neg. */
1535 COSTS_N_INSNS (2), /* compare. */
1536 COSTS_N_INSNS (4), /* widen. */
1537 COSTS_N_INSNS (4), /* narrow. */
1538 COSTS_N_INSNS (4), /* toint. */
1539 COSTS_N_INSNS (4), /* fromint. */
1540 COSTS_N_INSNS (4) /* roundint. */
1541 }
1542 },
1543 /* Vector */
1544 {
1545 COSTS_N_INSNS (1) /* alu. */
1546 }
1547 };
1548
1549 const struct cpu_cost_table v7m_extra_costs =
1550 {
1551 /* ALU */
1552 {
1553 0, /* arith. */
1554 0, /* logical. */
1555 0, /* shift. */
1556 0, /* shift_reg. */
1557 0, /* arith_shift. */
1558 COSTS_N_INSNS (1), /* arith_shift_reg. */
1559 0, /* log_shift. */
1560 COSTS_N_INSNS (1), /* log_shift_reg. */
1561 0, /* extend. */
1562 COSTS_N_INSNS (1), /* extend_arith. */
1563 0, /* bfi. */
1564 0, /* bfx. */
1565 0, /* clz. */
1566 0, /* rev. */
1567 COSTS_N_INSNS (1), /* non_exec. */
1568 false /* non_exec_costs_exec. */
1569 },
1570 {
1571 /* MULT SImode */
1572 {
1573 COSTS_N_INSNS (1), /* simple. */
1574 COSTS_N_INSNS (1), /* flag_setting. */
1575 COSTS_N_INSNS (2), /* extend. */
1576 COSTS_N_INSNS (1), /* add. */
1577 COSTS_N_INSNS (3), /* extend_add. */
1578 COSTS_N_INSNS (8) /* idiv. */
1579 },
1580 /* MULT DImode */
1581 {
1582 0, /* simple (N/A). */
1583 0, /* flag_setting (N/A). */
1584 COSTS_N_INSNS (2), /* extend. */
1585 0, /* add (N/A). */
1586 COSTS_N_INSNS (3), /* extend_add. */
1587 0 /* idiv (N/A). */
1588 }
1589 },
1590 /* LD/ST */
1591 {
1592 COSTS_N_INSNS (2), /* load. */
1593 0, /* load_sign_extend. */
1594 COSTS_N_INSNS (3), /* ldrd. */
1595 COSTS_N_INSNS (2), /* ldm_1st. */
1596 1, /* ldm_regs_per_insn_1st. */
1597 1, /* ldm_regs_per_insn_subsequent. */
1598 COSTS_N_INSNS (2), /* loadf. */
1599 COSTS_N_INSNS (3), /* loadd. */
1600 COSTS_N_INSNS (1), /* load_unaligned. */
1601 COSTS_N_INSNS (2), /* store. */
1602 COSTS_N_INSNS (3), /* strd. */
1603 COSTS_N_INSNS (2), /* stm_1st. */
1604 1, /* stm_regs_per_insn_1st. */
1605 1, /* stm_regs_per_insn_subsequent. */
1606 COSTS_N_INSNS (2), /* storef. */
1607 COSTS_N_INSNS (3), /* stored. */
1608 COSTS_N_INSNS (1) /* store_unaligned. */
1609 },
1610 {
1611 /* FP SFmode */
1612 {
1613 COSTS_N_INSNS (7), /* div. */
1614 COSTS_N_INSNS (2), /* mult. */
1615 COSTS_N_INSNS (5), /* mult_addsub. */
1616 COSTS_N_INSNS (3), /* fma. */
1617 COSTS_N_INSNS (1), /* addsub. */
1618 0, /* fpconst. */
1619 0, /* neg. */
1620 0, /* compare. */
1621 0, /* widen. */
1622 0, /* narrow. */
1623 0, /* toint. */
1624 0, /* fromint. */
1625 0 /* roundint. */
1626 },
1627 /* FP DFmode */
1628 {
1629 COSTS_N_INSNS (15), /* div. */
1630 COSTS_N_INSNS (5), /* mult. */
1631 COSTS_N_INSNS (7), /* mult_addsub. */
1632 COSTS_N_INSNS (7), /* fma. */
1633 COSTS_N_INSNS (3), /* addsub. */
1634 0, /* fpconst. */
1635 0, /* neg. */
1636 0, /* compare. */
1637 0, /* widen. */
1638 0, /* narrow. */
1639 0, /* toint. */
1640 0, /* fromint. */
1641 0 /* roundint. */
1642 }
1643 },
1644 /* Vector */
1645 {
1646 COSTS_N_INSNS (1) /* alu. */
1647 }
1648 };
1649
1650 const struct tune_params arm_slowmul_tune =
1651 {
1652 arm_slowmul_rtx_costs,
1653 NULL,
1654 NULL, /* Sched adj cost. */
1655 3, /* Constant limit. */
1656 5, /* Max cond insns. */
1657 ARM_PREFETCH_NOT_BENEFICIAL,
1658 true, /* Prefer constant pool. */
1659 arm_default_branch_cost,
1660 false, /* Prefer LDRD/STRD. */
1661 {true, true}, /* Prefer non short circuit. */
1662 &arm_default_vec_cost, /* Vectorizer costs. */
1663 false, /* Prefer Neon for 64-bits bitops. */
1664 false, false, /* Prefer 32-bit encodings. */
1665 false, /* Prefer Neon for stringops. */
1666 8 /* Maximum insns to inline memset. */
1667 };
1668
1669 const struct tune_params arm_fastmul_tune =
1670 {
1671 arm_fastmul_rtx_costs,
1672 NULL,
1673 NULL, /* Sched adj cost. */
1674 1, /* Constant limit. */
1675 5, /* Max cond insns. */
1676 ARM_PREFETCH_NOT_BENEFICIAL,
1677 true, /* Prefer constant pool. */
1678 arm_default_branch_cost,
1679 false, /* Prefer LDRD/STRD. */
1680 {true, true}, /* Prefer non short circuit. */
1681 &arm_default_vec_cost, /* Vectorizer costs. */
1682 false, /* Prefer Neon for 64-bits bitops. */
1683 false, false, /* Prefer 32-bit encodings. */
1684 false, /* Prefer Neon for stringops. */
1685 8 /* Maximum insns to inline memset. */
1686 };
1687
1688 /* StrongARM has early execution of branches, so a sequence that is worth
1689 skipping is shorter. Set max_insns_skipped to a lower value. */
1690
1691 const struct tune_params arm_strongarm_tune =
1692 {
1693 arm_fastmul_rtx_costs,
1694 NULL,
1695 NULL, /* Sched adj cost. */
1696 1, /* Constant limit. */
1697 3, /* Max cond insns. */
1698 ARM_PREFETCH_NOT_BENEFICIAL,
1699 true, /* Prefer constant pool. */
1700 arm_default_branch_cost,
1701 false, /* Prefer LDRD/STRD. */
1702 {true, true}, /* Prefer non short circuit. */
1703 &arm_default_vec_cost, /* Vectorizer costs. */
1704 false, /* Prefer Neon for 64-bits bitops. */
1705 false, false, /* Prefer 32-bit encodings. */
1706 false, /* Prefer Neon for stringops. */
1707 8 /* Maximum insns to inline memset. */
1708 };
1709
1710 const struct tune_params arm_xscale_tune =
1711 {
1712 arm_xscale_rtx_costs,
1713 NULL,
1714 xscale_sched_adjust_cost,
1715 2, /* Constant limit. */
1716 3, /* Max cond insns. */
1717 ARM_PREFETCH_NOT_BENEFICIAL,
1718 true, /* Prefer constant pool. */
1719 arm_default_branch_cost,
1720 false, /* Prefer LDRD/STRD. */
1721 {true, true}, /* Prefer non short circuit. */
1722 &arm_default_vec_cost, /* Vectorizer costs. */
1723 false, /* Prefer Neon for 64-bits bitops. */
1724 false, false, /* Prefer 32-bit encodings. */
1725 false, /* Prefer Neon for stringops. */
1726 8 /* Maximum insns to inline memset. */
1727 };
1728
1729 const struct tune_params arm_9e_tune =
1730 {
1731 arm_9e_rtx_costs,
1732 NULL,
1733 NULL, /* Sched adj cost. */
1734 1, /* Constant limit. */
1735 5, /* Max cond insns. */
1736 ARM_PREFETCH_NOT_BENEFICIAL,
1737 true, /* Prefer constant pool. */
1738 arm_default_branch_cost,
1739 false, /* Prefer LDRD/STRD. */
1740 {true, true}, /* Prefer non short circuit. */
1741 &arm_default_vec_cost, /* Vectorizer costs. */
1742 false, /* Prefer Neon for 64-bits bitops. */
1743 false, false, /* Prefer 32-bit encodings. */
1744 false, /* Prefer Neon for stringops. */
1745 8 /* Maximum insns to inline memset. */
1746 };
1747
1748 const struct tune_params arm_v6t2_tune =
1749 {
1750 arm_9e_rtx_costs,
1751 NULL,
1752 NULL, /* Sched adj cost. */
1753 1, /* Constant limit. */
1754 5, /* Max cond insns. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 false, /* Prefer constant pool. */
1757 arm_default_branch_cost,
1758 false, /* Prefer LDRD/STRD. */
1759 {true, true}, /* Prefer non short circuit. */
1760 &arm_default_vec_cost, /* Vectorizer costs. */
1761 false, /* Prefer Neon for 64-bits bitops. */
1762 false, false, /* Prefer 32-bit encodings. */
1763 false, /* Prefer Neon for stringops. */
1764 8 /* Maximum insns to inline memset. */
1765 };
1766
1767 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1768 const struct tune_params arm_cortex_tune =
1769 {
1770 arm_9e_rtx_costs,
1771 &generic_extra_costs,
1772 NULL, /* Sched adj cost. */
1773 1, /* Constant limit. */
1774 5, /* Max cond insns. */
1775 ARM_PREFETCH_NOT_BENEFICIAL,
1776 false, /* Prefer constant pool. */
1777 arm_default_branch_cost,
1778 false, /* Prefer LDRD/STRD. */
1779 {true, true}, /* Prefer non short circuit. */
1780 &arm_default_vec_cost, /* Vectorizer costs. */
1781 false, /* Prefer Neon for 64-bits bitops. */
1782 false, false, /* Prefer 32-bit encodings. */
1783 false, /* Prefer Neon for stringops. */
1784 8 /* Maximum insns to inline memset. */
1785 };
1786
1787 const struct tune_params arm_cortex_a8_tune =
1788 {
1789 arm_9e_rtx_costs,
1790 &cortexa8_extra_costs,
1791 NULL, /* Sched adj cost. */
1792 1, /* Constant limit. */
1793 5, /* Max cond insns. */
1794 ARM_PREFETCH_NOT_BENEFICIAL,
1795 false, /* Prefer constant pool. */
1796 arm_default_branch_cost,
1797 false, /* Prefer LDRD/STRD. */
1798 {true, true}, /* Prefer non short circuit. */
1799 &arm_default_vec_cost, /* Vectorizer costs. */
1800 false, /* Prefer Neon for 64-bits bitops. */
1801 false, false, /* Prefer 32-bit encodings. */
1802 true, /* Prefer Neon for stringops. */
1803 8 /* Maximum insns to inline memset. */
1804 };
1805
1806 const struct tune_params arm_cortex_a7_tune =
1807 {
1808 arm_9e_rtx_costs,
1809 &cortexa7_extra_costs,
1810 NULL,
1811 1, /* Constant limit. */
1812 5, /* Max cond insns. */
1813 ARM_PREFETCH_NOT_BENEFICIAL,
1814 false, /* Prefer constant pool. */
1815 arm_default_branch_cost,
1816 false, /* Prefer LDRD/STRD. */
1817 {true, true}, /* Prefer non short circuit. */
1818 &arm_default_vec_cost, /* Vectorizer costs. */
1819 false, /* Prefer Neon for 64-bits bitops. */
1820 false, false, /* Prefer 32-bit encodings. */
1821 true, /* Prefer Neon for stringops. */
1822 8 /* Maximum insns to inline memset. */
1823 };
1824
1825 const struct tune_params arm_cortex_a15_tune =
1826 {
1827 arm_9e_rtx_costs,
1828 &cortexa15_extra_costs,
1829 NULL, /* Sched adj cost. */
1830 1, /* Constant limit. */
1831 2, /* Max cond insns. */
1832 ARM_PREFETCH_NOT_BENEFICIAL,
1833 false, /* Prefer constant pool. */
1834 arm_default_branch_cost,
1835 true, /* Prefer LDRD/STRD. */
1836 {true, true}, /* Prefer non short circuit. */
1837 &arm_default_vec_cost, /* Vectorizer costs. */
1838 false, /* Prefer Neon for 64-bits bitops. */
1839 true, true, /* Prefer 32-bit encodings. */
1840 true, /* Prefer Neon for stringops. */
1841 8 /* Maximum insns to inline memset. */
1842 };
1843
1844 const struct tune_params arm_cortex_a53_tune =
1845 {
1846 arm_9e_rtx_costs,
1847 &cortexa53_extra_costs,
1848 NULL, /* Scheduler cost adjustment. */
1849 1, /* Constant limit. */
1850 5, /* Max cond insns. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 false, /* Prefer constant pool. */
1853 arm_default_branch_cost,
1854 false, /* Prefer LDRD/STRD. */
1855 {true, true}, /* Prefer non short circuit. */
1856 &arm_default_vec_cost, /* Vectorizer costs. */
1857 false, /* Prefer Neon for 64-bits bitops. */
1858 false, false, /* Prefer 32-bit encodings. */
1859 false, /* Prefer Neon for stringops. */
1860 8 /* Maximum insns to inline memset. */
1861 };
1862
1863 const struct tune_params arm_cortex_a57_tune =
1864 {
1865 arm_9e_rtx_costs,
1866 &cortexa57_extra_costs,
1867 NULL, /* Scheduler cost adjustment. */
1868 1, /* Constant limit. */
1869 2, /* Max cond insns. */
1870 ARM_PREFETCH_NOT_BENEFICIAL,
1871 false, /* Prefer constant pool. */
1872 arm_default_branch_cost,
1873 true, /* Prefer LDRD/STRD. */
1874 {true, true}, /* Prefer non short circuit. */
1875 &arm_default_vec_cost, /* Vectorizer costs. */
1876 false, /* Prefer Neon for 64-bits bitops. */
1877 true, true, /* Prefer 32-bit encodings. */
1878 false, /* Prefer Neon for stringops. */
1879 8 /* Maximum insns to inline memset. */
1880 };
1881
1882 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1883 less appealing. Set max_insns_skipped to a low value. */
1884
1885 const struct tune_params arm_cortex_a5_tune =
1886 {
1887 arm_9e_rtx_costs,
1888 &cortexa5_extra_costs,
1889 NULL, /* Sched adj cost. */
1890 1, /* Constant limit. */
1891 1, /* Max cond insns. */
1892 ARM_PREFETCH_NOT_BENEFICIAL,
1893 false, /* Prefer constant pool. */
1894 arm_cortex_a5_branch_cost,
1895 false, /* Prefer LDRD/STRD. */
1896 {false, false}, /* Prefer non short circuit. */
1897 &arm_default_vec_cost, /* Vectorizer costs. */
1898 false, /* Prefer Neon for 64-bits bitops. */
1899 false, false, /* Prefer 32-bit encodings. */
1900 true, /* Prefer Neon for stringops. */
1901 8 /* Maximum insns to inline memset. */
1902 };
1903
1904 const struct tune_params arm_cortex_a9_tune =
1905 {
1906 arm_9e_rtx_costs,
1907 &cortexa9_extra_costs,
1908 cortex_a9_sched_adjust_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 ARM_PREFETCH_BENEFICIAL(4,32,32),
1912 false, /* Prefer constant pool. */
1913 arm_default_branch_cost,
1914 false, /* Prefer LDRD/STRD. */
1915 {true, true}, /* Prefer non short circuit. */
1916 &arm_default_vec_cost, /* Vectorizer costs. */
1917 false, /* Prefer Neon for 64-bits bitops. */
1918 false, false, /* Prefer 32-bit encodings. */
1919 false, /* Prefer Neon for stringops. */
1920 8 /* Maximum insns to inline memset. */
1921 };
1922
1923 const struct tune_params arm_cortex_a12_tune =
1924 {
1925 arm_9e_rtx_costs,
1926 &cortexa12_extra_costs,
1927 NULL,
1928 1, /* Constant limit. */
1929 5, /* Max cond insns. */
1930 ARM_PREFETCH_BENEFICIAL(4,32,32),
1931 false, /* Prefer constant pool. */
1932 arm_default_branch_cost,
1933 true, /* Prefer LDRD/STRD. */
1934 {true, true}, /* Prefer non short circuit. */
1935 &arm_default_vec_cost, /* Vectorizer costs. */
1936 false, /* Prefer Neon for 64-bits bitops. */
1937 false, false, /* Prefer 32-bit encodings. */
1938 true, /* Prefer Neon for stringops. */
1939 8 /* Maximum insns to inline memset. */
1940 };
1941
1942 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
1943 single cycle, so the pair costs two cycles. An LDR from the constant pool
1944 likewise takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
1945 loads/stores can be pipelined together, saving one cycle), and may also
1946 improve icache utilisation. Hence we prefer the constant pool for such
1947 processors. */
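/* (Illustrative sketch, not compiler output: a 32-bit constant such as
   0x12345678 can be materialised either with a MOVW/MOVT pair,
       movw  r0, #0x5678
       movt  r0, #0x1234
   or with a PC-relative literal-pool load,
       ldr   r0, .LC0
   Both forms cost two cycles on such cores, so the pool is preferred here
   for the pipelining and icache reasons described above.)  */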
1948
1949 const struct tune_params arm_v7m_tune =
1950 {
1951 arm_9e_rtx_costs,
1952 &v7m_extra_costs,
1953 NULL, /* Sched adj cost. */
1954 1, /* Constant limit. */
1955 2, /* Max cond insns. */
1956 ARM_PREFETCH_NOT_BENEFICIAL,
1957 true, /* Prefer constant pool. */
1958 arm_cortex_m_branch_cost,
1959 false, /* Prefer LDRD/STRD. */
1960 {false, false}, /* Prefer non short circuit. */
1961 &arm_default_vec_cost, /* Vectorizer costs. */
1962 false, /* Prefer Neon for 64-bits bitops. */
1963 false, false, /* Prefer 32-bit encodings. */
1964 false, /* Prefer Neon for stringops. */
1965 8 /* Maximum insns to inline memset. */
1966 };
1967
1968 /* Cortex-M7 tuning. */
1969
1970 const struct tune_params arm_cortex_m7_tune =
1971 {
1972 arm_9e_rtx_costs,
1973 &v7m_extra_costs,
1974 NULL, /* Sched adj cost. */
1975 0, /* Constant limit. */
1976 0, /* Max cond insns. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 true, /* Prefer constant pool. */
1979 arm_cortex_m_branch_cost,
1980 false, /* Prefer LDRD/STRD. */
1981 {true, true}, /* Prefer non short circuit. */
1982 &arm_default_vec_cost, /* Vectorizer costs. */
1983 false, /* Prefer Neon for 64-bits bitops. */
1984 false, false, /* Prefer 32-bit encodings. */
1985 false, /* Prefer Neon for stringops. */
1986 8 /* Maximum insns to inline memset. */
1987 };
1988
1989 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1990 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1991 const struct tune_params arm_v6m_tune =
1992 {
1993 arm_9e_rtx_costs,
1994 NULL,
1995 NULL, /* Sched adj cost. */
1996 1, /* Constant limit. */
1997 5, /* Max cond insns. */
1998 ARM_PREFETCH_NOT_BENEFICIAL,
1999 false, /* Prefer constant pool. */
2000 arm_default_branch_cost,
2001 false, /* Prefer LDRD/STRD. */
2002 {false, false}, /* Prefer non short circuit. */
2003 &arm_default_vec_cost, /* Vectorizer costs. */
2004 false, /* Prefer Neon for 64-bits bitops. */
2005 false, false, /* Prefer 32-bit encodings. */
2006 false, /* Prefer Neon for stringops. */
2007 8 /* Maximum insns to inline memset. */
2008 };
2009
2010 const struct tune_params arm_fa726te_tune =
2011 {
2012 arm_9e_rtx_costs,
2013 NULL,
2014 fa726te_sched_adjust_cost,
2015 1, /* Constant limit. */
2016 5, /* Max cond insns. */
2017 ARM_PREFETCH_NOT_BENEFICIAL,
2018 true, /* Prefer constant pool. */
2019 arm_default_branch_cost,
2020 false, /* Prefer LDRD/STRD. */
2021 {true, true}, /* Prefer non short circuit. */
2022 &arm_default_vec_cost, /* Vectorizer costs. */
2023 false, /* Prefer Neon for 64-bits bitops. */
2024 false, false, /* Prefer 32-bit encodings. */
2025 false, /* Prefer Neon for stringops. */
2026 8 /* Maximum insns to inline memset. */
2027 };
2028
2029
2030 /* Not all of these give usefully different compilation alternatives,
2031 but there is no simple way of generalizing them. */
2032 static const struct processors all_cores[] =
2033 {
2034 /* ARM Cores */
2035 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2036 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2037 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2038 #include "arm-cores.def"
2039 #undef ARM_CORE
2040 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2041 };
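/* For example (the entry is shown schematically; the real definitions
   live in arm-cores.def), a line of the form
       ARM_CORE ("cortex-a15", cortexa15, cortexa15, 7A, FLAGS, cortex_a15)
   expands through the macro above into
       {"cortex-a15", cortexa15, "7A", BASE_ARCH_7A,
        FLAGS | FL_FOR_ARCH7A, &arm_cortex_a15_tune},  */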
2042
2043 static const struct processors all_architectures[] =
2044 {
2045 /* ARM Architectures */
2046 /* We don't specify tuning costs here as it will be figured out
2047 from the core. */
2048
2049 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2050 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2051 #include "arm-arches.def"
2052 #undef ARM_ARCH
2053 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2054 };
2055
2056
2057 /* These are populated as command-line arguments are processed, or NULL
2058 if not specified. */
2059 static const struct processors *arm_selected_arch;
2060 static const struct processors *arm_selected_cpu;
2061 static const struct processors *arm_selected_tune;
2062
2063 /* The name of the preprocessor macro to define for this architecture. */
2064
2065 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2066
2067 /* Available values for -mfpu=. */
2068
2069 static const struct arm_fpu_desc all_fpus[] =
2070 {
2071 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2072 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2073 #include "arm-fpus.def"
2074 #undef ARM_FPU
2075 };
2076
2077
2078 /* Supported TLS relocations. */
2079
2080 enum tls_reloc {
2081 TLS_GD32,
2082 TLS_LDM32,
2083 TLS_LDO32,
2084 TLS_IE32,
2085 TLS_LE32,
2086 TLS_DESCSEQ /* GNU scheme */
2087 };
2088
2089 /* The maximum number of insns to be used when loading a constant. */
2090 inline static int
2091 arm_constant_limit (bool size_p)
2092 {
2093 return size_p ? 1 : current_tune->constant_limit;
2094 }
2095
2096 /* Emit an insn that's a simple single-set. Both the operands must be known
2097 to be valid. */
2098 inline static rtx_insn *
2099 emit_set_insn (rtx x, rtx y)
2100 {
2101 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2102 }
2103
2104 /* Return the number of bits set in VALUE. */
2105 static unsigned
2106 bit_count (unsigned long value)
2107 {
2108 unsigned long count = 0;
2109
2110 while (value)
2111 {
2112 count++;
2113 value &= value - 1; /* Clear the least-significant set bit. */
2114 }
2115
2116 return count;
2117 }
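/* For example, bit_count (0x13) returns 3: 10011 -> 10010 -> 10000 -> 0,
   one set bit cleared per iteration.  */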
2118
2119 typedef struct
2120 {
2121 machine_mode mode;
2122 const char *name;
2123 } arm_fixed_mode_set;
2124
2125 /* A small helper for setting fixed-point libfuncs. */
2126
2127 static void
2128 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2129 const char *funcname, const char *modename,
2130 int num_suffix)
2131 {
2132 char buffer[50];
2133
2134 if (num_suffix == 0)
2135 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2136 else
2137 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2138
2139 set_optab_libfunc (optable, mode, buffer);
2140 }
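/* For instance, the first call in the loop below,
       arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3),
   builds and registers the libcall name "__gnu_addqq3".  */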
2141
2142 static void
2143 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2144 machine_mode from, const char *funcname,
2145 const char *toname, const char *fromname)
2146 {
2147 char buffer[50];
2148 const char *maybe_suffix_2 = "";
2149
2150 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2151 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2152 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2153 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2154 maybe_suffix_2 = "2";
2155
2156 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2157 maybe_suffix_2);
2158
2159 set_conv_libfunc (optable, to, from, buffer);
2160 }
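/* Likewise, arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
   "fract", "hq", "qq") registers "__gnu_fractqqhq2" for the QQmode to
   HQmode conversion; the "2" suffix applies because both modes are
   signed fractional modes.  */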
2161
2162 /* Set up library functions unique to ARM. */
2163
2164 static void
2165 arm_init_libfuncs (void)
2166 {
2167 /* For Linux, we have access to kernel support for atomic operations. */
2168 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2169 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2170
2171 /* There are no special library functions unless we are using the
2172 ARM BPABI. */
2173 if (!TARGET_BPABI)
2174 return;
2175
2176 /* The functions below are described in Section 4 of the "Run-Time
2177 ABI for the ARM architecture", Version 1.0. */
2178
2179 /* Double-precision floating-point arithmetic. Table 2. */
2180 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2181 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2182 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2183 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2184 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2185
2186 /* Double-precision comparisons. Table 3. */
2187 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2188 set_optab_libfunc (ne_optab, DFmode, NULL);
2189 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2190 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2191 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2192 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2193 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2194
2195 /* Single-precision floating-point arithmetic. Table 4. */
2196 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2197 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2198 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2199 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2200 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2201
2202 /* Single-precision comparisons. Table 5. */
2203 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2204 set_optab_libfunc (ne_optab, SFmode, NULL);
2205 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2206 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2207 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2208 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2209 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2210
2211 /* Floating-point to integer conversions. Table 6. */
2212 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2213 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2214 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2215 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2216 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2217 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2218 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2219 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2220
2221 /* Conversions between floating types. Table 7. */
2222 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2223 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2224
2225 /* Integer to floating-point conversions. Table 8. */
2226 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2227 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2228 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2229 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2230 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2231 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2232 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2233 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2234
2235 /* Long long. Table 9. */
2236 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2237 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2238 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2239 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2240 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2241 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2242 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2243 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2244
2245 /* Integer (32/32->32) division. \S 4.3.1. */
2246 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2247 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2248
2249 /* The divmod functions are designed so that they can be used for
2250 plain division, even though they return both the quotient and the
2251 remainder. The quotient is returned in the usual location (i.e.,
2252 r0 for SImode, {r0, r1} for DImode), just as would be expected
2253 for an ordinary division routine. Because the AAPCS calling
2254 conventions specify that all of { r0, r1, r2, r3 } are
2255 call-clobbered registers, there is no need to tell the compiler
2256 explicitly that those registers are clobbered by these
2257 routines. */
2258 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2259 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2260
2261 /* For SImode division the ABI provides div-without-mod routines,
2262 which are faster. */
2263 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2264 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2265
2266 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2267 divmod libcalls instead. */
2268 set_optab_libfunc (smod_optab, DImode, NULL);
2269 set_optab_libfunc (umod_optab, DImode, NULL);
2270 set_optab_libfunc (smod_optab, SImode, NULL);
2271 set_optab_libfunc (umod_optab, SImode, NULL);
2272
2273 /* Half-precision float operations. The compiler handles all operations
2274 with NULL libfuncs by converting to SFmode. */
2275 switch (arm_fp16_format)
2276 {
2277 case ARM_FP16_FORMAT_IEEE:
2278 case ARM_FP16_FORMAT_ALTERNATIVE:
2279
2280 /* Conversions. */
2281 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2282 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2283 ? "__gnu_f2h_ieee"
2284 : "__gnu_f2h_alternative"));
2285 set_conv_libfunc (sext_optab, SFmode, HFmode,
2286 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2287 ? "__gnu_h2f_ieee"
2288 : "__gnu_h2f_alternative"));
2289
2290 /* Arithmetic. */
2291 set_optab_libfunc (add_optab, HFmode, NULL);
2292 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2293 set_optab_libfunc (smul_optab, HFmode, NULL);
2294 set_optab_libfunc (neg_optab, HFmode, NULL);
2295 set_optab_libfunc (sub_optab, HFmode, NULL);
2296
2297 /* Comparisons. */
2298 set_optab_libfunc (eq_optab, HFmode, NULL);
2299 set_optab_libfunc (ne_optab, HFmode, NULL);
2300 set_optab_libfunc (lt_optab, HFmode, NULL);
2301 set_optab_libfunc (le_optab, HFmode, NULL);
2302 set_optab_libfunc (ge_optab, HFmode, NULL);
2303 set_optab_libfunc (gt_optab, HFmode, NULL);
2304 set_optab_libfunc (unord_optab, HFmode, NULL);
2305 break;
2306
2307 default:
2308 break;
2309 }
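  /* In effect, an HFmode operation such as a + b is now expanded by
     widening both operands with the __gnu_h2f_* routine registered
     above, performing the arithmetic in SFmode, and narrowing the
     result back with the matching __gnu_f2h_* routine.  */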
2310
2311 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2312 {
2313 const arm_fixed_mode_set fixed_arith_modes[] =
2314 {
2315 { QQmode, "qq" },
2316 { UQQmode, "uqq" },
2317 { HQmode, "hq" },
2318 { UHQmode, "uhq" },
2319 { SQmode, "sq" },
2320 { USQmode, "usq" },
2321 { DQmode, "dq" },
2322 { UDQmode, "udq" },
2323 { TQmode, "tq" },
2324 { UTQmode, "utq" },
2325 { HAmode, "ha" },
2326 { UHAmode, "uha" },
2327 { SAmode, "sa" },
2328 { USAmode, "usa" },
2329 { DAmode, "da" },
2330 { UDAmode, "uda" },
2331 { TAmode, "ta" },
2332 { UTAmode, "uta" }
2333 };
2334 const arm_fixed_mode_set fixed_conv_modes[] =
2335 {
2336 { QQmode, "qq" },
2337 { UQQmode, "uqq" },
2338 { HQmode, "hq" },
2339 { UHQmode, "uhq" },
2340 { SQmode, "sq" },
2341 { USQmode, "usq" },
2342 { DQmode, "dq" },
2343 { UDQmode, "udq" },
2344 { TQmode, "tq" },
2345 { UTQmode, "utq" },
2346 { HAmode, "ha" },
2347 { UHAmode, "uha" },
2348 { SAmode, "sa" },
2349 { USAmode, "usa" },
2350 { DAmode, "da" },
2351 { UDAmode, "uda" },
2352 { TAmode, "ta" },
2353 { UTAmode, "uta" },
2354 { QImode, "qi" },
2355 { HImode, "hi" },
2356 { SImode, "si" },
2357 { DImode, "di" },
2358 { TImode, "ti" },
2359 { SFmode, "sf" },
2360 { DFmode, "df" }
2361 };
2362 unsigned int i, j;
2363
2364 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2365 {
2366 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2367 "add", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2369 "ssadd", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2371 "usadd", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2373 "sub", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2375 "sssub", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2377 "ussub", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2379 "mul", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2381 "ssmul", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2383 "usmul", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2385 "div", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2387 "udiv", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2389 "ssdiv", fixed_arith_modes[i].name, 3);
2390 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2391 "usdiv", fixed_arith_modes[i].name, 3);
2392 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2393 "neg", fixed_arith_modes[i].name, 2);
2394 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2395 "ssneg", fixed_arith_modes[i].name, 2);
2396 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2397 "usneg", fixed_arith_modes[i].name, 2);
2398 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2399 "ashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2401 "ashr", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2403 "lshr", fixed_arith_modes[i].name, 3);
2404 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2405 "ssashl", fixed_arith_modes[i].name, 3);
2406 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2407 "usashl", fixed_arith_modes[i].name, 3);
2408 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2409 "cmp", fixed_arith_modes[i].name, 2);
2410 }
2411
2412 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2413 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2414 {
2415 if (i == j
2416 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2417 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2418 continue;
2419
2420 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2421 fixed_conv_modes[j].mode, "fract",
2422 fixed_conv_modes[i].name,
2423 fixed_conv_modes[j].name);
2424 arm_set_fixed_conv_libfunc (satfract_optab,
2425 fixed_conv_modes[i].mode,
2426 fixed_conv_modes[j].mode, "satfract",
2427 fixed_conv_modes[i].name,
2428 fixed_conv_modes[j].name);
2429 arm_set_fixed_conv_libfunc (fractuns_optab,
2430 fixed_conv_modes[i].mode,
2431 fixed_conv_modes[j].mode, "fractuns",
2432 fixed_conv_modes[i].name,
2433 fixed_conv_modes[j].name);
2434 arm_set_fixed_conv_libfunc (satfractuns_optab,
2435 fixed_conv_modes[i].mode,
2436 fixed_conv_modes[j].mode, "satfractuns",
2437 fixed_conv_modes[i].name,
2438 fixed_conv_modes[j].name);
2439 }
2440 }
2441
2442 if (TARGET_AAPCS_BASED)
2443 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2444 }
2445
2446 /* On AAPCS systems, this is the "struct __va_list". */
2447 static GTY(()) tree va_list_type;
2448
2449 /* Return the type to use as __builtin_va_list. */
2450 static tree
2451 arm_build_builtin_va_list (void)
2452 {
2453 tree va_list_name;
2454 tree ap_field;
2455
2456 if (!TARGET_AAPCS_BASED)
2457 return std_build_builtin_va_list ();
2458
2459 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2460 defined as:
2461
2462 struct __va_list
2463 {
2464 void *__ap;
2465 };
2466
2467 The C Library ABI further reinforces this definition in \S
2468 4.1.
2469
2470 We must follow this definition exactly. The structure tag
2471 name is visible in C++ mangled names, and thus forms a part
2472 of the ABI. The field name may be used by people who
2473 #include <stdarg.h>. */
2474 /* Create the type. */
2475 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2476 /* Give it the required name. */
2477 va_list_name = build_decl (BUILTINS_LOCATION,
2478 TYPE_DECL,
2479 get_identifier ("__va_list"),
2480 va_list_type);
2481 DECL_ARTIFICIAL (va_list_name) = 1;
2482 TYPE_NAME (va_list_type) = va_list_name;
2483 TYPE_STUB_DECL (va_list_type) = va_list_name;
2484 /* Create the __ap field. */
2485 ap_field = build_decl (BUILTINS_LOCATION,
2486 FIELD_DECL,
2487 get_identifier ("__ap"),
2488 ptr_type_node);
2489 DECL_ARTIFICIAL (ap_field) = 1;
2490 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2491 TYPE_FIELDS (va_list_type) = ap_field;
2492 /* Compute its layout. */
2493 layout_type (va_list_type);
2494
2495 return va_list_type;
2496 }
2497
2498 /* Return an expression of type "void *" pointing to the next
2499 available argument in a variable-argument list. VALIST is the
2500 user-level va_list object, of type __builtin_va_list. */
2501 static tree
2502 arm_extract_valist_ptr (tree valist)
2503 {
2504 if (TREE_TYPE (valist) == error_mark_node)
2505 return error_mark_node;
2506
2507 /* On an AAPCS target, the pointer is stored within "struct
2508 va_list". */
2509 if (TARGET_AAPCS_BASED)
2510 {
2511 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2512 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2513 valist, ap_field, NULL_TREE);
2514 }
2515
2516 return valist;
2517 }
2518
2519 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2520 static void
2521 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2522 {
2523 valist = arm_extract_valist_ptr (valist);
2524 std_expand_builtin_va_start (valist, nextarg);
2525 }
2526
2527 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2528 static tree
2529 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2530 gimple_seq *post_p)
2531 {
2532 valist = arm_extract_valist_ptr (valist);
2533 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2534 }
2535
2536 /* Fix up any incompatible options that the user has specified. */
2537 static void
2538 arm_option_override (void)
2539 {
2540 if (global_options_set.x_arm_arch_option)
2541 arm_selected_arch = &all_architectures[arm_arch_option];
2542
2543 if (global_options_set.x_arm_cpu_option)
2544 {
2545 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2546 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2547 }
2548
2549 if (global_options_set.x_arm_tune_option)
2550 arm_selected_tune = &all_cores[(int) arm_tune_option];
2551
2552 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2553 SUBTARGET_OVERRIDE_OPTIONS;
2554 #endif
2555
2556 if (arm_selected_arch)
2557 {
2558 if (arm_selected_cpu)
2559 {
2560 /* Check for conflict between mcpu and march. */
2561 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2562 {
2563 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2564 arm_selected_cpu->name, arm_selected_arch->name);
2565 /* -march wins for code generation.
2566 -mcpu wins for default tuning. */
2567 if (!arm_selected_tune)
2568 arm_selected_tune = arm_selected_cpu;
2569
2570 arm_selected_cpu = arm_selected_arch;
2571 }
2572 else
2573 /* -mcpu wins. */
2574 arm_selected_arch = NULL;
2575 }
2576 else
2577 /* Pick a CPU based on the architecture. */
2578 arm_selected_cpu = arm_selected_arch;
2579 }
2580
2581 /* If the user did not specify a processor, choose one for them. */
2582 if (!arm_selected_cpu)
2583 {
2584 const struct processors * sel;
2585 unsigned int sought;
2586
2587 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2588 if (!arm_selected_cpu->name)
2589 {
2590 #ifdef SUBTARGET_CPU_DEFAULT
2591 /* Use the subtarget default CPU if none was specified by
2592 configure. */
2593 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2594 #endif
2595 /* Default to ARM6. */
2596 if (!arm_selected_cpu->name)
2597 arm_selected_cpu = &all_cores[arm6];
2598 }
2599
2600 sel = arm_selected_cpu;
2601 insn_flags = sel->flags;
2602
2603 /* Now check to see if the user has specified some command line
2604 switches that require certain abilities from the cpu. */
2605 sought = 0;
2606
2607 if (TARGET_INTERWORK || TARGET_THUMB)
2608 {
2609 sought |= (FL_THUMB | FL_MODE32);
2610
2611 /* There are no ARM processors that support both APCS-26 and
2612 interworking. Therefore we force FL_MODE26 to be removed
2613 from insn_flags here (if it was set), so that the search
2614 below will always be able to find a compatible processor. */
2615 insn_flags &= ~FL_MODE26;
2616 }
2617
2618 if (sought != 0 && ((sought & insn_flags) != sought))
2619 {
2620 /* Try to locate a CPU type that supports all of the abilities
2621 of the default CPU, plus the extra abilities requested by
2622 the user. */
2623 for (sel = all_cores; sel->name != NULL; sel++)
2624 if ((sel->flags & sought) == (sought | insn_flags))
2625 break;
2626
2627 if (sel->name == NULL)
2628 {
2629 unsigned current_bit_count = 0;
2630 const struct processors * best_fit = NULL;
2631
2632 /* Ideally we would like to issue an error message here
2633 saying that it was not possible to find a CPU compatible
2634 with the default CPU, but which also supports the command
2635 line options specified by the programmer, and so they
2636 ought to use the -mcpu=<name> command line option to
2637 override the default CPU type.
2638
2639 If we cannot find a cpu that has both the
2640 characteristics of the default cpu and the given
2641 command line options, we scan the array again looking
2642 for a best match. */
2643 for (sel = all_cores; sel->name != NULL; sel++)
2644 if ((sel->flags & sought) == sought)
2645 {
2646 unsigned count;
2647
2648 count = bit_count (sel->flags & insn_flags);
2649
2650 if (count >= current_bit_count)
2651 {
2652 best_fit = sel;
2653 current_bit_count = count;
2654 }
2655 }
2656
2657 gcc_assert (best_fit);
2658 sel = best_fit;
2659 }
2660
2661 arm_selected_cpu = sel;
2662 }
2663 }
2664
2665 gcc_assert (arm_selected_cpu);
2666 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2667 if (!arm_selected_tune)
2668 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2669
2670 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2671 insn_flags = arm_selected_cpu->flags;
2672 arm_base_arch = arm_selected_cpu->base_arch;
2673
2674 arm_tune = arm_selected_tune->core;
2675 tune_flags = arm_selected_tune->flags;
2676 current_tune = arm_selected_tune->tune;
2677
2678 /* Make sure that the processor choice does not conflict with any of the
2679 other command line choices. */
2680 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2681 error ("target CPU does not support ARM mode");
2682
2683 /* BPABI targets use linker tricks to allow interworking on cores
2684 without thumb support. */
2685 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2686 {
2687 warning (0, "target CPU does not support interworking" );
2688 target_flags &= ~MASK_INTERWORK;
2689 }
2690
2691 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2692 {
2693 warning (0, "target CPU does not support THUMB instructions");
2694 target_flags &= ~MASK_THUMB;
2695 }
2696
2697 if (TARGET_APCS_FRAME && TARGET_THUMB)
2698 {
2699 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2700 target_flags &= ~MASK_APCS_FRAME;
2701 }
2702
2703 /* Callee super interworking implies thumb interworking. Adding
2704 this to the flags here simplifies the logic elsewhere. */
2705 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2706 target_flags |= MASK_INTERWORK;
2707
2708 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2709 from here, where no function is currently being compiled. */
2710 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2711 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2712
2713 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2714 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2715
2716 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2717 {
2718 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2719 target_flags |= MASK_APCS_FRAME;
2720 }
2721
2722 if (TARGET_POKE_FUNCTION_NAME)
2723 target_flags |= MASK_APCS_FRAME;
2724
2725 if (TARGET_APCS_REENT && flag_pic)
2726 error ("-fpic and -mapcs-reent are incompatible");
2727
2728 if (TARGET_APCS_REENT)
2729 warning (0, "APCS reentrant code not supported. Ignored");
2730
2731 /* If this target is normally configured to use APCS frames, warn if they
2732 are turned off and debugging is turned on. */
2733 if (TARGET_ARM
2734 && write_symbols != NO_DEBUG
2735 && !TARGET_APCS_FRAME
2736 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2737 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2738
2739 if (TARGET_APCS_FLOAT)
2740 warning (0, "passing floating point arguments in fp regs not yet supported");
2741
2742 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2743 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2744 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2745 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2746 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2747 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2748 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2749 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2750 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2751 arm_arch6m = arm_arch6 && !arm_arch_notm;
2752 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2753 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2754 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2755 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2756 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2757
2758 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2759 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2760 thumb_code = TARGET_ARM == 0;
2761 thumb1_code = TARGET_THUMB1 != 0;
2762 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2763 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2764 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2765 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2766 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2767 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2768 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2769 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2770 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2771 if (arm_restrict_it == 2)
2772 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2773
2774 if (!TARGET_THUMB2)
2775 arm_restrict_it = 0;
2776
2777 /* If we are not using the default (ARM mode) section anchor offset
2778 ranges, then set the correct ranges now. */
2779 if (TARGET_THUMB1)
2780 {
2781 /* Thumb-1 LDR instructions cannot have negative offsets.
2782 Permissible positive offset ranges are 5-bit (for byte loads),
2783 6-bit (for halfword loads), or 7-bit (for word loads).
2784 Empirical results suggest a 7-bit anchor range gives the best
2785 overall code size. */
2786 targetm.min_anchor_offset = 0;
2787 targetm.max_anchor_offset = 127;
2788 }
2789 else if (TARGET_THUMB2)
2790 {
2791 /* The minimum is set such that the total size of the block
2792 for a particular anchor is 248 + 1 + 4095 bytes, which is
2793 divisible by eight, ensuring natural spacing of anchors. */
2794 targetm.min_anchor_offset = -248;
2795 targetm.max_anchor_offset = 4095;
2796 }
2797
2798 /* V5 code we generate is completely interworking capable, so we turn off
2799 TARGET_INTERWORK here to avoid many tests later on. */
2800
2801 /* XXX However, we must pass the right pre-processor defines to CPP
2802 or GLD can get confused. This is a hack. */
2803 if (TARGET_INTERWORK)
2804 arm_cpp_interwork = 1;
2805
2806 if (arm_arch5)
2807 target_flags &= ~MASK_INTERWORK;
2808
2809 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2810 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2811
2812 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2813 error ("iwmmxt abi requires an iwmmxt capable cpu");
2814
2815 if (!global_options_set.x_arm_fpu_index)
2816 {
2817 const char *target_fpu_name;
2818 bool ok;
2819
2820 #ifdef FPUTYPE_DEFAULT
2821 target_fpu_name = FPUTYPE_DEFAULT;
2822 #else
2823 target_fpu_name = "vfp";
2824 #endif
2825
2826 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2827 CL_TARGET);
2828 gcc_assert (ok);
2829 }
2830
2831 arm_fpu_desc = &all_fpus[arm_fpu_index];
2832
2833 switch (arm_fpu_desc->model)
2834 {
2835 case ARM_FP_MODEL_VFP:
2836 arm_fpu_attr = FPU_VFP;
2837 break;
2838
2839 default:
2840 gcc_unreachable();
2841 }
2842
2843 if (TARGET_AAPCS_BASED)
2844 {
2845 if (TARGET_CALLER_INTERWORKING)
2846 error ("AAPCS does not support -mcaller-super-interworking");
2847 else
2848 if (TARGET_CALLEE_INTERWORKING)
2849 error ("AAPCS does not support -mcallee-super-interworking");
2850 }
2851
2852 /* iWMMXt and NEON are incompatible. */
2853 if (TARGET_IWMMXT && TARGET_NEON)
2854 error ("iWMMXt and NEON are incompatible");
2855
2856 /* iWMMXt unsupported under Thumb mode. */
2857 if (TARGET_THUMB && TARGET_IWMMXT)
2858 error ("iWMMXt unsupported under Thumb mode");
2859
2860 /* __fp16 support currently assumes the core has ldrh. */
2861 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2862 sorry ("__fp16 and no ldrh");
2863
2864 /* If soft-float is specified then don't use FPU. */
2865 if (TARGET_SOFT_FLOAT)
2866 arm_fpu_attr = FPU_NONE;
2867
2868 if (TARGET_AAPCS_BASED)
2869 {
2870 if (arm_abi == ARM_ABI_IWMMXT)
2871 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2872 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2873 && TARGET_HARD_FLOAT
2874 && TARGET_VFP)
2875 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2876 else
2877 arm_pcs_default = ARM_PCS_AAPCS;
2878 }
2879 else
2880 {
2881 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2882 sorry ("-mfloat-abi=hard and VFP");
2883
2884 if (arm_abi == ARM_ABI_APCS)
2885 arm_pcs_default = ARM_PCS_APCS;
2886 else
2887 arm_pcs_default = ARM_PCS_ATPCS;
2888 }
2889
2890 /* For arm2/3 there is no need to do any scheduling if we are doing
2891 software floating-point. */
2892 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2893 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2894
2895 /* Use the cp15 method if it is available. */
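  /* (TP_CP15 reads the thread pointer directly from the coprocessor,
     roughly "mrc p15, 0, rN, c13, c0, 3", whereas TP_SOFT falls back to
     calling the __aeabi_read_tp helper.)  */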
2896 if (target_thread_pointer == TP_AUTO)
2897 {
2898 if (arm_arch6k && !TARGET_THUMB1)
2899 target_thread_pointer = TP_CP15;
2900 else
2901 target_thread_pointer = TP_SOFT;
2902 }
2903
2904 if (TARGET_HARD_TP && TARGET_THUMB1)
2905 error ("can not use -mtp=cp15 with 16-bit Thumb");
2906
2907 /* Override the default structure alignment for AAPCS ABI. */
2908 if (!global_options_set.x_arm_structure_size_boundary)
2909 {
2910 if (TARGET_AAPCS_BASED)
2911 arm_structure_size_boundary = 8;
2912 }
2913 else
2914 {
2915 if (arm_structure_size_boundary != 8
2916 && arm_structure_size_boundary != 32
2917 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2918 {
2919 if (ARM_DOUBLEWORD_ALIGN)
2920 warning (0,
2921 "structure size boundary can only be set to 8, 32 or 64");
2922 else
2923 warning (0, "structure size boundary can only be set to 8 or 32");
2924 arm_structure_size_boundary
2925 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2926 }
2927 }
2928
2929 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2930 {
2931 error ("RTP PIC is incompatible with Thumb");
2932 flag_pic = 0;
2933 }
2934
2935 /* If stack checking is disabled, we can use r10 as the PIC register,
2936 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2937 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2938 {
2939 if (TARGET_VXWORKS_RTP)
2940 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2941 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2942 }
2943
2944 if (flag_pic && TARGET_VXWORKS_RTP)
2945 arm_pic_register = 9;
2946
2947 if (arm_pic_register_string != NULL)
2948 {
2949 int pic_register = decode_reg_name (arm_pic_register_string);
2950
2951 if (!flag_pic)
2952 warning (0, "-mpic-register= is useless without -fpic");
2953
2954 /* Prevent the user from choosing an obviously stupid PIC register. */
2955 else if (pic_register < 0 || call_used_regs[pic_register]
2956 || pic_register == HARD_FRAME_POINTER_REGNUM
2957 || pic_register == STACK_POINTER_REGNUM
2958 || pic_register >= PC_REGNUM
2959 || (TARGET_VXWORKS_RTP
2960 && (unsigned int) pic_register != arm_pic_register))
2961 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2962 else
2963 arm_pic_register = pic_register;
2964 }
2965
2966 if (TARGET_VXWORKS_RTP
2967 && !global_options_set.x_arm_pic_data_is_text_relative)
2968 arm_pic_data_is_text_relative = 0;
2969
2970 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2971 if (fix_cm3_ldrd == 2)
2972 {
2973 if (arm_selected_cpu->core == cortexm3)
2974 fix_cm3_ldrd = 1;
2975 else
2976 fix_cm3_ldrd = 0;
2977 }
2978
2979 /* Enable -munaligned-access by default for
2980 - all ARMv6 architecture-based processors
2981 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2982 - ARMv8 architecture-based processors.
2983
2984 Disable -munaligned-access by default for
2985 - all pre-ARMv6 architecture-based processors
2986 - ARMv6-M architecture-based processors. */
2987
2988 if (unaligned_access == 2)
2989 {
2990 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2991 unaligned_access = 1;
2992 else
2993 unaligned_access = 0;
2994 }
2995 else if (unaligned_access == 1
2996 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2997 {
2998 warning (0, "target CPU does not support unaligned accesses");
2999 unaligned_access = 0;
3000 }
3001
3002 if (TARGET_THUMB1 && flag_schedule_insns)
3003 {
3004 /* Don't warn since it's on by default in -O2. */
3005 flag_schedule_insns = 0;
3006 }
3007
3008 if (optimize_size)
3009 {
3010 /* If optimizing for size, bump the number of instructions that we
3011 are prepared to conditionally execute (even on a StrongARM). */
3012 max_insns_skipped = 6;
3013
3014 /* For THUMB2, we limit the conditional sequence to one IT block. */
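      /* An IT block can cover up to four conditional instructions;
	 MAX_INSN_PER_IT_BLOCK caps the sequence accordingly (and to a
	 single insn when -mrestrict-it is in effect).  */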
3015 if (TARGET_THUMB2)
3016 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3017 }
3018 else
3019 max_insns_skipped = current_tune->max_insns_skipped;
3020
3021 /* Hot/Cold partitioning is not currently supported, since we can't
3022 handle literal pool placement in that case. */
3023 if (flag_reorder_blocks_and_partition)
3024 {
3025 inform (input_location,
3026 "-freorder-blocks-and-partition not supported on this architecture");
3027 flag_reorder_blocks_and_partition = 0;
3028 flag_reorder_blocks = 1;
3029 }
3030
3031 if (flag_pic)
3032 /* Hoisting PIC address calculations more aggressively provides a small,
3033 but measurable, size reduction for PIC code. Therefore, we decrease
3034 the bar for unrestricted expression hoisting to the cost of PIC address
3035 calculation, which is 2 instructions. */
3036 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3037 global_options.x_param_values,
3038 global_options_set.x_param_values);
3039
3040 /* ARM EABI defaults to strict volatile bitfields. */
3041 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3042 && abi_version_at_least(2))
3043 flag_strict_volatile_bitfields = 1;
3044
3045 /* Enable software prefetching at -O3 for CPUs that have prefetch support and
3046 for which we have deemed it beneficial (num_prefetch_slots set to 1 or more). */
3047 if (flag_prefetch_loop_arrays < 0
3048 && HAVE_prefetch
3049 && optimize >= 3
3050 && current_tune->num_prefetch_slots > 0)
3051 flag_prefetch_loop_arrays = 1;
3052
3053 /* Set up parameters to be used in the prefetching algorithm. Do not override
3054 the defaults unless we are tuning for a core for which we have researched values. */
3055 if (current_tune->num_prefetch_slots > 0)
3056 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3057 current_tune->num_prefetch_slots,
3058 global_options.x_param_values,
3059 global_options_set.x_param_values);
3060 if (current_tune->l1_cache_line_size >= 0)
3061 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3062 current_tune->l1_cache_line_size,
3063 global_options.x_param_values,
3064 global_options_set.x_param_values);
3065 if (current_tune->l1_cache_size >= 0)
3066 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3067 current_tune->l1_cache_size,
3068 global_options.x_param_values,
3069 global_options_set.x_param_values);
3070
3071 /* Use Neon rather than core registers to perform 64-bit
3072 operations. */
3073 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3074 if (use_neon_for_64bits == 1)
3075 prefer_neon_for_64bits = true;
3076
3077 /* Use the alternative scheduling-pressure algorithm by default. */
3078 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3079 global_options.x_param_values,
3080 global_options_set.x_param_values);
3081
3082 /* Disable shrink-wrap when optimizing function for size, since it tends to
3083 generate additional returns. */
3084 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3085 flag_shrink_wrap = false;
3086 /* TBD: Dwarf info for apcs frame is not handled yet. */
3087 if (TARGET_APCS_FRAME)
3088 flag_shrink_wrap = false;
3089
3090 /* We only support -mslow-flash-data on armv7-m targets. */
3091 if (target_slow_flash_data
3092 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3093 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3094 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3095
3096 /* Currently, for slow flash data, we just disable literal pools. */
3097 if (target_slow_flash_data)
3098 arm_disable_literal_pool = true;
3099
3100 /* Thumb2 inline assembly code should always use unified syntax.
3101 This will apply to ARM and Thumb1 eventually. */
3102 if (TARGET_THUMB2)
3103 inline_asm_unified = 1;
3104
3105 /* Disable scheduling fusion by default if this is not an armv7 processor
3106 or it doesn't prefer ldrd/strd. */
3107 if (flag_schedule_fusion == 2
3108 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3109 flag_schedule_fusion = 0;
3110
3111 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3112 - epilogue_insns - does not accurately model the corresponding insns
3113 emitted in the asm file. In particular, see the comment in thumb_exit
3114 'Find out how many of the (return) argument registers we can corrupt'.
3115 As a consequence, the epilogue may clobber registers without fipa-ra
3116 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3117 TODO: Accurately model clobbers for epilogue_insns and reenable
3118 fipa-ra. */
3119 if (TARGET_THUMB1)
3120 flag_ipa_ra = 0;
3121
3122 /* Register global variables with the garbage collector. */
3123 arm_add_gc_roots ();
3124 }
3125
3126 static void
3127 arm_add_gc_roots (void)
3128 {
3129 gcc_obstack_init(&minipool_obstack);
3130 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3131 }
3132 \f
3133 /* A table of known ARM exception types.
3134 For use with the interrupt function attribute. */
3135
3136 typedef struct
3137 {
3138 const char *const arg;
3139 const unsigned long return_value;
3140 }
3141 isr_attribute_arg;
3142
3143 static const isr_attribute_arg isr_attribute_args [] =
3144 {
3145 { "IRQ", ARM_FT_ISR },
3146 { "irq", ARM_FT_ISR },
3147 { "FIQ", ARM_FT_FIQ },
3148 { "fiq", ARM_FT_FIQ },
3149 { "ABORT", ARM_FT_ISR },
3150 { "abort", ARM_FT_ISR },
3151 { "ABORT", ARM_FT_ISR },
3152 { "abort", ARM_FT_ISR },
3153 { "UNDEF", ARM_FT_EXCEPTION },
3154 { "undef", ARM_FT_EXCEPTION },
3155 { "SWI", ARM_FT_EXCEPTION },
3156 { "swi", ARM_FT_EXCEPTION },
3157 { NULL, ARM_FT_NORMAL }
3158 };
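/* A handler would typically be declared with one of the strings above,
   e.g. (function name purely illustrative):
       void uart_isr (void) __attribute__ ((interrupt ("IRQ")));  */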
3159
3160 /* Returns the (interrupt) function type of the current
3161 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3162
3163 static unsigned long
3164 arm_isr_value (tree argument)
3165 {
3166 const isr_attribute_arg * ptr;
3167 const char * arg;
3168
3169 if (!arm_arch_notm)
3170 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3171
3172 /* No argument - default to IRQ. */
3173 if (argument == NULL_TREE)
3174 return ARM_FT_ISR;
3175
3176 /* Get the value of the argument. */
3177 if (TREE_VALUE (argument) == NULL_TREE
3178 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3179 return ARM_FT_UNKNOWN;
3180
3181 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3182
3183 /* Check it against the list of known arguments. */
3184 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3185 if (streq (arg, ptr->arg))
3186 return ptr->return_value;
3187
3188 /* An unrecognized interrupt type. */
3189 return ARM_FT_UNKNOWN;
3190 }
3191
3192 /* Computes the type of the current function. */
3193
3194 static unsigned long
3195 arm_compute_func_type (void)
3196 {
3197 unsigned long type = ARM_FT_UNKNOWN;
3198 tree a;
3199 tree attr;
3200
3201 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3202
3203 /* Decide if the current function is volatile. Such functions
3204 never return, and many memory cycles can be saved by not storing
3205 register values that will never be needed again. This optimization
3206 was added to speed up context switching in a kernel application. */
3207 if (optimize > 0
3208 && (TREE_NOTHROW (current_function_decl)
3209 || !(flag_unwind_tables
3210 || (flag_exceptions
3211 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3212 && TREE_THIS_VOLATILE (current_function_decl))
3213 type |= ARM_FT_VOLATILE;
3214
3215 if (cfun->static_chain_decl != NULL)
3216 type |= ARM_FT_NESTED;
3217
3218 attr = DECL_ATTRIBUTES (current_function_decl);
3219
3220 a = lookup_attribute ("naked", attr);
3221 if (a != NULL_TREE)
3222 type |= ARM_FT_NAKED;
3223
3224 a = lookup_attribute ("isr", attr);
3225 if (a == NULL_TREE)
3226 a = lookup_attribute ("interrupt", attr);
3227
3228 if (a == NULL_TREE)
3229 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3230 else
3231 type |= arm_isr_value (TREE_VALUE (a));
3232
3233 return type;
3234 }
3235
3236 /* Returns the type of the current function. */
3237
3238 unsigned long
3239 arm_current_func_type (void)
3240 {
3241 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3242 cfun->machine->func_type = arm_compute_func_type ();
3243
3244 return cfun->machine->func_type;
3245 }
3246
3247 bool
3248 arm_allocate_stack_slots_for_args (void)
3249 {
3250 /* Naked functions should not allocate stack slots for arguments. */
3251 return !IS_NAKED (arm_current_func_type ());
3252 }
3253
3254 static bool
3255 arm_warn_func_return (tree decl)
3256 {
3257 /* Naked functions are implemented entirely in assembly, including the
3258 return sequence, so suppress warnings about this. */
3259 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3260 }
3261
3262 \f
3263 /* Output assembler code for a block containing the constant parts
3264 of a trampoline, leaving space for the variable parts.
3265
3266 On the ARM, (if r8 is the static chain regnum, and remembering that
3267 referencing pc adds an offset of 8) the trampoline looks like:
3268 ldr r8, [pc, #0]
3269 ldr pc, [pc]
3270 .word static chain value
3271 .word function's address
3272 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3273
3274 static void
3275 arm_asm_trampoline_template (FILE *f)
3276 {
3277 if (TARGET_ARM)
3278 {
3279 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3280 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3281 }
3282 else if (TARGET_THUMB2)
3283 {
3284 /* The Thumb-2 trampoline is similar to the ARM implementation.
3285 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3286 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3287 STATIC_CHAIN_REGNUM, PC_REGNUM);
3288 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3289 }
3290 else
3291 {
3292 ASM_OUTPUT_ALIGN (f, 2);
3293 fprintf (f, "\t.code\t16\n");
3294 fprintf (f, ".Ltrampoline_start:\n");
3295 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3296 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3297 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3298 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3299 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3300 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3301 }
3302 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3303 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3304 }
3305
3306 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3307
3308 static void
3309 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3310 {
3311 rtx fnaddr, mem, a_tramp;
3312
3313 emit_block_move (m_tramp, assemble_trampoline_template (),
3314 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3315
3316 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3317 emit_move_insn (mem, chain_value);
3318
3319 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3320 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3321 emit_move_insn (mem, fnaddr);
3322
3323 a_tramp = XEXP (m_tramp, 0);
3324 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3325 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3326 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3327 }
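
/* A sketch of the layout these offsets assume, derived from the template
   above: in ARM and Thumb-2 mode the two load instructions occupy 8 bytes,
   so the static chain value lives at offset 8 and the target address at
   offset 12; in 16-bit Thumb mode the six halfword instructions occupy 12
   bytes, so the two words land at offsets 12 and 16 instead.  */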
3328
3329 /* Thumb trampolines should be entered in thumb mode, so set
3330 the bottom bit of the address. */
3331
3332 static rtx
3333 arm_trampoline_adjust_address (rtx addr)
3334 {
3335 if (TARGET_THUMB)
3336 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3337 NULL, 0, OPTAB_LIB_WIDEN);
3338 return addr;
3339 }
3340 \f
3341 /* Return 1 if it is possible to return using a single instruction.
3342 If SIBLING is non-null, this is a test for a return before a sibling
3343 call. SIBLING is the call insn, so we can examine its register usage. */
3344
3345 int
3346 use_return_insn (int iscond, rtx sibling)
3347 {
3348 int regno;
3349 unsigned int func_type;
3350 unsigned long saved_int_regs;
3351 unsigned HOST_WIDE_INT stack_adjust;
3352 arm_stack_offsets *offsets;
3353
3354 /* Never use a return instruction before reload has run. */
3355 if (!reload_completed)
3356 return 0;
3357
3358 func_type = arm_current_func_type ();
3359
3360 /* Naked, volatile and stack alignment functions need special
3361 consideration. */
3362 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3363 return 0;
3364
3365 /* So do interrupt functions that use the frame pointer and Thumb
3366 interrupt functions. */
3367 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3368 return 0;
3369
3370 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3371 && !optimize_function_for_size_p (cfun))
3372 return 0;
3373
3374 offsets = arm_get_frame_offsets ();
3375 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3376
3377 /* As do variadic functions. */
3378 if (crtl->args.pretend_args_size
3379 || cfun->machine->uses_anonymous_args
3380 /* Or if the function calls __builtin_eh_return () */
3381 || crtl->calls_eh_return
3382 /* Or if the function calls alloca */
3383 || cfun->calls_alloca
3384 /* Or if there is a stack adjustment. However, if the stack pointer
3385 is saved on the stack, we can use a pre-incrementing stack load. */
3386 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3387 && stack_adjust == 4)))
3388 return 0;
3389
3390 saved_int_regs = offsets->saved_regs_mask;
3391
3392 /* Unfortunately, the insn
3393
3394 ldmib sp, {..., sp, ...}
3395
3396 triggers a bug on most SA-110 based devices, such that the stack
3397 pointer won't be correctly restored if the instruction takes a
3398 page fault. We work around this problem by popping r3 along with
3399 the other registers, since that is never slower than executing
3400 another instruction.
3401
3402 We test for !arm_arch5 here, because code for any architecture
3403 less than this could potentially be run on one of the buggy
3404 chips. */
3405 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3406 {
3407 /* Validate that r3 is a call-clobbered register (always true in
3408 the default abi) ... */
3409 if (!call_used_regs[3])
3410 return 0;
3411
3412 /* ... that it isn't being used for a return value ... */
3413 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3414 return 0;
3415
3416 /* ... or for a tail-call argument ... */
3417 if (sibling)
3418 {
3419 gcc_assert (CALL_P (sibling));
3420
3421 if (find_regno_fusage (sibling, USE, 3))
3422 return 0;
3423 }
3424
3425 /* ... and that there are no call-saved registers in r0-r2
3426 (always true in the default ABI). */
3427 if (saved_int_regs & 0x7)
3428 return 0;
3429 }
3430
3431 /* Can't be done if interworking with Thumb, and any registers have been
3432 stacked. */
3433 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3434 return 0;
3435
3436 /* On StrongARM, conditional returns are expensive if they aren't
3437 taken and multiple registers have been stacked. */
3438 if (iscond && arm_tune_strongarm)
3439 {
3440 /* Conditional return when just the LR is stored is a simple
3441 conditional-load instruction, that's not expensive. */
3442 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3443 return 0;
3444
3445 if (flag_pic
3446 && arm_pic_register != INVALID_REGNUM
3447 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3448 return 0;
3449 }
3450
3451 /* If there are saved registers but the LR isn't saved, then we need
3452 two instructions for the return. */
3453 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3454 return 0;
3455
3456 /* Can't be done if any of the VFP regs are pushed,
3457 since this also requires an insn. */
3458 if (TARGET_HARD_FLOAT && TARGET_VFP)
3459 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3460 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3461 return 0;
3462
3463 if (TARGET_REALLY_IWMMXT)
3464 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3465 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3466 return 0;
3467
3468 return 1;
3469 }
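
/* For instance, a frame that saves {r4, lr} with no residual stack
   adjustment can typically return with a single "ldmfd sp!, {r4, pc}", so
   the checks above succeed; if r4 were saved without lr, restoring r4 and
   returning would take two instructions, which is why the LR test above
   rejects such frames.  */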
3470
3471 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3472 shrink-wrapping if possible. This is the case if we need to emit a
3473 prologue, which we can test by looking at the offsets. */
3474 bool
3475 use_simple_return_p (void)
3476 {
3477 arm_stack_offsets *offsets;
3478
3479 offsets = arm_get_frame_offsets ();
3480 return offsets->outgoing_args != 0;
3481 }
3482
3483 /* Return TRUE if int I is a valid immediate ARM constant. */
3484
3485 int
3486 const_ok_for_arm (HOST_WIDE_INT i)
3487 {
3488 int lowbit;
3489
3490 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3491 be all zero, or all one. */
3492 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3493 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3494 != ((~(unsigned HOST_WIDE_INT) 0)
3495 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3496 return FALSE;
3497
3498 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3499
3500 /* Fast return for 0 and small values. We must do this for zero, since
3501 the code below can't handle that one case. */
3502 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3503 return TRUE;
3504
3505 /* Get the number of trailing zeros. */
3506 lowbit = ffs((int) i) - 1;
3507
3508 /* Only even shifts are allowed in ARM mode so round down to the
3509 nearest even number. */
3510 if (TARGET_ARM)
3511 lowbit &= ~1;
3512
3513 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3514 return TRUE;
3515
3516 if (TARGET_ARM)
3517 {
3518 /* Allow rotated constants in ARM mode. */
3519 if (lowbit <= 4
3520 && ((i & ~0xc000003f) == 0
3521 || (i & ~0xf000000f) == 0
3522 || (i & ~0xfc000003) == 0))
3523 return TRUE;
3524 }
3525 else
3526 {
3527 HOST_WIDE_INT v;
3528
3529 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3530 v = i & 0xff;
3531 v |= v << 16;
3532 if (i == v || i == (v | (v << 8)))
3533 return TRUE;
3534
3535 /* Allow repeated pattern 0xXY00XY00. */
3536 v = i & 0xff00;
3537 v |= v << 16;
3538 if (i == v)
3539 return TRUE;
3540 }
3541
3542 return FALSE;
3543 }
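
/* Worked examples of the rules above: 0x000000ff and 0xff000000 are valid
   (an 8-bit value, possibly rotated by an even amount), whereas 0x00000101
   is not, since its set bits span nine positions and no rotation of an
   8-bit field can cover them.  In Thumb-2 the replicated patterns also
   qualify, e.g. 0x00ff00ff matches the 0x00XY00XY form.  */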
3544
3545 /* Return true if I is a valid constant for the operation CODE. */
3546 int
3547 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3548 {
3549 if (const_ok_for_arm (i))
3550 return 1;
3551
3552 switch (code)
3553 {
3554 case SET:
3555 /* See if we can use movw. */
3556 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3557 return 1;
3558 else
3559 /* Otherwise, try mvn. */
3560 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3561
3562 case PLUS:
3563 /* See if we can use addw or subw. */
3564 if (TARGET_THUMB2
3565 && ((i & 0xfffff000) == 0
3566 || ((-i) & 0xfffff000) == 0))
3567 return 1;
3568 /* else fall through. */
3569
3570 case COMPARE:
3571 case EQ:
3572 case NE:
3573 case GT:
3574 case LE:
3575 case LT:
3576 case GE:
3577 case GEU:
3578 case LTU:
3579 case GTU:
3580 case LEU:
3581 case UNORDERED:
3582 case ORDERED:
3583 case UNEQ:
3584 case UNGE:
3585 case UNLT:
3586 case UNGT:
3587 case UNLE:
3588 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3589
3590 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3591 case XOR:
3592 return 0;
3593
3594 case IOR:
3595 if (TARGET_THUMB2)
3596 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3597 return 0;
3598
3599 case AND:
3600 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3601
3602 default:
3603 gcc_unreachable ();
3604 }
3605 }
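
/* For instance, SET of 0xffffff00 is not itself a valid immediate, but its
   complement 0x000000ff is, so a single MVN suffices; likewise PLUS of -1
   is accepted because the negated value 1 is valid and the addition can be
   recast as a subtraction.  */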
3606
3607 /* Return true if I is a valid di mode constant for the operation CODE. */
3608 int
3609 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3610 {
3611 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3612 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3613 rtx hi = GEN_INT (hi_val);
3614 rtx lo = GEN_INT (lo_val);
3615
3616 if (TARGET_THUMB1)
3617 return 0;
3618
3619 switch (code)
3620 {
3621 case AND:
3622 case IOR:
3623 case XOR:
3624 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3625 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3626 case PLUS:
3627 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3628
3629 default:
3630 return 0;
3631 }
3632 }
3633
3634 /* Emit a sequence of insns to handle a large constant.
3635 CODE is the code of the operation required, it can be any of SET, PLUS,
3636 IOR, AND, XOR, MINUS;
3637 MODE is the mode in which the operation is being performed;
3638 VAL is the integer to operate on;
3639 SOURCE is the other operand (a register, or a null-pointer for SET);
3640 SUBTARGETS means it is safe to create scratch registers if that will
3641 either produce a simpler sequence, or we will want to cse the values.
3642 Return value is the number of insns emitted. */
3643
3644 /* ??? Tweak this for thumb2. */
3645 int
3646 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3647 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3648 {
3649 rtx cond;
3650
3651 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3652 cond = COND_EXEC_TEST (PATTERN (insn));
3653 else
3654 cond = NULL_RTX;
3655
3656 if (subtargets || code == SET
3657 || (REG_P (target) && REG_P (source)
3658 && REGNO (target) != REGNO (source)))
3659 {
3660 /* After arm_reorg has been called, we can't fix up expensive
3661 constants by pushing them into memory so we must synthesize
3662 them in-line, regardless of the cost. This is only likely to
3663 be more costly on chips that have load delay slots and we are
3664 compiling without running the scheduler (so no splitting
3665 occurred before the final instruction emission).
3666
3667 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3668 */
3669 if (!cfun->machine->after_arm_reorg
3670 && !cond
3671 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3672 1, 0)
3673 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3674 + (code != SET))))
3675 {
3676 if (code == SET)
3677 {
3678 /* Currently SET is the only monadic value for CODE, all
3679 the rest are dyadic. */
3680 if (TARGET_USE_MOVT)
3681 arm_emit_movpair (target, GEN_INT (val));
3682 else
3683 emit_set_insn (target, GEN_INT (val));
3684
3685 return 1;
3686 }
3687 else
3688 {
3689 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3690
3691 if (TARGET_USE_MOVT)
3692 arm_emit_movpair (temp, GEN_INT (val));
3693 else
3694 emit_set_insn (temp, GEN_INT (val));
3695
3696 /* For MINUS, the constant is the minuend (we compute TEMP - SOURCE),
3697 since we never have subtraction of a constant. */
3698 if (code == MINUS)
3699 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3700 else
3701 emit_set_insn (target,
3702 gen_rtx_fmt_ee (code, mode, source, temp));
3703 return 2;
3704 }
3705 }
3706 }
3707
3708 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3709 1);
3710 }
3711
3712 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3713 ARM/THUMB2 immediates, and add up to VAL.
3714 The function return value gives the number of insns required. */
3715 static int
3716 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3717 struct four_ints *return_sequence)
3718 {
3719 int best_consecutive_zeros = 0;
3720 int i;
3721 int best_start = 0;
3722 int insns1, insns2;
3723 struct four_ints tmp_sequence;
3724
3725 /* If we aren't targeting ARM, the best place to start is always at
3726 the bottom, otherwise look more closely. */
3727 if (TARGET_ARM)
3728 {
3729 for (i = 0; i < 32; i += 2)
3730 {
3731 int consecutive_zeros = 0;
3732
3733 if (!(val & (3 << i)))
3734 {
3735 while ((i < 32) && !(val & (3 << i)))
3736 {
3737 consecutive_zeros += 2;
3738 i += 2;
3739 }
3740 if (consecutive_zeros > best_consecutive_zeros)
3741 {
3742 best_consecutive_zeros = consecutive_zeros;
3743 best_start = i - consecutive_zeros;
3744 }
3745 i -= 2;
3746 }
3747 }
3748 }
3749
3750 /* So long as it won't require any more insns to do so, it's
3751 desirable to emit a small constant (in bits 0...9) in the last
3752 insn. This way there is more chance that it can be combined with
3753 a later addressing insn to form a pre-indexed load or store
3754 operation. Consider:
3755
3756 *((volatile int *)0xe0000100) = 1;
3757 *((volatile int *)0xe0000110) = 2;
3758
3759 We want this to wind up as:
3760
3761 mov rA, #0xe0000000
3762 mov rB, #1
3763 str rB, [rA, #0x100]
3764 mov rB, #2
3765 str rB, [rA, #0x110]
3766
3767 rather than having to synthesize both large constants from scratch.
3768
3769 Therefore, we calculate how many insns would be required to emit
3770 the constant starting from `best_start', and also starting from
3771 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3772 yield a shorter sequence, we may as well use zero. */
3773 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3774 if (best_start != 0
3775 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3776 {
3777 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3778 if (insns2 <= insns1)
3779 {
3780 *return_sequence = tmp_sequence;
3781 insns1 = insns2;
3782 }
3783 }
3784
3785 return insns1;
3786 }
3787
3788 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3789 static int
3790 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3791 struct four_ints *return_sequence, int i)
3792 {
3793 int remainder = val & 0xffffffff;
3794 int insns = 0;
3795
3796 /* Try and find a way of doing the job in either two or three
3797 instructions.
3798
3799 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3800 location. We start at position I. This may be the MSB, or
3801 optimal_immediate_sequence may have positioned it at the largest block
3802 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3803 wrapping around to the top of the word when we drop off the bottom.
3804 In the worst case this code should produce no more than four insns.
3805
3806 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3807 constants, shifted to any arbitrary location. We should always start
3808 at the MSB. */
3809 do
3810 {
3811 int end;
3812 unsigned int b1, b2, b3, b4;
3813 unsigned HOST_WIDE_INT result;
3814 int loc;
3815
3816 gcc_assert (insns < 4);
3817
3818 if (i <= 0)
3819 i += 32;
3820
3821 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3822 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3823 {
3824 loc = i;
3825 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3826 /* We can use addw/subw for the last 12 bits. */
3827 result = remainder;
3828 else
3829 {
3830 /* Use an 8-bit shifted/rotated immediate. */
3831 end = i - 8;
3832 if (end < 0)
3833 end += 32;
3834 result = remainder & ((0x0ff << end)
3835 | ((i < end) ? (0xff >> (32 - end))
3836 : 0));
3837 i -= 8;
3838 }
3839 }
3840 else
3841 {
3842 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3843 arbitrary shifts. */
3844 i -= TARGET_ARM ? 2 : 1;
3845 continue;
3846 }
3847
3848 /* Next, see if we can do a better job with a thumb2 replicated
3849 constant.
3850
3851 We do it this way around to catch the cases like 0x01F001E0 where
3852 two 8-bit immediates would work, but a replicated constant would
3853 make it worse.
3854
3855 TODO: 16-bit constants that don't clear all the bits, but still win.
3856 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3857 if (TARGET_THUMB2)
3858 {
3859 b1 = (remainder & 0xff000000) >> 24;
3860 b2 = (remainder & 0x00ff0000) >> 16;
3861 b3 = (remainder & 0x0000ff00) >> 8;
3862 b4 = remainder & 0xff;
3863
3864 if (loc > 24)
3865 {
3866 /* The 8-bit immediate already found clears b1 (and maybe b2),
3867 but must leave b3 and b4 alone. */
3868
3869 /* First try to find a 32-bit replicated constant that clears
3870 almost everything. We can assume that we can't do it in one,
3871 or else we wouldn't be here. */
3872 unsigned int tmp = b1 & b2 & b3 & b4;
3873 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3874 + (tmp << 24);
3875 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3876 + (tmp == b3) + (tmp == b4);
3877 if (tmp
3878 && (matching_bytes >= 3
3879 || (matching_bytes == 2
3880 && const_ok_for_op (remainder & ~tmp2, code))))
3881 {
3882 /* At least 3 of the bytes match, and the fourth has at
3883 least as many bits set, or two of the bytes match
3884 and it will only require one more insn to finish. */
3885 result = tmp2;
3886 i = tmp != b1 ? 32
3887 : tmp != b2 ? 24
3888 : tmp != b3 ? 16
3889 : 8;
3890 }
3891
3892 /* Second, try to find a 16-bit replicated constant that can
3893 leave three of the bytes clear. If b2 or b4 is already
3894 zero, then we can. If the 8-bit from above would not
3895 clear b2 anyway, then we still win. */
3896 else if (b1 == b3 && (!b2 || !b4
3897 || (remainder & 0x00ff0000 & ~result)))
3898 {
3899 result = remainder & 0xff00ff00;
3900 i = 24;
3901 }
3902 }
3903 else if (loc > 16)
3904 {
3905 /* The 8-bit immediate already found clears b2 (and maybe b3)
3906 and we don't get here unless b1 is already clear, but it will
3907 leave b4 unchanged. */
3908
3909 /* If we can clear b2 and b4 at once, then we win, since the
3910 8-bits couldn't possibly reach that far. */
3911 if (b2 == b4)
3912 {
3913 result = remainder & 0x00ff00ff;
3914 i = 16;
3915 }
3916 }
3917 }
3918
3919 return_sequence->i[insns++] = result;
3920 remainder &= ~result;
3921
3922 if (code == SET || code == MINUS)
3923 code = PLUS;
3924 }
3925 while (remainder);
3926
3927 return insns;
3928 }
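
/* A concrete ARM-mode example: for VAL == 0x00ff00ff the loop above peels
   off the two 8-bit fields 0x00ff0000 and 0x000000ff, so two insns are
   reported; in Thumb-2 the replicated-constant path covers the whole value
   in a single step, since 0x00ff00ff matches the 0x00XY00XY form.  */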
3929
3930 /* Emit an instruction with the indicated PATTERN. If COND is
3931 non-NULL, conditionalize the execution of the instruction on COND
3932 being true. */
3933
3934 static void
3935 emit_constant_insn (rtx cond, rtx pattern)
3936 {
3937 if (cond)
3938 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3939 emit_insn (pattern);
3940 }
3941
3942 /* As above, but extra parameter GENERATE which, if clear, suppresses
3943 RTL generation. */
3944
3945 static int
3946 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3947 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3948 int generate)
3949 {
3950 int can_invert = 0;
3951 int can_negate = 0;
3952 int final_invert = 0;
3953 int i;
3954 int set_sign_bit_copies = 0;
3955 int clear_sign_bit_copies = 0;
3956 int clear_zero_bit_copies = 0;
3957 int set_zero_bit_copies = 0;
3958 int insns = 0, neg_insns, inv_insns;
3959 unsigned HOST_WIDE_INT temp1, temp2;
3960 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3961 struct four_ints *immediates;
3962 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3963
3964 /* Find out which operations are safe for a given CODE. Also do a quick
3965 check for degenerate cases; these can occur when DImode operations
3966 are split. */
3967 switch (code)
3968 {
3969 case SET:
3970 can_invert = 1;
3971 break;
3972
3973 case PLUS:
3974 can_negate = 1;
3975 break;
3976
3977 case IOR:
3978 if (remainder == 0xffffffff)
3979 {
3980 if (generate)
3981 emit_constant_insn (cond,
3982 gen_rtx_SET (VOIDmode, target,
3983 GEN_INT (ARM_SIGN_EXTEND (val))));
3984 return 1;
3985 }
3986
3987 if (remainder == 0)
3988 {
3989 if (reload_completed && rtx_equal_p (target, source))
3990 return 0;
3991
3992 if (generate)
3993 emit_constant_insn (cond,
3994 gen_rtx_SET (VOIDmode, target, source));
3995 return 1;
3996 }
3997 break;
3998
3999 case AND:
4000 if (remainder == 0)
4001 {
4002 if (generate)
4003 emit_constant_insn (cond,
4004 gen_rtx_SET (VOIDmode, target, const0_rtx));
4005 return 1;
4006 }
4007 if (remainder == 0xffffffff)
4008 {
4009 if (reload_completed && rtx_equal_p (target, source))
4010 return 0;
4011 if (generate)
4012 emit_constant_insn (cond,
4013 gen_rtx_SET (VOIDmode, target, source));
4014 return 1;
4015 }
4016 can_invert = 1;
4017 break;
4018
4019 case XOR:
4020 if (remainder == 0)
4021 {
4022 if (reload_completed && rtx_equal_p (target, source))
4023 return 0;
4024 if (generate)
4025 emit_constant_insn (cond,
4026 gen_rtx_SET (VOIDmode, target, source));
4027 return 1;
4028 }
4029
4030 if (remainder == 0xffffffff)
4031 {
4032 if (generate)
4033 emit_constant_insn (cond,
4034 gen_rtx_SET (VOIDmode, target,
4035 gen_rtx_NOT (mode, source)));
4036 return 1;
4037 }
4038 final_invert = 1;
4039 break;
4040
4041 case MINUS:
4042 /* We treat MINUS as (val - source), since (source - val) is always
4043 passed as (source + (-val)). */
4044 if (remainder == 0)
4045 {
4046 if (generate)
4047 emit_constant_insn (cond,
4048 gen_rtx_SET (VOIDmode, target,
4049 gen_rtx_NEG (mode, source)));
4050 return 1;
4051 }
4052 if (const_ok_for_arm (val))
4053 {
4054 if (generate)
4055 emit_constant_insn (cond,
4056 gen_rtx_SET (VOIDmode, target,
4057 gen_rtx_MINUS (mode, GEN_INT (val),
4058 source)));
4059 return 1;
4060 }
4061
4062 break;
4063
4064 default:
4065 gcc_unreachable ();
4066 }
4067
4068 /* If we can do it in one insn get out quickly. */
4069 if (const_ok_for_op (val, code))
4070 {
4071 if (generate)
4072 emit_constant_insn (cond,
4073 gen_rtx_SET (VOIDmode, target,
4074 (source
4075 ? gen_rtx_fmt_ee (code, mode, source,
4076 GEN_INT (val))
4077 : GEN_INT (val))));
4078 return 1;
4079 }
4080
4081 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4082 insn. */
4083 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4084 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4085 {
4086 if (generate)
4087 {
4088 if (mode == SImode && i == 16)
4089 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4090 smaller insn. */
4091 emit_constant_insn (cond,
4092 gen_zero_extendhisi2
4093 (target, gen_lowpart (HImode, source)));
4094 else
4095 /* The extzv pattern only supports SImode, but we can coerce the operands
4096 into that mode. */
4097 emit_constant_insn (cond,
4098 gen_extzv_t2 (gen_lowpart (SImode, target),
4099 gen_lowpart (SImode, source),
4100 GEN_INT (i), const0_rtx));
4101 }
4102
4103 return 1;
4104 }
4105
4106 /* Calculate a few attributes that may be useful for specific
4107 optimizations. */
4108 /* Count number of leading zeros. */
4109 for (i = 31; i >= 0; i--)
4110 {
4111 if ((remainder & (1 << i)) == 0)
4112 clear_sign_bit_copies++;
4113 else
4114 break;
4115 }
4116
4117 /* Count number of leading 1's. */
4118 for (i = 31; i >= 0; i--)
4119 {
4120 if ((remainder & (1 << i)) != 0)
4121 set_sign_bit_copies++;
4122 else
4123 break;
4124 }
4125
4126 /* Count number of trailing zero's. */
4127 for (i = 0; i <= 31; i++)
4128 {
4129 if ((remainder & (1 << i)) == 0)
4130 clear_zero_bit_copies++;
4131 else
4132 break;
4133 }
4134
4135 /* Count number of trailing 1's. */
4136 for (i = 0; i <= 31; i++)
4137 {
4138 if ((remainder & (1 << i)) != 0)
4139 set_zero_bit_copies++;
4140 else
4141 break;
4142 }
4143
4144 switch (code)
4145 {
4146 case SET:
4147 /* See if we can do this by sign_extending a constant that is known
4148 to be negative. This is a good way of doing it, since the shift
4149 may well merge into a subsequent insn. */
4150 if (set_sign_bit_copies > 1)
4151 {
4152 if (const_ok_for_arm
4153 (temp1 = ARM_SIGN_EXTEND (remainder
4154 << (set_sign_bit_copies - 1))))
4155 {
4156 if (generate)
4157 {
4158 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4159 emit_constant_insn (cond,
4160 gen_rtx_SET (VOIDmode, new_src,
4161 GEN_INT (temp1)));
4162 emit_constant_insn (cond,
4163 gen_ashrsi3 (target, new_src,
4164 GEN_INT (set_sign_bit_copies - 1)));
4165 }
4166 return 2;
4167 }
4168 /* For an inverted constant, we will need to set the low bits,
4169 these will be shifted out of harm's way. */
4170 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4171 if (const_ok_for_arm (~temp1))
4172 {
4173 if (generate)
4174 {
4175 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4176 emit_constant_insn (cond,
4177 gen_rtx_SET (VOIDmode, new_src,
4178 GEN_INT (temp1)));
4179 emit_constant_insn (cond,
4180 gen_ashrsi3 (target, new_src,
4181 GEN_INT (set_sign_bit_copies - 1)));
4182 }
4183 return 2;
4184 }
4185 }
4186
4187 /* See if we can calculate the value as the difference between two
4188 valid immediates. */
4189 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4190 {
4191 int topshift = clear_sign_bit_copies & ~1;
4192
4193 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4194 & (0xff000000 >> topshift));
4195
4196 /* If temp1 is zero, then that means the 9 most significant
4197 bits of remainder were 1 and we've caused it to overflow.
4198 When topshift is 0 we don't need to do anything since we
4199 can borrow from 'bit 32'. */
4200 if (temp1 == 0 && topshift != 0)
4201 temp1 = 0x80000000 >> (topshift - 1);
4202
4203 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4204
4205 if (const_ok_for_arm (temp2))
4206 {
4207 if (generate)
4208 {
4209 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4210 emit_constant_insn (cond,
4211 gen_rtx_SET (VOIDmode, new_src,
4212 GEN_INT (temp1)));
4213 emit_constant_insn (cond,
4214 gen_addsi3 (target, new_src,
4215 GEN_INT (-temp2)));
4216 }
4217
4218 return 2;
4219 }
4220 }
4221
4222 /* See if we can generate this by setting the bottom (or the top)
4223 16 bits, and then shifting these into the other half of the
4224 word. We only look for the simplest cases, to do more would cost
4225 too much. Be careful, however, not to generate this when the
4226 alternative would take fewer insns. */
4227 if (val & 0xffff0000)
4228 {
4229 temp1 = remainder & 0xffff0000;
4230 temp2 = remainder & 0x0000ffff;
4231
4232 /* Overlaps outside this range are best done using other methods. */
4233 for (i = 9; i < 24; i++)
4234 {
4235 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4236 && !const_ok_for_arm (temp2))
4237 {
4238 rtx new_src = (subtargets
4239 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4240 : target);
4241 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4242 source, subtargets, generate);
4243 source = new_src;
4244 if (generate)
4245 emit_constant_insn
4246 (cond,
4247 gen_rtx_SET
4248 (VOIDmode, target,
4249 gen_rtx_IOR (mode,
4250 gen_rtx_ASHIFT (mode, source,
4251 GEN_INT (i)),
4252 source)));
4253 return insns + 1;
4254 }
4255 }
4256
4257 /* Don't duplicate cases already considered. */
4258 for (i = 17; i < 24; i++)
4259 {
4260 if (((temp1 | (temp1 >> i)) == remainder)
4261 && !const_ok_for_arm (temp1))
4262 {
4263 rtx new_src = (subtargets
4264 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4265 : target);
4266 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4267 source, subtargets, generate);
4268 source = new_src;
4269 if (generate)
4270 emit_constant_insn
4271 (cond,
4272 gen_rtx_SET (VOIDmode, target,
4273 gen_rtx_IOR
4274 (mode,
4275 gen_rtx_LSHIFTRT (mode, source,
4276 GEN_INT (i)),
4277 source)));
4278 return insns + 1;
4279 }
4280 }
4281 }
4282 break;
4283
4284 case IOR:
4285 case XOR:
4286 /* If we have IOR or XOR, and the constant can be loaded in a
4287 single instruction, and we can find a temporary to put it in,
4288 then this can be done in two instructions instead of 3-4. */
4289 if (subtargets
4290 /* TARGET can't be NULL if SUBTARGETS is 0 */
4291 || (reload_completed && !reg_mentioned_p (target, source)))
4292 {
4293 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4294 {
4295 if (generate)
4296 {
4297 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4298
4299 emit_constant_insn (cond,
4300 gen_rtx_SET (VOIDmode, sub,
4301 GEN_INT (val)));
4302 emit_constant_insn (cond,
4303 gen_rtx_SET (VOIDmode, target,
4304 gen_rtx_fmt_ee (code, mode,
4305 source, sub)));
4306 }
4307 return 2;
4308 }
4309 }
4310
4311 if (code == XOR)
4312 break;
4313
4314 /* Convert.
4315 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4316 followed by 0s, e.g. 0xfff00000)
4317 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4318
4319 This can be done in 2 instructions by using shifts with mov or mvn.
4320 e.g. for
4321 x = x | 0xfff00000;
4322 we generate:
4323 mvn r0, r0, asl #12
4324 mvn r0, r0, lsr #12 */
4325 if (set_sign_bit_copies > 8
4326 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4327 {
4328 if (generate)
4329 {
4330 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4331 rtx shift = GEN_INT (set_sign_bit_copies);
4332
4333 emit_constant_insn
4334 (cond,
4335 gen_rtx_SET (VOIDmode, sub,
4336 gen_rtx_NOT (mode,
4337 gen_rtx_ASHIFT (mode,
4338 source,
4339 shift))));
4340 emit_constant_insn
4341 (cond,
4342 gen_rtx_SET (VOIDmode, target,
4343 gen_rtx_NOT (mode,
4344 gen_rtx_LSHIFTRT (mode, sub,
4345 shift))));
4346 }
4347 return 2;
4348 }
4349
4350 /* Convert
4351 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4352 to
4353 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4354
4355 E.g. for r0 = r0 | 0xfff
4356 mvn r0, r0, lsr #12
4357 mvn r0, r0, asl #12
4358
4359 */
4360 if (set_zero_bit_copies > 8
4361 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4362 {
4363 if (generate)
4364 {
4365 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4366 rtx shift = GEN_INT (set_zero_bit_copies);
4367
4368 emit_constant_insn
4369 (cond,
4370 gen_rtx_SET (VOIDmode, sub,
4371 gen_rtx_NOT (mode,
4372 gen_rtx_LSHIFTRT (mode,
4373 source,
4374 shift))));
4375 emit_constant_insn
4376 (cond,
4377 gen_rtx_SET (VOIDmode, target,
4378 gen_rtx_NOT (mode,
4379 gen_rtx_ASHIFT (mode, sub,
4380 shift))));
4381 }
4382 return 2;
4383 }
4384
4385 /* This will never be reached for Thumb2 because orn is a valid
4386 instruction. This is for Thumb1 and the ARM 32 bit cases.
4387
4388 x = y | constant (such that ~constant is a valid constant)
4389 Transform this to
4390 x = ~(~y & ~constant).
4391 */
4392 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4393 {
4394 if (generate)
4395 {
4396 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4397 emit_constant_insn (cond,
4398 gen_rtx_SET (VOIDmode, sub,
4399 gen_rtx_NOT (mode, source)));
4400 source = sub;
4401 if (subtargets)
4402 sub = gen_reg_rtx (mode);
4403 emit_constant_insn (cond,
4404 gen_rtx_SET (VOIDmode, sub,
4405 gen_rtx_AND (mode, source,
4406 GEN_INT (temp1))));
4407 emit_constant_insn (cond,
4408 gen_rtx_SET (VOIDmode, target,
4409 gen_rtx_NOT (mode, sub)));
4410 }
4411 return 3;
4412 }
4413 break;
4414
4415 case AND:
4416 /* See if two shifts will do 2 or more insns' worth of work. */
4417 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4418 {
4419 HOST_WIDE_INT shift_mask = ((0xffffffff
4420 << (32 - clear_sign_bit_copies))
4421 & 0xffffffff);
4422
4423 if ((remainder | shift_mask) != 0xffffffff)
4424 {
4425 if (generate)
4426 {
4427 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4428 insns = arm_gen_constant (AND, mode, cond,
4429 remainder | shift_mask,
4430 new_src, source, subtargets, 1);
4431 source = new_src;
4432 }
4433 else
4434 {
4435 rtx targ = subtargets ? NULL_RTX : target;
4436 insns = arm_gen_constant (AND, mode, cond,
4437 remainder | shift_mask,
4438 targ, source, subtargets, 0);
4439 }
4440 }
4441
4442 if (generate)
4443 {
4444 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4445 rtx shift = GEN_INT (clear_sign_bit_copies);
4446
4447 emit_insn (gen_ashlsi3 (new_src, source, shift));
4448 emit_insn (gen_lshrsi3 (target, new_src, shift));
4449 }
4450
4451 return insns + 2;
4452 }
4453
4454 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4455 {
4456 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4457
4458 if ((remainder | shift_mask) != 0xffffffff)
4459 {
4460 if (generate)
4461 {
4462 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4463
4464 insns = arm_gen_constant (AND, mode, cond,
4465 remainder | shift_mask,
4466 new_src, source, subtargets, 1);
4467 source = new_src;
4468 }
4469 else
4470 {
4471 rtx targ = subtargets ? NULL_RTX : target;
4472
4473 insns = arm_gen_constant (AND, mode, cond,
4474 remainder | shift_mask,
4475 targ, source, subtargets, 0);
4476 }
4477 }
4478
4479 if (generate)
4480 {
4481 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4482 rtx shift = GEN_INT (clear_zero_bit_copies);
4483
4484 emit_insn (gen_lshrsi3 (new_src, source, shift));
4485 emit_insn (gen_ashlsi3 (target, new_src, shift));
4486 }
4487
4488 return insns + 2;
4489 }
4490
4491 break;
4492
4493 default:
4494 break;
4495 }
4496
4497 /* Calculate what the instruction sequences would be if we generated it
4498 normally, negated, or inverted. */
4499 if (code == AND)
4500 /* AND cannot be split into multiple insns, so invert and use BIC. */
4501 insns = 99;
4502 else
4503 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4504
4505 if (can_negate)
4506 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4507 &neg_immediates);
4508 else
4509 neg_insns = 99;
4510
4511 if (can_invert || final_invert)
4512 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4513 &inv_immediates);
4514 else
4515 inv_insns = 99;
4516
4517 immediates = &pos_immediates;
4518
4519 /* Is the negated immediate sequence more efficient? */
4520 if (neg_insns < insns && neg_insns <= inv_insns)
4521 {
4522 insns = neg_insns;
4523 immediates = &neg_immediates;
4524 }
4525 else
4526 can_negate = 0;
4527
4528 /* Is the inverted immediate sequence more efficient?
4529 We must allow for an extra NOT instruction for XOR operations, although
4530 there is some chance that the final 'mvn' will get optimized later. */
4531 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4532 {
4533 insns = inv_insns;
4534 immediates = &inv_immediates;
4535 }
4536 else
4537 {
4538 can_invert = 0;
4539 final_invert = 0;
4540 }
4541
4542 /* Now output the chosen sequence as instructions. */
4543 if (generate)
4544 {
4545 for (i = 0; i < insns; i++)
4546 {
4547 rtx new_src, temp1_rtx;
4548
4549 temp1 = immediates->i[i];
4550
4551 if (code == SET || code == MINUS)
4552 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4553 else if ((final_invert || i < (insns - 1)) && subtargets)
4554 new_src = gen_reg_rtx (mode);
4555 else
4556 new_src = target;
4557
4558 if (can_invert)
4559 temp1 = ~temp1;
4560 else if (can_negate)
4561 temp1 = -temp1;
4562
4563 temp1 = trunc_int_for_mode (temp1, mode);
4564 temp1_rtx = GEN_INT (temp1);
4565
4566 if (code == SET)
4567 ;
4568 else if (code == MINUS)
4569 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4570 else
4571 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4572
4573 emit_constant_insn (cond,
4574 gen_rtx_SET (VOIDmode, new_src,
4575 temp1_rtx));
4576 source = new_src;
4577
4578 if (code == SET)
4579 {
4580 can_negate = can_invert;
4581 can_invert = 0;
4582 code = PLUS;
4583 }
4584 else if (code == MINUS)
4585 code = PLUS;
4586 }
4587 }
4588
4589 if (final_invert)
4590 {
4591 if (generate)
4592 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4593 gen_rtx_NOT (mode, source)));
4594 insns++;
4595 }
4596
4597 return insns;
4598 }
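
/* A sketch of the inverted path above: for (x & 0xf0f0ffff) neither the
   mask nor its complement is a single valid immediate, so the complement
   0x0f0f0000 is split into 0x0f000000 and 0x000f0000 and the operation is
   emitted as two AND insns with inverted immediates, which the output
   patterns can print as BICs.  */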
4599
4600 /* Canonicalize a comparison so that we are more likely to recognize it.
4601 This can be done for a few constant compares, where we can make the
4602 immediate value easier to load. */
4603
4604 static void
4605 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4606 bool op0_preserve_value)
4607 {
4608 machine_mode mode;
4609 unsigned HOST_WIDE_INT i, maxval;
4610
4611 mode = GET_MODE (*op0);
4612 if (mode == VOIDmode)
4613 mode = GET_MODE (*op1);
4614
4615 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4616
4617 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4618 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4619 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4620 for GTU/LEU in Thumb mode. */
4621 if (mode == DImode)
4622 {
4623
4624 if (*code == GT || *code == LE
4625 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4626 {
4627 /* Missing comparison. First try to use an available
4628 comparison. */
4629 if (CONST_INT_P (*op1))
4630 {
4631 i = INTVAL (*op1);
4632 switch (*code)
4633 {
4634 case GT:
4635 case LE:
4636 if (i != maxval
4637 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4638 {
4639 *op1 = GEN_INT (i + 1);
4640 *code = *code == GT ? GE : LT;
4641 return;
4642 }
4643 break;
4644 case GTU:
4645 case LEU:
4646 if (i != ~((unsigned HOST_WIDE_INT) 0)
4647 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4648 {
4649 *op1 = GEN_INT (i + 1);
4650 *code = *code == GTU ? GEU : LTU;
4651 return;
4652 }
4653 break;
4654 default:
4655 gcc_unreachable ();
4656 }
4657 }
4658
4659 /* If that did not work, reverse the condition. */
4660 if (!op0_preserve_value)
4661 {
4662 std::swap (*op0, *op1);
4663 *code = (int)swap_condition ((enum rtx_code)*code);
4664 }
4665 }
4666 return;
4667 }
4668
4669 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4670 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4671 to facilitate possible combining with a cmp into 'ands'. */
4672 if (mode == SImode
4673 && GET_CODE (*op0) == ZERO_EXTEND
4674 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4675 && GET_MODE (XEXP (*op0, 0)) == QImode
4676 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4677 && subreg_lowpart_p (XEXP (*op0, 0))
4678 && *op1 == const0_rtx)
4679 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4680 GEN_INT (255));
4681
4682 /* Comparisons smaller than DImode. Only adjust comparisons against
4683 an out-of-range constant. */
4684 if (!CONST_INT_P (*op1)
4685 || const_ok_for_arm (INTVAL (*op1))
4686 || const_ok_for_arm (- INTVAL (*op1)))
4687 return;
4688
4689 i = INTVAL (*op1);
4690
4691 switch (*code)
4692 {
4693 case EQ:
4694 case NE:
4695 return;
4696
4697 case GT:
4698 case LE:
4699 if (i != maxval
4700 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4701 {
4702 *op1 = GEN_INT (i + 1);
4703 *code = *code == GT ? GE : LT;
4704 return;
4705 }
4706 break;
4707
4708 case GE:
4709 case LT:
4710 if (i != ~maxval
4711 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4712 {
4713 *op1 = GEN_INT (i - 1);
4714 *code = *code == GE ? GT : LE;
4715 return;
4716 }
4717 break;
4718
4719 case GTU:
4720 case LEU:
4721 if (i != ~((unsigned HOST_WIDE_INT) 0)
4722 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4723 {
4724 *op1 = GEN_INT (i + 1);
4725 *code = *code == GTU ? GEU : LTU;
4726 return;
4727 }
4728 break;
4729
4730 case GEU:
4731 case LTU:
4732 if (i != 0
4733 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4734 {
4735 *op1 = GEN_INT (i - 1);
4736 *code = *code == GEU ? GTU : LEU;
4737 return;
4738 }
4739 break;
4740
4741 default:
4742 gcc_unreachable ();
4743 }
4744 }
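
/* Example of the adjustment above: 0x00ffffff is not a valid immediate
   (nor is its negation), but 0x01000000 is, so a comparison (x > 0xffffff)
   is rewritten as (x >= 0x1000000) and needs only a single CMP.  */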
4745
4746
4747 /* Define how to find the value returned by a function. */
4748
4749 static rtx
4750 arm_function_value(const_tree type, const_tree func,
4751 bool outgoing ATTRIBUTE_UNUSED)
4752 {
4753 machine_mode mode;
4754 int unsignedp ATTRIBUTE_UNUSED;
4755 rtx r ATTRIBUTE_UNUSED;
4756
4757 mode = TYPE_MODE (type);
4758
4759 if (TARGET_AAPCS_BASED)
4760 return aapcs_allocate_return_reg (mode, type, func);
4761
4762 /* Promote integer types. */
4763 if (INTEGRAL_TYPE_P (type))
4764 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4765
4766 /* Promotes small structs returned in a register to full-word size
4767 for big-endian AAPCS. */
4768 if (arm_return_in_msb (type))
4769 {
4770 HOST_WIDE_INT size = int_size_in_bytes (type);
4771 if (size % UNITS_PER_WORD != 0)
4772 {
4773 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4774 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4775 }
4776 }
4777
4778 return arm_libcall_value_1 (mode);
4779 }
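
/* For instance, on a big-endian AAPCS target a 3-byte structure returned
   in a register is widened here to a full word (SImode), so that, per
   arm_return_in_msb, the value sits in the most significant bytes of r0
   and the padding falls at the least significant end.  */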
4780
4781 /* libcall hashtable helpers. */
4782
4783 struct libcall_hasher : typed_noop_remove <rtx_def>
4784 {
4785 typedef rtx_def value_type;
4786 typedef rtx_def compare_type;
4787 static inline hashval_t hash (const value_type *);
4788 static inline bool equal (const value_type *, const compare_type *);
4789 static inline void remove (value_type *);
4790 };
4791
4792 inline bool
4793 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4794 {
4795 return rtx_equal_p (p1, p2);
4796 }
4797
4798 inline hashval_t
4799 libcall_hasher::hash (const value_type *p1)
4800 {
4801 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4802 }
4803
4804 typedef hash_table<libcall_hasher> libcall_table_type;
4805
4806 static void
4807 add_libcall (libcall_table_type *htab, rtx libcall)
4808 {
4809 *htab->find_slot (libcall, INSERT) = libcall;
4810 }
4811
4812 static bool
4813 arm_libcall_uses_aapcs_base (const_rtx libcall)
4814 {
4815 static bool init_done = false;
4816 static libcall_table_type *libcall_htab = NULL;
4817
4818 if (!init_done)
4819 {
4820 init_done = true;
4821
4822 libcall_htab = new libcall_table_type (31);
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4827 add_libcall (libcall_htab,
4828 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4829 add_libcall (libcall_htab,
4830 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4831
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4836 add_libcall (libcall_htab,
4837 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4838 add_libcall (libcall_htab,
4839 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4840
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4851 add_libcall (libcall_htab,
4852 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4853 add_libcall (libcall_htab,
4854 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4855 add_libcall (libcall_htab,
4856 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4857
4858 /* Values from double-precision helper functions are returned in core
4859 registers if the selected core only supports single-precision
4860 arithmetic, even if we are using the hard-float ABI. The same is
4861 true for single-precision helpers, but we will never be using the
4862 hard-float ABI on a CPU which doesn't support single-precision
4863 operations in hardware. */
4864 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4872 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4873 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4874 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4875 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4876 SFmode));
4877 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4878 DFmode));
4879 }
4880
4881 return libcall && libcall_htab->find (libcall) != NULL;
4882 }
4883
4884 static rtx
4885 arm_libcall_value_1 (machine_mode mode)
4886 {
4887 if (TARGET_AAPCS_BASED)
4888 return aapcs_libcall_value (mode);
4889 else if (TARGET_IWMMXT_ABI
4890 && arm_vector_mode_supported_p (mode))
4891 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4892 else
4893 return gen_rtx_REG (mode, ARG_REGISTER (1));
4894 }
4895
4896 /* Define how to find the value returned by a library function
4897 assuming the value has mode MODE. */
4898
4899 static rtx
4900 arm_libcall_value (machine_mode mode, const_rtx libcall)
4901 {
4902 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4903 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4904 {
4905 /* The following libcalls return their result in integer registers,
4906 even though they return a floating point value. */
4907 if (arm_libcall_uses_aapcs_base (libcall))
4908 return gen_rtx_REG (mode, ARG_REGISTER(1));
4909
4910 }
4911
4912 return arm_libcall_value_1 (mode);
4913 }
4914
4915 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4916
4917 static bool
4918 arm_function_value_regno_p (const unsigned int regno)
4919 {
4920 if (regno == ARG_REGISTER (1)
4921 || (TARGET_32BIT
4922 && TARGET_AAPCS_BASED
4923 && TARGET_VFP
4924 && TARGET_HARD_FLOAT
4925 && regno == FIRST_VFP_REGNUM)
4926 || (TARGET_IWMMXT_ABI
4927 && regno == FIRST_IWMMXT_REGNUM))
4928 return true;
4929
4930 return false;
4931 }
4932
4933 /* Determine the amount of memory needed to store the possible return
4934 registers of an untyped call. */
4935 int
4936 arm_apply_result_size (void)
4937 {
4938 int size = 16;
4939
4940 if (TARGET_32BIT)
4941 {
4942 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4943 size += 32;
4944 if (TARGET_IWMMXT_ABI)
4945 size += 8;
4946 }
4947
4948 return size;
4949 }
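
/* That is, 16 bytes for r0-r3 on any target, growing to 16 + 32 + 8 = 56
   bytes when both the VFP hard-float ABI and the iWMMXt ABI contribute
   return registers.  */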
4950
4951 /* Decide whether TYPE should be returned in memory (true)
4952 or in a register (false). FNTYPE is the type of the function making
4953 the call. */
4954 static bool
4955 arm_return_in_memory (const_tree type, const_tree fntype)
4956 {
4957 HOST_WIDE_INT size;
4958
4959 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4960
4961 if (TARGET_AAPCS_BASED)
4962 {
4963 /* Simple, non-aggregate types (i.e. not including vectors and
4964 complex) are always returned in a register (or registers).
4965 We don't care about which register here, so we can short-cut
4966 some of the detail. */
4967 if (!AGGREGATE_TYPE_P (type)
4968 && TREE_CODE (type) != VECTOR_TYPE
4969 && TREE_CODE (type) != COMPLEX_TYPE)
4970 return false;
4971
4972 /* Any return value that is no larger than one word can be
4973 returned in r0. */
4974 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4975 return false;
4976
4977 /* Check any available co-processors to see if they accept the
4978 type as a register candidate (VFP, for example, can return
4979 some aggregates in consecutive registers). These aren't
4980 available if the call is variadic. */
4981 if (aapcs_select_return_coproc (type, fntype) >= 0)
4982 return false;
4983
4984 /* Vector values should be returned using ARM registers, not
4985 memory (unless they're over 16 bytes, which will break since
4986 we only have four call-clobbered registers to play with). */
4987 if (TREE_CODE (type) == VECTOR_TYPE)
4988 return (size < 0 || size > (4 * UNITS_PER_WORD));
4989
4990 /* The rest go in memory. */
4991 return true;
4992 }
4993
4994 if (TREE_CODE (type) == VECTOR_TYPE)
4995 return (size < 0 || size > (4 * UNITS_PER_WORD));
4996
4997 if (!AGGREGATE_TYPE_P (type) &&
4998 (TREE_CODE (type) != VECTOR_TYPE))
4999 /* All simple types are returned in registers. */
5000 return false;
5001
5002 if (arm_abi != ARM_ABI_APCS)
5003 {
5004 /* ATPCS and later return aggregate types in memory only if they are
5005 larger than a word (or are variable size). */
5006 return (size < 0 || size > UNITS_PER_WORD);
5007 }
5008
5009 /* For the arm-wince targets we choose to be compatible with Microsoft's
5010 ARM and Thumb compilers, which always return aggregates in memory. */
5011 #ifndef ARM_WINCE
5012 /* All structures/unions bigger than one word are returned in memory.
5013 Also catch the case where int_size_in_bytes returns -1. In this case
5014 the aggregate is either huge or of variable size, and in either case
5015 we will want to return it via memory and not in a register. */
5016 if (size < 0 || size > UNITS_PER_WORD)
5017 return true;
5018
5019 if (TREE_CODE (type) == RECORD_TYPE)
5020 {
5021 tree field;
5022
5023 /* For a struct the APCS says that we only return in a register
5024 if the type is 'integer like' and every addressable element
5025 has an offset of zero. For practical purposes this means
5026 that the structure can have at most one non bit-field element
5027 and that this element must be the first one in the structure. */
5028
5029 /* Find the first field, ignoring non FIELD_DECL things which will
5030 have been created by C++. */
5031 for (field = TYPE_FIELDS (type);
5032 field && TREE_CODE (field) != FIELD_DECL;
5033 field = DECL_CHAIN (field))
5034 continue;
5035
5036 if (field == NULL)
5037 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5038
5039 /* Check that the first field is valid for returning in a register. */
5040
5041 /* ... Floats are not allowed */
5042 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5043 return true;
5044
5045 /* ... Aggregates that are not themselves valid for returning in
5046 a register are not allowed. */
5047 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5048 return true;
5049
5050 /* Now check the remaining fields, if any. Only bitfields are allowed,
5051 since they are not addressable. */
5052 for (field = DECL_CHAIN (field);
5053 field;
5054 field = DECL_CHAIN (field))
5055 {
5056 if (TREE_CODE (field) != FIELD_DECL)
5057 continue;
5058
5059 if (!DECL_BIT_FIELD_TYPE (field))
5060 return true;
5061 }
5062
5063 return false;
5064 }
5065
5066 if (TREE_CODE (type) == UNION_TYPE)
5067 {
5068 tree field;
5069
5070 /* Unions can be returned in registers if every element is
5071 integral, or can be returned in an integer register. */
5072 for (field = TYPE_FIELDS (type);
5073 field;
5074 field = DECL_CHAIN (field))
5075 {
5076 if (TREE_CODE (field) != FIELD_DECL)
5077 continue;
5078
5079 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5080 return true;
5081
5082 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5083 return true;
5084 }
5085
5086 return false;
5087 }
5088 #endif /* not ARM_WINCE */
5089
5090 /* Return all other types in memory. */
5091 return true;
5092 }
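
/* Illustrative cases, assuming the usual ABI configurations: under AAPCS a
   struct { int a; int b; } is 8 bytes, larger than one word and not a
   co-processor candidate, so it is returned in memory, whereas under the
   VFP variant a struct { double x; double y; } can come back in VFP
   registers via the co-processor check above.  Under the old APCS rules a
   struct { float x; } goes to memory because its first field is a float,
   while a struct { int x; } is returned in a register.  */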
5093
5094 const struct pcs_attribute_arg
5095 {
5096 const char *arg;
5097 enum arm_pcs value;
5098 } pcs_attribute_args[] =
5099 {
5100 {"aapcs", ARM_PCS_AAPCS},
5101 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5102 #if 0
5103 /* We could recognize these, but changes would be needed elsewhere
5104 * to implement them. */
5105 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5106 {"atpcs", ARM_PCS_ATPCS},
5107 {"apcs", ARM_PCS_APCS},
5108 #endif
5109 {NULL, ARM_PCS_UNKNOWN}
5110 };
5111
5112 static enum arm_pcs
5113 arm_pcs_from_attribute (tree attr)
5114 {
5115 const struct pcs_attribute_arg *ptr;
5116 const char *arg;
5117
5118 /* Get the value of the argument. */
5119 if (TREE_VALUE (attr) == NULL_TREE
5120 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5121 return ARM_PCS_UNKNOWN;
5122
5123 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5124
5125 /* Check it against the list of known arguments. */
5126 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5127 if (streq (arg, ptr->arg))
5128 return ptr->value;
5129
5130 /* An unrecognized PCS variant. */
5131 return ARM_PCS_UNKNOWN;
5132 }
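
/* For example (hypothetical declaration, not taken from this file):

       double f (double) __attribute__ ((pcs ("aapcs")));

   maps to ARM_PCS_AAPCS through the table above, while a string absent
   from pcs_attribute_args (e.g. "apcs", currently disabled there) yields
   ARM_PCS_UNKNOWN here.  */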
5133
5134 /* Get the PCS variant to use for this call. TYPE is the function's type
5135 specification, DECL is the specific declaration. DECL may be null if
5136 the call could be indirect or if this is a library call. */
5137 static enum arm_pcs
5138 arm_get_pcs_model (const_tree type, const_tree decl)
5139 {
5140 bool user_convention = false;
5141 enum arm_pcs user_pcs = arm_pcs_default;
5142 tree attr;
5143
5144 gcc_assert (type);
5145
5146 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5147 if (attr)
5148 {
5149 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5150 user_convention = true;
5151 }
5152
5153 if (TARGET_AAPCS_BASED)
5154 {
5155 /* Detect varargs functions. These always use the base rules
5156 (no argument is ever a candidate for a co-processor
5157 register). */
5158 bool base_rules = stdarg_p (type);
5159
5160 if (user_convention)
5161 {
5162 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5163 sorry ("non-AAPCS derived PCS variant");
5164 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5165 error ("variadic functions must use the base AAPCS variant");
5166 }
5167
5168 if (base_rules)
5169 return ARM_PCS_AAPCS;
5170 else if (user_convention)
5171 return user_pcs;
5172 else if (decl && flag_unit_at_a_time)
5173 {
5174 /* Local functions never leak outside this compilation unit,
5175 so we are free to use whatever conventions are
5176 appropriate. */
5177 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5178 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5179 if (i && i->local)
5180 return ARM_PCS_AAPCS_LOCAL;
5181 }
5182 }
5183 else if (user_convention && user_pcs != arm_pcs_default)
5184 sorry ("PCS variant");
5185
5186 /* For everything else we use the target's default. */
5187 return arm_pcs_default;
5188 }
5189
5190
5191 static void
5192 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5193 const_tree fntype ATTRIBUTE_UNUSED,
5194 rtx libcall ATTRIBUTE_UNUSED,
5195 const_tree fndecl ATTRIBUTE_UNUSED)
5196 {
5197 /* Record the unallocated VFP registers. */
5198 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5199 pcum->aapcs_vfp_reg_alloc = 0;
5200 }
5201
5202 /* Walk down the type tree of TYPE counting consecutive base elements.
5203 If *MODEP is VOIDmode, then set it to the first valid floating point
5204 type. If a non-floating point type is found, or if a floating point
5205 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5206 otherwise return the count in the sub-tree. */
5207 static int
5208 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5209 {
5210 machine_mode mode;
5211 HOST_WIDE_INT size;
5212
5213 switch (TREE_CODE (type))
5214 {
5215 case REAL_TYPE:
5216 mode = TYPE_MODE (type);
5217 if (mode != DFmode && mode != SFmode)
5218 return -1;
5219
5220 if (*modep == VOIDmode)
5221 *modep = mode;
5222
5223 if (*modep == mode)
5224 return 1;
5225
5226 break;
5227
5228 case COMPLEX_TYPE:
5229 mode = TYPE_MODE (TREE_TYPE (type));
5230 if (mode != DFmode && mode != SFmode)
5231 return -1;
5232
5233 if (*modep == VOIDmode)
5234 *modep = mode;
5235
5236 if (*modep == mode)
5237 return 2;
5238
5239 break;
5240
5241 case VECTOR_TYPE:
5242 /* Use V2SImode and V4SImode as representatives of all 64-bit
5243 and 128-bit vector types, whether or not those modes are
5244 supported with the present options. */
5245 size = int_size_in_bytes (type);
5246 switch (size)
5247 {
5248 case 8:
5249 mode = V2SImode;
5250 break;
5251 case 16:
5252 mode = V4SImode;
5253 break;
5254 default:
5255 return -1;
5256 }
5257
5258 if (*modep == VOIDmode)
5259 *modep = mode;
5260
5261 /* Vector modes are considered to be opaque: two vectors are
5262 equivalent for the purposes of being homogeneous aggregates
5263 if they are the same size. */
5264 if (*modep == mode)
5265 return 1;
5266
5267 break;
5268
5269 case ARRAY_TYPE:
5270 {
5271 int count;
5272 tree index = TYPE_DOMAIN (type);
5273
5274 /* Can't handle incomplete types nor sizes that are not
5275 fixed. */
5276 if (!COMPLETE_TYPE_P (type)
5277 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5278 return -1;
5279
5280 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5281 if (count == -1
5282 || !index
5283 || !TYPE_MAX_VALUE (index)
5284 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5285 || !TYPE_MIN_VALUE (index)
5286 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5287 || count < 0)
5288 return -1;
5289
5290 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5291 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5292
5293 /* There must be no padding. */
5294 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5295 return -1;
5296
5297 return count;
5298 }
5299
5300 case RECORD_TYPE:
5301 {
5302 int count = 0;
5303 int sub_count;
5304 tree field;
5305
5306 /* Can't handle incomplete types nor sizes that are not
5307 fixed. */
5308 if (!COMPLETE_TYPE_P (type)
5309 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5310 return -1;
5311
5312 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5313 {
5314 if (TREE_CODE (field) != FIELD_DECL)
5315 continue;
5316
5317 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5318 if (sub_count < 0)
5319 return -1;
5320 count += sub_count;
5321 }
5322
5323 /* There must be no padding. */
5324 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5325 return -1;
5326
5327 return count;
5328 }
5329
5330 case UNION_TYPE:
5331 case QUAL_UNION_TYPE:
5332 {
5333 /* These aren't very interesting except in a degenerate case. */
5334 int count = 0;
5335 int sub_count;
5336 tree field;
5337
5338 /* Can't handle incomplete types nor sizes that are not
5339 fixed. */
5340 if (!COMPLETE_TYPE_P (type)
5341 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5342 return -1;
5343
5344 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5345 {
5346 if (TREE_CODE (field) != FIELD_DECL)
5347 continue;
5348
5349 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5350 if (sub_count < 0)
5351 return -1;
5352 count = count > sub_count ? count : sub_count;
5353 }
5354
5355 /* There must be no padding. */
5356 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5357 return -1;
5358
5359 return count;
5360 }
5361
5362 default:
5363 break;
5364 }
5365
5366 return -1;
5367 }
5368
5369 /* Return true if PCS_VARIANT should use VFP registers. */
5370 static bool
5371 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5372 {
5373 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5374 {
5375 static bool seen_thumb1_vfp = false;
5376
5377 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5378 {
5379 sorry ("Thumb-1 hard-float VFP ABI");
5380 /* sorry() is not immediately fatal, so only display this once. */
5381 seen_thumb1_vfp = true;
5382 }
5383
5384 return true;
5385 }
5386
5387 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5388 return false;
5389
5390 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5391 (TARGET_VFP_DOUBLE || !is_double));
5392 }
5393
5394 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5395 suitable for passing or returning in VFP registers for the PCS
5396 variant selected. If it is, then *BASE_MODE is updated to contain
5397 a machine mode describing each element of the argument's type and
5398 *COUNT to hold the number of such elements. */
5399 static bool
5400 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5401 machine_mode mode, const_tree type,
5402 machine_mode *base_mode, int *count)
5403 {
5404 machine_mode new_mode = VOIDmode;
5405
5406 /* If we have the type information, prefer that to working things
5407 out from the mode. */
5408 if (type)
5409 {
5410 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5411
5412 if (ag_count > 0 && ag_count <= 4)
5413 *count = ag_count;
5414 else
5415 return false;
5416 }
5417 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5418 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5419 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5420 {
5421 *count = 1;
5422 new_mode = mode;
5423 }
5424 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5425 {
5426 *count = 2;
5427 new_mode = (mode == DCmode ? DFmode : SFmode);
5428 }
5429 else
5430 return false;
5431
5432
5433 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5434 return false;
5435
5436 *base_mode = new_mode;
5437 return true;
5438 }
5439
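/* Return true if a value of mode MODE (and type TYPE, if known) can be
   returned in VFP registers when using PCS_VARIANT. */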
5440 static bool
5441 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5442 machine_mode mode, const_tree type)
5443 {
5444 int count ATTRIBUTE_UNUSED;
5445 machine_mode ag_mode ATTRIBUTE_UNUSED;
5446
5447 if (!use_vfp_abi (pcs_variant, false))
5448 return false;
5449 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5450 &ag_mode, &count);
5451 }
5452
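/* Return true if an argument of mode MODE (and type TYPE, if known) is
   a candidate for passing in VFP registers; if so, record the element
   mode and count in PCUM. */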
5453 static bool
5454 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5455 const_tree type)
5456 {
5457 if (!use_vfp_abi (pcum->pcs_variant, false))
5458 return false;
5459
5460 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5461 &pcum->aapcs_vfp_rmode,
5462 &pcum->aapcs_vfp_rcount);
5463 }
5464
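/* Try to allocate VFP registers for the current argument. On success,
   set PCUM->aapcs_reg to the register (or PARALLEL of registers) that
   will hold it and return true; otherwise return false. */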
5465 static bool
5466 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5467 const_tree type ATTRIBUTE_UNUSED)
5468 {
5469 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5470 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5471 int regno;
5472
5473 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5474 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5475 {
5476 pcum->aapcs_vfp_reg_alloc = mask << regno;
5477 if (mode == BLKmode
5478 || (mode == TImode && ! TARGET_NEON)
5479 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5480 {
5481 int i;
5482 int rcount = pcum->aapcs_vfp_rcount;
5483 int rshift = shift;
5484 machine_mode rmode = pcum->aapcs_vfp_rmode;
5485 rtx par;
5486 if (!TARGET_NEON)
5487 {
5488 /* Avoid using unsupported vector modes. */
5489 if (rmode == V2SImode)
5490 rmode = DImode;
5491 else if (rmode == V4SImode)
5492 {
5493 rmode = DImode;
5494 rcount *= 2;
5495 rshift /= 2;
5496 }
5497 }
5498 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5499 for (i = 0; i < rcount; i++)
5500 {
5501 rtx tmp = gen_rtx_REG (rmode,
5502 FIRST_VFP_REGNUM + regno + i * rshift);
5503 tmp = gen_rtx_EXPR_LIST
5504 (VOIDmode, tmp,
5505 GEN_INT (i * GET_MODE_SIZE (rmode)));
5506 XVECEXP (par, 0, i) = tmp;
5507 }
5508
5509 pcum->aapcs_reg = par;
5510 }
5511 else
5512 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5513 return true;
5514 }
5515 return false;
5516 }
5517
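/* Return an RTX describing where a value of MODE (and type TYPE, if
   known) is returned in VFP registers: either a single register, or a
   PARALLEL for aggregates and vector modes that are not directly
   supported. Return NULL if the VFP ABI is not being used. */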
5518 static rtx
5519 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
5520 machine_mode mode,
5521 const_tree type ATTRIBUTE_UNUSED)
5522 {
5523 if (!use_vfp_abi (pcs_variant, false))
5524 return NULL;
5525
5526 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5527 {
5528 int count;
5529 machine_mode ag_mode;
5530 int i;
5531 rtx par;
5532 int shift;
5533
5534 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5535 &ag_mode, &count);
5536
5537 if (!TARGET_NEON)
5538 {
5539 if (ag_mode == V2SImode)
5540 ag_mode = DImode;
5541 else if (ag_mode == V4SImode)
5542 {
5543 ag_mode = DImode;
5544 count *= 2;
5545 }
5546 }
5547 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5548 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5549 for (i = 0; i < count; i++)
5550 {
5551 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5552 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5553 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5554 XVECEXP (par, 0, i) = tmp;
5555 }
5556
5557 return par;
5558 }
5559
5560 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5561 }
5562
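/* Mark the VFP registers allocated to the current argument as used and
   clear the pending allocation. */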
5563 static void
5564 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5565 machine_mode mode ATTRIBUTE_UNUSED,
5566 const_tree type ATTRIBUTE_UNUSED)
5567 {
5568 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5569 pcum->aapcs_vfp_reg_alloc = 0;
5570 return;
5571 }
5572
5573 #define AAPCS_CP(X) \
5574 { \
5575 aapcs_ ## X ## _cum_init, \
5576 aapcs_ ## X ## _is_call_candidate, \
5577 aapcs_ ## X ## _allocate, \
5578 aapcs_ ## X ## _is_return_candidate, \
5579 aapcs_ ## X ## _allocate_return_reg, \
5580 aapcs_ ## X ## _advance \
5581 }
5582
5583 /* Table of co-processors that can be used to pass arguments in
5584 registers. Ideally no argument should be a candidate for more than
5585 one co-processor table entry, but the table is processed in order
5586 and stops after the first match. If that entry then fails to put
5587 the argument into a co-processor register, the argument will go on
5588 the stack. */
5589 static struct
5590 {
5591 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5592 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5593
5594 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5595 BLKmode) is a candidate for this co-processor's registers; this
5596 function should ignore any position-dependent state in
5597 CUMULATIVE_ARGS and only use call-type dependent information. */
5598 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5599
5600 /* Return true if the argument does get a co-processor register; it
5601 should set aapcs_reg to an RTX of the register allocated as is
5602 required for a return from FUNCTION_ARG. */
5603 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5604
5605 /* Return true if a result of mode MODE (or type TYPE if MODE is
5606 BLKmode) can be returned in this co-processor's registers. */
5607 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5608
5609 /* Allocate and return an RTX element to hold the return type of a
5610 call; this routine must not fail and will only be called if
5611 is_return_candidate returned true with the same parameters. */
5612 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5613
5614 /* Finish processing this argument and prepare to start processing
5615 the next one. */
5616 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5617 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5618 {
5619 AAPCS_CP(vfp)
5620 };
5621
5622 #undef AAPCS_CP
5623
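/* Return the index of the first co-processor slot that accepts this
   argument as a call candidate, or -1 if none does. */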
5624 static int
5625 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5626 const_tree type)
5627 {
5628 int i;
5629
5630 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5631 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5632 return i;
5633
5634 return -1;
5635 }
5636
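/* Return the index of the co-processor slot that will return a value
   of TYPE for a function with type (or decl) FNTYPE, or -1 if the
   value is returned in core registers. */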
5637 static int
5638 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5639 {
5640 /* We aren't passed a decl, so we can't check that a call is local.
5641 However, it isn't clear that that would be a win anyway, since it
5642 might limit some tail-calling opportunities. */
5643 enum arm_pcs pcs_variant;
5644
5645 if (fntype)
5646 {
5647 const_tree fndecl = NULL_TREE;
5648
5649 if (TREE_CODE (fntype) == FUNCTION_DECL)
5650 {
5651 fndecl = fntype;
5652 fntype = TREE_TYPE (fntype);
5653 }
5654
5655 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5656 }
5657 else
5658 pcs_variant = arm_pcs_default;
5659
5660 if (pcs_variant != ARM_PCS_AAPCS)
5661 {
5662 int i;
5663
5664 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5665 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5666 TYPE_MODE (type),
5667 type))
5668 return i;
5669 }
5670 return -1;
5671 }
5672
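/* Return an RTX describing where a value of MODE and TYPE is returned
   by a function with type (or decl) FNTYPE, applying the AAPCS rules
   for co-processor and core registers. */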
5673 static rtx
5674 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5675 const_tree fntype)
5676 {
5677 /* We aren't passed a decl, so we can't check that a call is local.
5678 However, it isn't clear that that would be a win anyway, since it
5679 might limit some tail-calling opportunities. */
5680 enum arm_pcs pcs_variant;
5681 int unsignedp ATTRIBUTE_UNUSED;
5682
5683 if (fntype)
5684 {
5685 const_tree fndecl = NULL_TREE;
5686
5687 if (TREE_CODE (fntype) == FUNCTION_DECL)
5688 {
5689 fndecl = fntype;
5690 fntype = TREE_TYPE (fntype);
5691 }
5692
5693 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5694 }
5695 else
5696 pcs_variant = arm_pcs_default;
5697
5698 /* Promote integer types. */
5699 if (type && INTEGRAL_TYPE_P (type))
5700 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5701
5702 if (pcs_variant != ARM_PCS_AAPCS)
5703 {
5704 int i;
5705
5706 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5707 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5708 type))
5709 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5710 mode, type);
5711 }
5712
5713 /* Promote small structs returned in a register to full-word size
5714 for big-endian AAPCS. */
5715 if (type && arm_return_in_msb (type))
5716 {
5717 HOST_WIDE_INT size = int_size_in_bytes (type);
5718 if (size % UNITS_PER_WORD != 0)
5719 {
5720 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5721 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5722 }
5723 }
5724
5725 return gen_rtx_REG (mode, R0_REGNUM);
5726 }
5727
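/* Return the register in which a libcall result of MODE is returned
   under the AAPCS. Small fixed-point results are widened to SImode on
   big-endian targets. */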
5728 static rtx
5729 aapcs_libcall_value (machine_mode mode)
5730 {
5731 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5732 && GET_MODE_SIZE (mode) <= 4)
5733 mode = SImode;
5734
5735 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5736 }
5737
5738 /* Lay out a function argument using the AAPCS rules. The rule
5739 numbers referred to here are those in the AAPCS. */
5740 static void
5741 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5742 const_tree type, bool named)
5743 {
5744 int nregs, nregs2;
5745 int ncrn;
5746
5747 /* We only need to do this once per argument. */
5748 if (pcum->aapcs_arg_processed)
5749 return;
5750
5751 pcum->aapcs_arg_processed = true;
5752
5753 /* Special case: if named is false then we are handling an incoming
5754 anonymous argument which is on the stack. */
5755 if (!named)
5756 return;
5757
5758 /* Is this a potential co-processor register candidate? */
5759 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5760 {
5761 int slot = aapcs_select_call_coproc (pcum, mode, type);
5762 pcum->aapcs_cprc_slot = slot;
5763
5764 /* We don't have to apply any of the rules from part B of the
5765 preparation phase, these are handled elsewhere in the
5766 compiler. */
5767
5768 if (slot >= 0)
5769 {
5770 /* A Co-processor register candidate goes either in its own
5771 class of registers or on the stack. */
5772 if (!pcum->aapcs_cprc_failed[slot])
5773 {
5774 /* C1.cp - Try to allocate the argument to co-processor
5775 registers. */
5776 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5777 return;
5778
5779 /* C2.cp - Put the argument on the stack and note that we
5780 can't assign any more candidates in this slot. We also
5781 need to note that we have allocated stack space, so that
5782 we won't later try to split a non-cprc candidate between
5783 core registers and the stack. */
5784 pcum->aapcs_cprc_failed[slot] = true;
5785 pcum->can_split = false;
5786 }
5787
5788 /* We didn't get a register, so this argument goes on the
5789 stack. */
5790 gcc_assert (pcum->can_split == false);
5791 return;
5792 }
5793 }
5794
5795 /* C3 - For double-word aligned arguments, round the NCRN up to the
5796 next even number. */
5797 ncrn = pcum->aapcs_ncrn;
5798 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5799 ncrn++;
5800
5801 nregs = ARM_NUM_REGS2(mode, type);
5802
5803 /* Sigh, this test should really assert that nregs > 0, but a GCC
5804 extension allows empty structs and then gives them zero size; it
5805 then allows such a structure to be passed by value. For some of
5806 the code below we have to pretend that such an argument has
5807 non-zero size so that we 'locate' it correctly either in
5808 registers or on the stack. */
5809 gcc_assert (nregs >= 0);
5810
5811 nregs2 = nregs ? nregs : 1;
5812
5813 /* C4 - Argument fits entirely in core registers. */
5814 if (ncrn + nregs2 <= NUM_ARG_REGS)
5815 {
5816 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5817 pcum->aapcs_next_ncrn = ncrn + nregs;
5818 return;
5819 }
5820
5821 /* C5 - Some core registers left and there are no arguments already
5822 on the stack: split this argument between the remaining core
5823 registers and the stack. */
5824 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5825 {
5826 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5827 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5828 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5829 return;
5830 }
5831
5832 /* C6 - NCRN is set to 4. */
5833 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5834
5835 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5836 return;
5837 }
5838
5839 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5840 for a call to a function whose data type is FNTYPE.
5841 For a library call, FNTYPE is NULL. */
5842 void
5843 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5844 rtx libname,
5845 tree fndecl)
5846 {
5847 /* Determine the calling convention (PCS variant) in use. */
5848 if (fntype)
5849 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5850 else
5851 pcum->pcs_variant = arm_pcs_default;
5852
5853 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5854 {
5855 if (arm_libcall_uses_aapcs_base (libname))
5856 pcum->pcs_variant = ARM_PCS_AAPCS;
5857
5858 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5859 pcum->aapcs_reg = NULL_RTX;
5860 pcum->aapcs_partial = 0;
5861 pcum->aapcs_arg_processed = false;
5862 pcum->aapcs_cprc_slot = -1;
5863 pcum->can_split = true;
5864
5865 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5866 {
5867 int i;
5868
5869 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5870 {
5871 pcum->aapcs_cprc_failed[i] = false;
5872 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5873 }
5874 }
5875 return;
5876 }
5877
5878 /* Legacy ABIs */
5879
5880 /* On the ARM, the offset starts at 0. */
5881 pcum->nregs = 0;
5882 pcum->iwmmxt_nregs = 0;
5883 pcum->can_split = true;
5884
5885 /* Varargs vectors are treated the same as long long.
5886 named_count avoids having to change the way arm handles 'named'. */
5887 pcum->named_count = 0;
5888 pcum->nargs = 0;
5889
5890 if (TARGET_REALLY_IWMMXT && fntype)
5891 {
5892 tree fn_arg;
5893
5894 for (fn_arg = TYPE_ARG_TYPES (fntype);
5895 fn_arg;
5896 fn_arg = TREE_CHAIN (fn_arg))
5897 pcum->named_count += 1;
5898
5899 if (! pcum->named_count)
5900 pcum->named_count = INT_MAX;
5901 }
5902 }
5903
5904 /* Return true if we use LRA instead of the reload pass. */
5905 static bool
5906 arm_lra_p (void)
5907 {
5908 return arm_lra_flag;
5909 }
5910
5911 /* Return true if mode/type need doubleword alignment. */
5912 static bool
5913 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5914 {
5915 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5916 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5917 }
5918
5919
5920 /* Determine where to put an argument to a function.
5921 Value is zero to push the argument on the stack,
5922 or a hard register in which to store the argument.
5923
5924 MODE is the argument's machine mode.
5925 TYPE is the data type of the argument (as a tree).
5926 This is null for libcalls where that information may
5927 not be available.
5928 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5929 the preceding args and about the function being called.
5930 NAMED is nonzero if this argument is a named parameter
5931 (otherwise it is an extra parameter matching an ellipsis).
5932
5933 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5934 other arguments are passed on the stack. If (NAMED == 0) (which happens
5935 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5936 defined), say it is passed on the stack (function_prologue will
5937 indeed make it pass on the stack if necessary). */
5938
5939 static rtx
5940 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5941 const_tree type, bool named)
5942 {
5943 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5944 int nregs;
5945
5946 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5947 a call insn (op3 of a call_value insn). */
5948 if (mode == VOIDmode)
5949 return const0_rtx;
5950
5951 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5952 {
5953 aapcs_layout_arg (pcum, mode, type, named);
5954 return pcum->aapcs_reg;
5955 }
5956
5957 /* Varargs vectors are treated the same as long long.
5958 named_count avoids having to change the way arm handles 'named'. */
5959 if (TARGET_IWMMXT_ABI
5960 && arm_vector_mode_supported_p (mode)
5961 && pcum->named_count > pcum->nargs + 1)
5962 {
5963 if (pcum->iwmmxt_nregs <= 9)
5964 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5965 else
5966 {
5967 pcum->can_split = false;
5968 return NULL_RTX;
5969 }
5970 }
5971
5972 /* Put doubleword aligned quantities in even register pairs. */
5973 if (pcum->nregs & 1
5974 && ARM_DOUBLEWORD_ALIGN
5975 && arm_needs_doubleword_align (mode, type))
5976 pcum->nregs++;
5977
5978 /* Only allow splitting an arg between regs and memory if all preceding
5979 args were allocated to regs. For args passed by reference we only count
5980 the reference pointer. */
5981 if (pcum->can_split)
5982 nregs = 1;
5983 else
5984 nregs = ARM_NUM_REGS2 (mode, type);
5985
5986 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5987 return NULL_RTX;
5988
5989 return gen_rtx_REG (mode, pcum->nregs);
5990 }
5991
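/* Return the alignment boundary, in bits, for an argument of MODE and
   TYPE. */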
5992 static unsigned int
5993 arm_function_arg_boundary (machine_mode mode, const_tree type)
5994 {
5995 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5996 ? DOUBLEWORD_ALIGNMENT
5997 : PARM_BOUNDARY);
5998 }
5999
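/* Return the number of bytes of an argument of MODE and TYPE that are
   passed in registers when the rest of the argument is passed on the
   stack, or zero if the argument is passed entirely in registers or
   entirely on the stack. */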
6000 static int
6001 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6002 tree type, bool named)
6003 {
6004 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6005 int nregs = pcum->nregs;
6006
6007 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6008 {
6009 aapcs_layout_arg (pcum, mode, type, named);
6010 return pcum->aapcs_partial;
6011 }
6012
6013 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6014 return 0;
6015
6016 if (NUM_ARG_REGS > nregs
6017 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6018 && pcum->can_split)
6019 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6020
6021 return 0;
6022 }
6023
6024 /* Update the data in PCUM to advance over an argument
6025 of mode MODE and data type TYPE.
6026 (TYPE is null for libcalls where that information may not be available.) */
6027
6028 static void
6029 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6030 const_tree type, bool named)
6031 {
6032 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6033
6034 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6035 {
6036 aapcs_layout_arg (pcum, mode, type, named);
6037
6038 if (pcum->aapcs_cprc_slot >= 0)
6039 {
6040 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6041 type);
6042 pcum->aapcs_cprc_slot = -1;
6043 }
6044
6045 /* Generic stuff. */
6046 pcum->aapcs_arg_processed = false;
6047 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6048 pcum->aapcs_reg = NULL_RTX;
6049 pcum->aapcs_partial = 0;
6050 }
6051 else
6052 {
6053 pcum->nargs += 1;
6054 if (arm_vector_mode_supported_p (mode)
6055 && pcum->named_count > pcum->nargs
6056 && TARGET_IWMMXT_ABI)
6057 pcum->iwmmxt_nregs += 1;
6058 else
6059 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6060 }
6061 }
6062
6063 /* Variable sized types are passed by reference. This is a GCC
6064 extension to the ARM ABI. */
6065
6066 static bool
6067 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6068 machine_mode mode ATTRIBUTE_UNUSED,
6069 const_tree type, bool named ATTRIBUTE_UNUSED)
6070 {
6071 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6072 }
6073 \f
6074 /* Encode the current state of the #pragma [no_]long_calls. */
6075 typedef enum
6076 {
6077 OFF, /* No #pragma [no_]long_calls is in effect. */
6078 LONG, /* #pragma long_calls is in effect. */
6079 SHORT /* #pragma no_long_calls is in effect. */
6080 } arm_pragma_enum;
6081
6082 static arm_pragma_enum arm_pragma_long_calls = OFF;
6083
6084 void
6085 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6086 {
6087 arm_pragma_long_calls = LONG;
6088 }
6089
6090 void
6091 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6092 {
6093 arm_pragma_long_calls = SHORT;
6094 }
6095
6096 void
6097 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6098 {
6099 arm_pragma_long_calls = OFF;
6100 }
6101 \f
6102 /* Handle an attribute requiring a FUNCTION_DECL;
6103 arguments as in struct attribute_spec.handler. */
6104 static tree
6105 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6106 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6107 {
6108 if (TREE_CODE (*node) != FUNCTION_DECL)
6109 {
6110 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6111 name);
6112 *no_add_attrs = true;
6113 }
6114
6115 return NULL_TREE;
6116 }
6117
6118 /* Handle an "interrupt" or "isr" attribute;
6119 arguments as in struct attribute_spec.handler. */
6120 static tree
6121 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6122 bool *no_add_attrs)
6123 {
6124 if (DECL_P (*node))
6125 {
6126 if (TREE_CODE (*node) != FUNCTION_DECL)
6127 {
6128 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6129 name);
6130 *no_add_attrs = true;
6131 }
6132 /* FIXME: the argument if any is checked for type attributes;
6133 should it be checked for decl ones? */
6134 }
6135 else
6136 {
6137 if (TREE_CODE (*node) == FUNCTION_TYPE
6138 || TREE_CODE (*node) == METHOD_TYPE)
6139 {
6140 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6141 {
6142 warning (OPT_Wattributes, "%qE attribute ignored",
6143 name);
6144 *no_add_attrs = true;
6145 }
6146 }
6147 else if (TREE_CODE (*node) == POINTER_TYPE
6148 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6149 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6150 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6151 {
6152 *node = build_variant_type_copy (*node);
6153 TREE_TYPE (*node) = build_type_attribute_variant
6154 (TREE_TYPE (*node),
6155 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6156 *no_add_attrs = true;
6157 }
6158 else
6159 {
6160 /* Possibly pass this attribute on from the type to a decl. */
6161 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6162 | (int) ATTR_FLAG_FUNCTION_NEXT
6163 | (int) ATTR_FLAG_ARRAY_NEXT))
6164 {
6165 *no_add_attrs = true;
6166 return tree_cons (name, args, NULL_TREE);
6167 }
6168 else
6169 {
6170 warning (OPT_Wattributes, "%qE attribute ignored",
6171 name);
6172 }
6173 }
6174 }
6175
6176 return NULL_TREE;
6177 }
6178
6179 /* Handle a "pcs" attribute; arguments as in struct
6180 attribute_spec.handler. */
6181 static tree
6182 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6183 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6184 {
6185 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6186 {
6187 warning (OPT_Wattributes, "%qE attribute ignored", name);
6188 *no_add_attrs = true;
6189 }
6190 return NULL_TREE;
6191 }
6192
6193 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6194 /* Handle the "notshared" attribute. This attribute is another way of
6195 requesting hidden visibility. ARM's compiler supports
6196 "__declspec(notshared)"; we support the same thing via an
6197 attribute. */
6198
6199 static tree
6200 arm_handle_notshared_attribute (tree *node,
6201 tree name ATTRIBUTE_UNUSED,
6202 tree args ATTRIBUTE_UNUSED,
6203 int flags ATTRIBUTE_UNUSED,
6204 bool *no_add_attrs)
6205 {
6206 tree decl = TYPE_NAME (*node);
6207
6208 if (decl)
6209 {
6210 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6211 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6212 *no_add_attrs = false;
6213 }
6214 return NULL_TREE;
6215 }
6216 #endif
6217
6218 /* Return 0 if the attributes for two types are incompatible, 1 if they
6219 are compatible, and 2 if they are nearly compatible (which causes a
6220 warning to be generated). */
6221 static int
6222 arm_comp_type_attributes (const_tree type1, const_tree type2)
6223 {
6224 int l1, l2, s1, s2;
6225
6226 /* Check for mismatch of non-default calling convention. */
6227 if (TREE_CODE (type1) != FUNCTION_TYPE)
6228 return 1;
6229
6230 /* Check for mismatched call attributes. */
6231 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6232 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6233 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6234 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6235
6236 /* Only bother to check if an attribute is defined. */
6237 if (l1 | l2 | s1 | s2)
6238 {
6239 /* If one type has an attribute, the other must have the same attribute. */
6240 if ((l1 != l2) || (s1 != s2))
6241 return 0;
6242
6243 /* Disallow mixed attributes. */
6244 if ((l1 & s2) || (l2 & s1))
6245 return 0;
6246 }
6247
6248 /* Check for mismatched ISR attribute. */
6249 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6250 if (! l1)
6251 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6252 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6253 if (! l2)
6254 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6255 if (l1 != l2)
6256 return 0;
6257
6258 return 1;
6259 }
6260
6261 /* Assigns default attributes to newly defined type. This is used to
6262 set short_call/long_call attributes for function types of
6263 functions defined inside corresponding #pragma scopes. */
6264 static void
6265 arm_set_default_type_attributes (tree type)
6266 {
6267 /* Add __attribute__ ((long_call)) to all functions, when
6268 inside #pragma long_calls or __attribute__ ((short_call)),
6269 when inside #pragma no_long_calls. */
6270 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6271 {
6272 tree type_attr_list, attr_name;
6273 type_attr_list = TYPE_ATTRIBUTES (type);
6274
6275 if (arm_pragma_long_calls == LONG)
6276 attr_name = get_identifier ("long_call");
6277 else if (arm_pragma_long_calls == SHORT)
6278 attr_name = get_identifier ("short_call");
6279 else
6280 return;
6281
6282 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6283 TYPE_ATTRIBUTES (type) = type_attr_list;
6284 }
6285 }
6286 \f
6287 /* Return true if DECL is known to be linked into section SECTION. */
6288
6289 static bool
6290 arm_function_in_section_p (tree decl, section *section)
6291 {
6292 /* We can only be certain about functions defined in the same
6293 compilation unit. */
6294 if (!TREE_STATIC (decl))
6295 return false;
6296
6297 /* Make sure that SYMBOL always binds to the definition in this
6298 compilation unit. */
6299 if (!targetm.binds_local_p (decl))
6300 return false;
6301
6302 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6303 if (!DECL_SECTION_NAME (decl))
6304 {
6305 /* Make sure that we will not create a unique section for DECL. */
6306 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6307 return false;
6308 }
6309
6310 return function_section (decl) == section;
6311 }
6312
6313 /* Return nonzero if a 32-bit "long_call" should be generated for
6314 a call from the current function to DECL. We generate a long_call
6315 if the function:
6316
6317 a. has an __attribute__ ((long_call))
6318 or b. is within the scope of a #pragma long_calls
6319 or c. the -mlong-calls command line switch has been specified
6320
6321 However we do not generate a long call if the function:
6322
6323 d. has an __attribute__ ((short_call))
6324 or e. is inside the scope of a #pragma no_long_calls
6325 or f. is defined in the same section as the current function. */
6326
6327 bool
6328 arm_is_long_call_p (tree decl)
6329 {
6330 tree attrs;
6331
6332 if (!decl)
6333 return TARGET_LONG_CALLS;
6334
6335 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6336 if (lookup_attribute ("short_call", attrs))
6337 return false;
6338
6339 /* For "f", be conservative, and only cater for cases in which the
6340 whole of the current function is placed in the same section. */
6341 if (!flag_reorder_blocks_and_partition
6342 && TREE_CODE (decl) == FUNCTION_DECL
6343 && arm_function_in_section_p (decl, current_function_section ()))
6344 return false;
6345
6346 if (lookup_attribute ("long_call", attrs))
6347 return true;
6348
6349 return TARGET_LONG_CALLS;
6350 }
6351
6352 /* Return nonzero if it is ok to make a tail-call to DECL. */
6353 static bool
6354 arm_function_ok_for_sibcall (tree decl, tree exp)
6355 {
6356 unsigned long func_type;
6357
6358 if (cfun->machine->sibcall_blocked)
6359 return false;
6360
6361 /* Never tailcall something if we are generating code for Thumb-1. */
6362 if (TARGET_THUMB1)
6363 return false;
6364
6365 /* The PIC register is live on entry to VxWorks PLT entries, so we
6366 must make the call before restoring the PIC register. */
6367 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6368 return false;
6369
6370 /* If we are interworking and the function is not declared static
6371 then we can't tail-call it unless we know that it exists in this
6372 compilation unit (since it might be a Thumb routine). */
6373 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6374 && !TREE_ASM_WRITTEN (decl))
6375 return false;
6376
6377 func_type = arm_current_func_type ();
6378 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6379 if (IS_INTERRUPT (func_type))
6380 return false;
6381
6382 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6383 {
6384 /* Check that the return value locations are the same. For
6385 example that we aren't returning a value from the sibling in
6386 a VFP register but then need to transfer it to a core
6387 register. */
6388 rtx a, b;
6389
6390 a = arm_function_value (TREE_TYPE (exp), decl, false);
6391 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6392 cfun->decl, false);
6393 if (!rtx_equal_p (a, b))
6394 return false;
6395 }
6396
6397 /* Never tailcall if function may be called with a misaligned SP. */
6398 if (IS_STACKALIGN (func_type))
6399 return false;
6400
6401 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6402 references should become a NOP. Don't convert such calls into
6403 sibling calls. */
6404 if (TARGET_AAPCS_BASED
6405 && arm_abi == ARM_ABI_AAPCS
6406 && decl
6407 && DECL_WEAK (decl))
6408 return false;
6409
6410 /* Everything else is ok. */
6411 return true;
6412 }
6413
6414 \f
6415 /* Addressing mode support functions. */
6416
6417 /* Return nonzero if X is a legitimate immediate operand when compiling
6418 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6419 int
6420 legitimate_pic_operand_p (rtx x)
6421 {
6422 if (GET_CODE (x) == SYMBOL_REF
6423 || (GET_CODE (x) == CONST
6424 && GET_CODE (XEXP (x, 0)) == PLUS
6425 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6426 return 0;
6427
6428 return 1;
6429 }
6430
6431 /* Record that the current function needs a PIC register. Initialize
6432 cfun->machine->pic_reg if we have not already done so. */
6433
6434 static void
6435 require_pic_register (void)
6436 {
6437 /* A lot of the logic here is made obscure by the fact that this
6438 routine gets called as part of the rtx cost estimation process.
6439 We don't want those calls to affect any assumptions about the real
6440 function; and further, we can't call entry_of_function() until we
6441 start the real expansion process. */
6442 if (!crtl->uses_pic_offset_table)
6443 {
6444 gcc_assert (can_create_pseudo_p ());
6445 if (arm_pic_register != INVALID_REGNUM
6446 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6447 {
6448 if (!cfun->machine->pic_reg)
6449 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6450
6451 /* Play games to avoid marking the function as needing pic
6452 if we are being called as part of the cost-estimation
6453 process. */
6454 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6455 crtl->uses_pic_offset_table = 1;
6456 }
6457 else
6458 {
6459 rtx_insn *seq, *insn;
6460
6461 if (!cfun->machine->pic_reg)
6462 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6463
6464 /* Play games to avoid marking the function as needing pic
6465 if we are being called as part of the cost-estimation
6466 process. */
6467 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6468 {
6469 crtl->uses_pic_offset_table = 1;
6470 start_sequence ();
6471
6472 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6473 && arm_pic_register > LAST_LO_REGNUM)
6474 emit_move_insn (cfun->machine->pic_reg,
6475 gen_rtx_REG (Pmode, arm_pic_register));
6476 else
6477 arm_load_pic_register (0UL);
6478
6479 seq = get_insns ();
6480 end_sequence ();
6481
6482 for (insn = seq; insn; insn = NEXT_INSN (insn))
6483 if (INSN_P (insn))
6484 INSN_LOCATION (insn) = prologue_location;
6485
6486 /* We can be called during expansion of PHI nodes, where
6487 we can't yet emit instructions directly in the final
6488 insn stream. Queue the insns on the entry edge, they will
6489 be committed after everything else is expanded. */
6490 insert_insn_on_edge (seq,
6491 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6492 }
6493 }
6494 }
6495 }
6496
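/* Legitimize a PIC reference to ORIG (a SYMBOL_REF, LABEL_REF or CONST
   expression), returning an address that is valid when generating
   position-independent code, loading through the GOT where required.
   REG, if non-null, is a register that may be used to hold the
   result. */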
6497 rtx
6498 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6499 {
6500 if (GET_CODE (orig) == SYMBOL_REF
6501 || GET_CODE (orig) == LABEL_REF)
6502 {
6503 rtx insn;
6504
6505 if (reg == 0)
6506 {
6507 gcc_assert (can_create_pseudo_p ());
6508 reg = gen_reg_rtx (Pmode);
6509 }
6510
6511 /* VxWorks does not impose a fixed gap between segments; the run-time
6512 gap can be different from the object-file gap. We therefore can't
6513 use GOTOFF unless we are absolutely sure that the symbol is in the
6514 same segment as the GOT. Unfortunately, the flexibility of linker
6515 scripts means that we can't be sure of that in general, so assume
6516 that GOTOFF is never valid on VxWorks. */
6517 if ((GET_CODE (orig) == LABEL_REF
6518 || (GET_CODE (orig) == SYMBOL_REF &&
6519 SYMBOL_REF_LOCAL_P (orig)))
6520 && NEED_GOT_RELOC
6521 && arm_pic_data_is_text_relative)
6522 insn = arm_pic_static_addr (orig, reg);
6523 else
6524 {
6525 rtx pat;
6526 rtx mem;
6527
6528 /* If this function doesn't have a pic register, create one now. */
6529 require_pic_register ();
6530
6531 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6532
6533 /* Make the MEM as close to a constant as possible. */
6534 mem = SET_SRC (pat);
6535 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6536 MEM_READONLY_P (mem) = 1;
6537 MEM_NOTRAP_P (mem) = 1;
6538
6539 insn = emit_insn (pat);
6540 }
6541
6542 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6543 by loop. */
6544 set_unique_reg_note (insn, REG_EQUAL, orig);
6545
6546 return reg;
6547 }
6548 else if (GET_CODE (orig) == CONST)
6549 {
6550 rtx base, offset;
6551
6552 if (GET_CODE (XEXP (orig, 0)) == PLUS
6553 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6554 return orig;
6555
6556 /* Handle the case where we have: const (UNSPEC_TLS). */
6557 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6558 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6559 return orig;
6560
6561 /* Handle the case where we have:
6562 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6563 CONST_INT. */
6564 if (GET_CODE (XEXP (orig, 0)) == PLUS
6565 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6566 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6567 {
6568 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6569 return orig;
6570 }
6571
6572 if (reg == 0)
6573 {
6574 gcc_assert (can_create_pseudo_p ());
6575 reg = gen_reg_rtx (Pmode);
6576 }
6577
6578 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6579
6580 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6581 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6582 base == reg ? 0 : reg);
6583
6584 if (CONST_INT_P (offset))
6585 {
6586 /* The base register doesn't really matter, we only want to
6587 test the index for the appropriate mode. */
6588 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6589 {
6590 gcc_assert (can_create_pseudo_p ());
6591 offset = force_reg (Pmode, offset);
6592 }
6593
6594 if (CONST_INT_P (offset))
6595 return plus_constant (Pmode, base, INTVAL (offset));
6596 }
6597
6598 if (GET_MODE_SIZE (mode) > 4
6599 && (GET_MODE_CLASS (mode) == MODE_INT
6600 || TARGET_SOFT_FLOAT))
6601 {
6602 emit_insn (gen_addsi3 (reg, base, offset));
6603 return reg;
6604 }
6605
6606 return gen_rtx_PLUS (Pmode, base, offset);
6607 }
6608
6609 return orig;
6610 }
6611
6612
6613 /* Find a spare register to use during the prolog of a function. */
6614
6615 static int
6616 thumb_find_work_register (unsigned long pushed_regs_mask)
6617 {
6618 int reg;
6619
6620 /* Check the argument registers first as these are call-used. The
6621 register allocation order means that sometimes r3 might be used
6622 but earlier argument registers might not, so check them all. */
6623 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6624 if (!df_regs_ever_live_p (reg))
6625 return reg;
6626
6627 /* Before going on to check the call-saved registers we can try a couple
6628 more ways of deducing that r3 is available. The first is when we are
6629 pushing anonymous arguments onto the stack and we have less than 4
6630 registers worth of fixed arguments(*). In this case r3 will be part of
6631 the variable argument list and so we can be sure that it will be
6632 pushed right at the start of the function. Hence it will be available
6633 for the rest of the prologue.
6634 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6635 if (cfun->machine->uses_anonymous_args
6636 && crtl->args.pretend_args_size > 0)
6637 return LAST_ARG_REGNUM;
6638
6639 /* The other case is when we have fixed arguments but less than 4 registers
6640 worth. In this case r3 might be used in the body of the function, but
6641 it is not being used to convey an argument into the function. In theory
6642 we could just check crtl->args.size to see how many bytes are
6643 being passed in argument registers, but it seems that it is unreliable.
6644 Sometimes it will have the value 0 when in fact arguments are being
6645 passed. (See testcase execute/20021111-1.c for an example). So we also
6646 check the args_info.nregs field as well. The problem with this field is
6647 that it makes no allowances for arguments that are passed to the
6648 function but which are not used. Hence we could miss an opportunity
6649 when a function has an unused argument in r3. But it is better to be
6650 safe than to be sorry. */
6651 if (! cfun->machine->uses_anonymous_args
6652 && crtl->args.size >= 0
6653 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6654 && (TARGET_AAPCS_BASED
6655 ? crtl->args.info.aapcs_ncrn < 4
6656 : crtl->args.info.nregs < 4))
6657 return LAST_ARG_REGNUM;
6658
6659 /* Otherwise look for a call-saved register that is going to be pushed. */
6660 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6661 if (pushed_regs_mask & (1 << reg))
6662 return reg;
6663
6664 if (TARGET_THUMB2)
6665 {
6666 /* Thumb-2 can use high regs. */
6667 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6668 if (pushed_regs_mask & (1 << reg))
6669 return reg;
6670 }
6671 /* Something went wrong - thumb_compute_save_reg_mask()
6672 should have arranged for a suitable register to be pushed. */
6673 gcc_unreachable ();
6674 }
6675
6676 static GTY(()) int pic_labelno;
6677
6678 /* Generate code to load the PIC register. On Thumb-1 a low work
6679 register is chosen from SAVED_REGS if one is needed. */
6680
6681 void
6682 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6683 {
6684 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6685
6686 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6687 return;
6688
6689 gcc_assert (flag_pic);
6690
6691 pic_reg = cfun->machine->pic_reg;
6692 if (TARGET_VXWORKS_RTP)
6693 {
6694 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6695 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6696 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6697
6698 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6699
6700 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6701 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6702 }
6703 else
6704 {
6705 /* We use an UNSPEC rather than a LABEL_REF because this label
6706 never appears in the code stream. */
6707
6708 labelno = GEN_INT (pic_labelno++);
6709 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6710 l1 = gen_rtx_CONST (VOIDmode, l1);
6711
6712 /* On the ARM the PC register contains 'dot + 8' at the time of the
6713 addition, on the Thumb it is 'dot + 4'. */
6714 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6715 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6716 UNSPEC_GOTSYM_OFF);
6717 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6718
6719 if (TARGET_32BIT)
6720 {
6721 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6722 }
6723 else /* TARGET_THUMB1 */
6724 {
6725 if (arm_pic_register != INVALID_REGNUM
6726 && REGNO (pic_reg) > LAST_LO_REGNUM)
6727 {
6728 /* We will have pushed the pic register, so we should always be
6729 able to find a work register. */
6730 pic_tmp = gen_rtx_REG (SImode,
6731 thumb_find_work_register (saved_regs));
6732 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6733 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6734 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6735 }
6736 else if (arm_pic_register != INVALID_REGNUM
6737 && arm_pic_register > LAST_LO_REGNUM
6738 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6739 {
6740 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6741 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6742 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6743 }
6744 else
6745 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6746 }
6747 }
6748
6749 /* Need to emit this whether or not we obey regdecls,
6750 since setjmp/longjmp can cause life info to screw up. */
6751 emit_use (pic_reg);
6752 }
6753
6754 /* Generate code to load the address of a static var when flag_pic is set. */
6755 static rtx
6756 arm_pic_static_addr (rtx orig, rtx reg)
6757 {
6758 rtx l1, labelno, offset_rtx, insn;
6759
6760 gcc_assert (flag_pic);
6761
6762 /* We use an UNSPEC rather than a LABEL_REF because this label
6763 never appears in the code stream. */
6764 labelno = GEN_INT (pic_labelno++);
6765 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6766 l1 = gen_rtx_CONST (VOIDmode, l1);
6767
6768 /* On the ARM the PC register contains 'dot + 8' at the time of the
6769 addition, on the Thumb it is 'dot + 4'. */
6770 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6771 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6772 UNSPEC_SYMBOL_OFFSET);
6773 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6774
6775 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6776 return insn;
6777 }
6778
6779 /* Return nonzero if X is valid as an ARM state addressing register. */
6780 static int
6781 arm_address_register_rtx_p (rtx x, int strict_p)
6782 {
6783 int regno;
6784
6785 if (!REG_P (x))
6786 return 0;
6787
6788 regno = REGNO (x);
6789
6790 if (strict_p)
6791 return ARM_REGNO_OK_FOR_BASE_P (regno);
6792
6793 return (regno <= LAST_ARM_REGNUM
6794 || regno >= FIRST_PSEUDO_REGISTER
6795 || regno == FRAME_POINTER_REGNUM
6796 || regno == ARG_POINTER_REGNUM);
6797 }
6798
6799 /* Return TRUE if this rtx is the difference of a symbol and a label,
6800 and will reduce to a PC-relative relocation in the object file.
6801 Expressions like this can be left alone when generating PIC, rather
6802 than forced through the GOT. */
6803 static int
6804 pcrel_constant_p (rtx x)
6805 {
6806 if (GET_CODE (x) == MINUS)
6807 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6808
6809 return FALSE;
6810 }
6811
6812 /* Return true if X will surely end up in an index register after next
6813 splitting pass. */
6814 static bool
6815 will_be_in_index_register (const_rtx x)
6816 {
6817 /* arm.md: calculate_pic_address will split this into a register. */
6818 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6819 }
6820
6821 /* Return nonzero if X is a valid ARM state address operand. */
6822 int
6823 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6824 int strict_p)
6825 {
6826 bool use_ldrd;
6827 enum rtx_code code = GET_CODE (x);
6828
6829 if (arm_address_register_rtx_p (x, strict_p))
6830 return 1;
6831
6832 use_ldrd = (TARGET_LDRD
6833 && (mode == DImode
6834 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6835
6836 if (code == POST_INC || code == PRE_DEC
6837 || ((code == PRE_INC || code == POST_DEC)
6838 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6839 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6840
6841 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6842 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6843 && GET_CODE (XEXP (x, 1)) == PLUS
6844 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6845 {
6846 rtx addend = XEXP (XEXP (x, 1), 1);
6847
6848 /* Don't allow ldrd post-increment by register because it's hard
6849 to fix up invalid register choices. */
6850 if (use_ldrd
6851 && GET_CODE (x) == POST_MODIFY
6852 && REG_P (addend))
6853 return 0;
6854
6855 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6856 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6857 }
6858
6859 /* After reload, constants split into minipools will have addresses
6860 from a LABEL_REF. */
6861 else if (reload_completed
6862 && (code == LABEL_REF
6863 || (code == CONST
6864 && GET_CODE (XEXP (x, 0)) == PLUS
6865 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6866 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6867 return 1;
6868
6869 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6870 return 0;
6871
6872 else if (code == PLUS)
6873 {
6874 rtx xop0 = XEXP (x, 0);
6875 rtx xop1 = XEXP (x, 1);
6876
6877 return ((arm_address_register_rtx_p (xop0, strict_p)
6878 && ((CONST_INT_P (xop1)
6879 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6880 || (!strict_p && will_be_in_index_register (xop1))))
6881 || (arm_address_register_rtx_p (xop1, strict_p)
6882 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6883 }
6884
6885 #if 0
6886 /* Reload currently can't handle MINUS, so disable this for now. */
6887 else if (GET_CODE (x) == MINUS)
6888 {
6889 rtx xop0 = XEXP (x, 0);
6890 rtx xop1 = XEXP (x, 1);
6891
6892 return (arm_address_register_rtx_p (xop0, strict_p)
6893 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6894 }
6895 #endif
6896
6897 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6898 && code == SYMBOL_REF
6899 && CONSTANT_POOL_ADDRESS_P (x)
6900 && ! (flag_pic
6901 && symbol_mentioned_p (get_pool_constant (x))
6902 && ! pcrel_constant_p (get_pool_constant (x))))
6903 return 1;
6904
6905 return 0;
6906 }
6907
6908 /* Return nonzero if X is a valid Thumb-2 address operand. */
6909 static int
6910 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6911 {
6912 bool use_ldrd;
6913 enum rtx_code code = GET_CODE (x);
6914
6915 if (arm_address_register_rtx_p (x, strict_p))
6916 return 1;
6917
6918 use_ldrd = (TARGET_LDRD
6919 && (mode == DImode
6920 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6921
6922 if (code == POST_INC || code == PRE_DEC
6923 || ((code == PRE_INC || code == POST_DEC)
6924 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6925 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6926
6927 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6928 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6929 && GET_CODE (XEXP (x, 1)) == PLUS
6930 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6931 {
6932 /* Thumb-2 only has autoincrement by constant. */
6933 rtx addend = XEXP (XEXP (x, 1), 1);
6934 HOST_WIDE_INT offset;
6935
6936 if (!CONST_INT_P (addend))
6937 return 0;
6938
6939 offset = INTVAL(addend);
6940 if (GET_MODE_SIZE (mode) <= 4)
6941 return (offset > -256 && offset < 256);
6942
6943 return (use_ldrd && offset > -1024 && offset < 1024
6944 && (offset & 3) == 0);
6945 }
6946
6947 /* After reload, constants split into minipools will have addresses
6948 from a LABEL_REF. */
6949 else if (reload_completed
6950 && (code == LABEL_REF
6951 || (code == CONST
6952 && GET_CODE (XEXP (x, 0)) == PLUS
6953 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6954 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6955 return 1;
6956
6957 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6958 return 0;
6959
6960 else if (code == PLUS)
6961 {
6962 rtx xop0 = XEXP (x, 0);
6963 rtx xop1 = XEXP (x, 1);
6964
6965 return ((arm_address_register_rtx_p (xop0, strict_p)
6966 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6967 || (!strict_p && will_be_in_index_register (xop1))))
6968 || (arm_address_register_rtx_p (xop1, strict_p)
6969 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6970 }
6971
6972 /* Normally we can assign constant values to target registers without
6973 the help of the constant pool. But there are cases where we have to
6974 use the constant pool, for example:
6975 1) assigning a label to a register;
6976 2) sign-extending an 8-bit value to 32 bits and then assigning it to
6977 a register.
6978 A constant pool access of the form:
6979 (set (reg r0) (mem (symbol_ref (".LC0"))))
6980 will cause the use of the literal pool (later, in arm_reorg).
6981 So here we mark such a form as invalid, and the compiler will then
6982 adjust it into:
6983 (set (reg r0) (symbol_ref (".LC0")))
6984 (set (reg r0) (mem (reg r0))).
6985 No extra register is required, and (mem (reg r0)) won't cause the use
6986 of literal pools. */
6987 else if (arm_disable_literal_pool && code == SYMBOL_REF
6988 && CONSTANT_POOL_ADDRESS_P (x))
6989 return 0;
6990
6991 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6992 && code == SYMBOL_REF
6993 && CONSTANT_POOL_ADDRESS_P (x)
6994 && ! (flag_pic
6995 && symbol_mentioned_p (get_pool_constant (x))
6996 && ! pcrel_constant_p (get_pool_constant (x))))
6997 return 1;
6998
6999 return 0;
7000 }
7001
7002 /* Return nonzero if INDEX is valid for an address index operand in
7003 ARM state. */
7004 static int
7005 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7006 int strict_p)
7007 {
7008 HOST_WIDE_INT range;
7009 enum rtx_code code = GET_CODE (index);
7010
7011 /* Standard coprocessor addressing modes. */
7012 if (TARGET_HARD_FLOAT
7013 && TARGET_VFP
7014 && (mode == SFmode || mode == DFmode))
7015 return (code == CONST_INT && INTVAL (index) < 1024
7016 && INTVAL (index) > -1024
7017 && (INTVAL (index) & 3) == 0);
7018
7019 /* For quad modes, we restrict the constant offset to be slightly less
7020 than what the instruction format permits. We do this because for
7021 quad mode moves, we will actually decompose them into two separate
7022 double-mode reads or writes. INDEX must therefore be a valid
7023 (double-mode) offset and so should INDEX+8. */
7024 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7025 return (code == CONST_INT
7026 && INTVAL (index) < 1016
7027 && INTVAL (index) > -1024
7028 && (INTVAL (index) & 3) == 0);
7029
7030 /* We have no such constraint on double mode offsets, so we permit the
7031 full range of the instruction format. */
7032 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7033 return (code == CONST_INT
7034 && INTVAL (index) < 1024
7035 && INTVAL (index) > -1024
7036 && (INTVAL (index) & 3) == 0);
7037
7038 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7039 return (code == CONST_INT
7040 && INTVAL (index) < 1024
7041 && INTVAL (index) > -1024
7042 && (INTVAL (index) & 3) == 0);
7043
7044 if (arm_address_register_rtx_p (index, strict_p)
7045 && (GET_MODE_SIZE (mode) <= 4))
7046 return 1;
7047
7048 if (mode == DImode || mode == DFmode)
7049 {
7050 if (code == CONST_INT)
7051 {
7052 HOST_WIDE_INT val = INTVAL (index);
7053
7054 if (TARGET_LDRD)
7055 return val > -256 && val < 256;
7056 else
7057 return val > -4096 && val < 4092;
7058 }
7059
7060 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7061 }
7062
7063 if (GET_MODE_SIZE (mode) <= 4
7064 && ! (arm_arch4
7065 && (mode == HImode
7066 || mode == HFmode
7067 || (mode == QImode && outer == SIGN_EXTEND))))
7068 {
7069 if (code == MULT)
7070 {
7071 rtx xiop0 = XEXP (index, 0);
7072 rtx xiop1 = XEXP (index, 1);
7073
7074 return ((arm_address_register_rtx_p (xiop0, strict_p)
7075 && power_of_two_operand (xiop1, SImode))
7076 || (arm_address_register_rtx_p (xiop1, strict_p)
7077 && power_of_two_operand (xiop0, SImode)));
7078 }
7079 else if (code == LSHIFTRT || code == ASHIFTRT
7080 || code == ASHIFT || code == ROTATERT)
7081 {
7082 rtx op = XEXP (index, 1);
7083
7084 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7085 && CONST_INT_P (op)
7086 && INTVAL (op) > 0
7087 && INTVAL (op) <= 31);
7088 }
7089 }
7090
7091 /* For ARM v4 we may be doing a sign-extend operation during the
7092 load. */
7093 if (arm_arch4)
7094 {
7095 if (mode == HImode
7096 || mode == HFmode
7097 || (outer == SIGN_EXTEND && mode == QImode))
7098 range = 256;
7099 else
7100 range = 4096;
7101 }
7102 else
7103 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7104
7105 return (code == CONST_INT
7106 && INTVAL (index) < range
7107 && INTVAL (index) > -range);
7108 }
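/* Illustrative examples (not from the original source) of addresses whose
   index part is accepted above for a word load in ARM state:

       ldr  r0, [r1, #4095]        @ CONST_INT within (-4096, 4096)
       ldr  r0, [r1, r2]           @ plain register index
       ldr  r0, [r1, r2, lsl #2]   @ (mult r2 4), a power-of-two scale
       ldr  r0, [r1, r2, asr #5]   @ shift by a constant in 1..31

   whereas e.g. HImode on ARMv4+ (ldrh) is limited to constant offsets
   within (-256, 256).  */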
7109
7110 /* Return true if OP is a valid index scaling factor for a Thumb-2
7111 address index operand, i.e. 1, 2, 4 or 8. */
7112 static bool
7113 thumb2_index_mul_operand (rtx op)
7114 {
7115 HOST_WIDE_INT val;
7116
7117 if (!CONST_INT_P (op))
7118 return false;
7119
7120 val = INTVAL (op);
7121 return (val == 1 || val == 2 || val == 4 || val == 8);
7122 }
7123
7124 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7125 static int
7126 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7127 {
7128 enum rtx_code code = GET_CODE (index);
7129
7130 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7131 /* Standard coprocessor addressing modes. */
7132 if (TARGET_HARD_FLOAT
7133 && TARGET_VFP
7134 && (mode == SFmode || mode == DFmode))
7135 return (code == CONST_INT && INTVAL (index) < 1024
7136 /* Thumb-2 allows only a > -256 index range for its core register
7137 load/stores. Since we allow SF/DF in core registers, we have
7138 to use the intersection of -256..4096 (core) and -1024..1024
7139 (coprocessor). */
7140 && INTVAL (index) > -256
7141 && (INTVAL (index) & 3) == 0);
7142
7143 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7144 {
7145 /* For DImode assume values will usually live in core regs
7146 and only allow LDRD addressing modes. */
7147 if (!TARGET_LDRD || mode != DImode)
7148 return (code == CONST_INT
7149 && INTVAL (index) < 1024
7150 && INTVAL (index) > -1024
7151 && (INTVAL (index) & 3) == 0);
7152 }
7153
7154 /* For quad modes, we restrict the constant offset to be slightly less
7155 than what the instruction format permits. We do this because for
7156 quad mode moves, we will actually decompose them into two separate
7157 double-mode reads or writes. INDEX must therefore be a valid
7158 (double-mode) offset and so should INDEX+8. */
7159 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7160 return (code == CONST_INT
7161 && INTVAL (index) < 1016
7162 && INTVAL (index) > -1024
7163 && (INTVAL (index) & 3) == 0);
7164
7165 /* We have no such constraint on double mode offsets, so we permit the
7166 full range of the instruction format. */
7167 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7168 return (code == CONST_INT
7169 && INTVAL (index) < 1024
7170 && INTVAL (index) > -1024
7171 && (INTVAL (index) & 3) == 0);
7172
7173 if (arm_address_register_rtx_p (index, strict_p)
7174 && (GET_MODE_SIZE (mode) <= 4))
7175 return 1;
7176
7177 if (mode == DImode || mode == DFmode)
7178 {
7179 if (code == CONST_INT)
7180 {
7181 HOST_WIDE_INT val = INTVAL (index);
7182 /* ??? Can we assume ldrd for thumb2? */
7183 /* Thumb-2 ldrd only has reg+const addressing modes. */
7184 /* ldrd supports offsets of +-1020.
7185 However the ldr fallback does not. */
7186 return val > -256 && val < 256 && (val & 3) == 0;
7187 }
7188 else
7189 return 0;
7190 }
7191
7192 if (code == MULT)
7193 {
7194 rtx xiop0 = XEXP (index, 0);
7195 rtx xiop1 = XEXP (index, 1);
7196
7197 return ((arm_address_register_rtx_p (xiop0, strict_p)
7198 && thumb2_index_mul_operand (xiop1))
7199 || (arm_address_register_rtx_p (xiop1, strict_p)
7200 && thumb2_index_mul_operand (xiop0)));
7201 }
7202 else if (code == ASHIFT)
7203 {
7204 rtx op = XEXP (index, 1);
7205
7206 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7207 && CONST_INT_P (op)
7208 && INTVAL (op) > 0
7209 && INTVAL (op) <= 3);
7210 }
7211
7212 return (code == CONST_INT
7213 && INTVAL (index) < 4096
7214 && INTVAL (index) > -256);
7215 }
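/* Illustrative examples (not from the original source) of addresses whose
   index part is accepted above for a word load in Thumb-2 state:

       ldr  r0, [r1, #4095]        @ CONST_INT within (-256, 4096)
       ldr  r0, [r1, #-255]
       ldr  r0, [r1, r2]           @ plain register index
       ldr  r0, [r1, r2, lsl #3]   @ scale limited to lsl #0..3

   Note that no shifted-register form other than LSL is accepted here.  */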
7216
7217 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7218 static int
7219 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7220 {
7221 int regno;
7222
7223 if (!REG_P (x))
7224 return 0;
7225
7226 regno = REGNO (x);
7227
7228 if (strict_p)
7229 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7230
7231 return (regno <= LAST_LO_REGNUM
7232 || regno > LAST_VIRTUAL_REGISTER
7233 || regno == FRAME_POINTER_REGNUM
7234 || (GET_MODE_SIZE (mode) >= 4
7235 && (regno == STACK_POINTER_REGNUM
7236 || regno >= FIRST_PSEUDO_REGISTER
7237 || x == hard_frame_pointer_rtx
7238 || x == arg_pointer_rtx)));
7239 }
7240
7241 /* Return nonzero if x is a legitimate index register. This is the case
7242 for any base register that can access a QImode object. */
7243 inline static int
7244 thumb1_index_register_rtx_p (rtx x, int strict_p)
7245 {
7246 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7247 }
7248
7249 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7250
7251 The AP may be eliminated to either the SP or the FP, so we use the
7252 least common denominator, e.g. SImode, and offsets from 0 to 64.
7253
7254 ??? Verify whether the above is the right approach.
7255
7256 ??? The FP may also be eliminated to the SP, so perhaps that
7257 needs special handling too.
7258
7259 ??? Look at how the mips16 port solves this problem. It probably uses
7260 better ways to solve some of these problems.
7261
7262 Although it is not incorrect, we don't accept QImode and HImode
7263 addresses based on the frame pointer or arg pointer until the
7264 reload pass starts. This is so that eliminating such addresses
7265 into stack based ones won't produce impossible code. */
7266 int
7267 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7268 {
7269 /* ??? Not clear if this is right. Experiment. */
7270 if (GET_MODE_SIZE (mode) < 4
7271 && !(reload_in_progress || reload_completed)
7272 && (reg_mentioned_p (frame_pointer_rtx, x)
7273 || reg_mentioned_p (arg_pointer_rtx, x)
7274 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7275 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7276 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7277 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7278 return 0;
7279
7280 /* Accept any base register. SP only in SImode or larger. */
7281 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7282 return 1;
7283
7284 /* This is PC relative data before arm_reorg runs. */
7285 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7286 && GET_CODE (x) == SYMBOL_REF
7287 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7288 return 1;
7289
7290 /* This is PC relative data after arm_reorg runs. */
7291 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7292 && reload_completed
7293 && (GET_CODE (x) == LABEL_REF
7294 || (GET_CODE (x) == CONST
7295 && GET_CODE (XEXP (x, 0)) == PLUS
7296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7297 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7298 return 1;
7299
7300 /* Post-inc indexing only supported for SImode and larger. */
7301 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7302 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7303 return 1;
7304
7305 else if (GET_CODE (x) == PLUS)
7306 {
7307 /* REG+REG address can be any two index registers. */
7308 /* We disallow FRAME+REG addressing since we know that FRAME
7309 will be replaced with STACK, and SP relative addressing only
7310 permits SP+OFFSET. */
7311 if (GET_MODE_SIZE (mode) <= 4
7312 && XEXP (x, 0) != frame_pointer_rtx
7313 && XEXP (x, 1) != frame_pointer_rtx
7314 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7315 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7316 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7317 return 1;
7318
7319 /* REG+const has a 5-bit offset, scaled by the access size (so 5 to 7 bits of byte offset), for non-SP registers. */
7320 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7321 || XEXP (x, 0) == arg_pointer_rtx)
7322 && CONST_INT_P (XEXP (x, 1))
7323 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7324 return 1;
7325
7326 /* REG+const has a 10-bit byte offset for SP, but only SImode and
7327 larger are supported. */
7328 /* ??? Should probably check for DI/DFmode overflow here
7329 just like GO_IF_LEGITIMATE_OFFSET does. */
7330 else if (REG_P (XEXP (x, 0))
7331 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7332 && GET_MODE_SIZE (mode) >= 4
7333 && CONST_INT_P (XEXP (x, 1))
7334 && INTVAL (XEXP (x, 1)) >= 0
7335 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7336 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7337 return 1;
7338
7339 else if (REG_P (XEXP (x, 0))
7340 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7341 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7342 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7343 && REGNO (XEXP (x, 0))
7344 <= LAST_VIRTUAL_POINTER_REGISTER))
7345 && GET_MODE_SIZE (mode) >= 4
7346 && CONST_INT_P (XEXP (x, 1))
7347 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7348 return 1;
7349 }
7350
7351 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7352 && GET_MODE_SIZE (mode) == 4
7353 && GET_CODE (x) == SYMBOL_REF
7354 && CONSTANT_POOL_ADDRESS_P (x)
7355 && ! (flag_pic
7356 && symbol_mentioned_p (get_pool_constant (x))
7357 && ! pcrel_constant_p (get_pool_constant (x))))
7358 return 1;
7359
7360 return 0;
7361 }
7362
7363 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7364 instruction of mode MODE. */
7365 int
7366 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7367 {
7368 switch (GET_MODE_SIZE (mode))
7369 {
7370 case 1:
7371 return val >= 0 && val < 32;
7372
7373 case 2:
7374 return val >= 0 && val < 64 && (val & 1) == 0;
7375
7376 default:
7377 return (val >= 0
7378 && (val + GET_MODE_SIZE (mode)) <= 128
7379 && (val & 3) == 0);
7380 }
7381 }
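/* For illustration (not from the original source), the offsets accepted
   above correspond to the Thumb-1 immediate-offset loads and stores:

       QImode:  0, 1, 2, ..., 31      (ldrb/strb, imm5)
       HImode:  0, 2, 4, ..., 62      (ldrh/strh, imm5 * 2)
       SImode:  0, 4, 8, ..., 124     (ldr/str,   imm5 * 4)

   Larger modes also take the default (word-aligned) case, with the mode
   size folded into the upper bound.  */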
7382
7383 bool
7384 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7385 {
7386 if (TARGET_ARM)
7387 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7388 else if (TARGET_THUMB2)
7389 return thumb2_legitimate_address_p (mode, x, strict_p);
7390 else /* if (TARGET_THUMB1) */
7391 return thumb1_legitimate_address_p (mode, x, strict_p);
7392 }
7393
7394 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7395
7396 Given an rtx X being reloaded into a reg required to be
7397 in class CLASS, return the class of reg to actually use.
7398 In general this is just CLASS, but for the Thumb core registers and
7399 immediate constants we prefer a LO_REGS class or a subset. */
7400
7401 static reg_class_t
7402 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7403 {
7404 if (TARGET_32BIT)
7405 return rclass;
7406 else
7407 {
7408 if (rclass == GENERAL_REGS)
7409 return LO_REGS;
7410 else
7411 return rclass;
7412 }
7413 }
7414
7415 /* Build the SYMBOL_REF for __tls_get_addr. */
7416
7417 static GTY(()) rtx tls_get_addr_libfunc;
7418
7419 static rtx
7420 get_tls_get_addr (void)
7421 {
7422 if (!tls_get_addr_libfunc)
7423 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7424 return tls_get_addr_libfunc;
7425 }
7426
7427 rtx
7428 arm_load_tp (rtx target)
7429 {
7430 if (!target)
7431 target = gen_reg_rtx (SImode);
7432
7433 if (TARGET_HARD_TP)
7434 {
7435 /* Can return in any reg. */
7436 emit_insn (gen_load_tp_hard (target));
7437 }
7438 else
7439 {
7440 /* Always returned in r0. Immediately copy the result into a pseudo,
7441 otherwise other uses of r0 (e.g. setting up function arguments) may
7442 clobber the value. */
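/* Illustrative note (an assumption, not part of the original comment):
   the soft variant is expected to expand to a helper call such as

       bl      __aeabi_read_tp     @ thread pointer returned in r0

   which is why the value is copied out of r0 straight away.  */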
7443
7444 rtx tmp;
7445
7446 emit_insn (gen_load_tp_soft ());
7447
7448 tmp = gen_rtx_REG (SImode, 0);
7449 emit_move_insn (target, tmp);
7450 }
7451 return target;
7452 }
7453
7454 static rtx
7455 load_tls_operand (rtx x, rtx reg)
7456 {
7457 rtx tmp;
7458
7459 if (reg == NULL_RTX)
7460 reg = gen_reg_rtx (SImode);
7461
7462 tmp = gen_rtx_CONST (SImode, x);
7463
7464 emit_move_insn (reg, tmp);
7465
7466 return reg;
7467 }
7468
7469 static rtx
7470 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7471 {
7472 rtx insns, label, labelno, sum;
7473
7474 gcc_assert (reloc != TLS_DESCSEQ);
7475 start_sequence ();
7476
7477 labelno = GEN_INT (pic_labelno++);
7478 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7479 label = gen_rtx_CONST (VOIDmode, label);
7480
7481 sum = gen_rtx_UNSPEC (Pmode,
7482 gen_rtvec (4, x, GEN_INT (reloc), label,
7483 GEN_INT (TARGET_ARM ? 8 : 4)),
7484 UNSPEC_TLS);
7485 reg = load_tls_operand (sum, reg);
7486
7487 if (TARGET_ARM)
7488 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7489 else
7490 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7491
7492 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7493 LCT_PURE, /* LCT_CONST? */
7494 Pmode, 1, reg, Pmode);
7495
7496 insns = get_insns ();
7497 end_sequence ();
7498
7499 return insns;
7500 }
7501
7502 static rtx
7503 arm_tls_descseq_addr (rtx x, rtx reg)
7504 {
7505 rtx labelno = GEN_INT (pic_labelno++);
7506 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7507 rtx sum = gen_rtx_UNSPEC (Pmode,
7508 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7509 gen_rtx_CONST (VOIDmode, label),
7510 GEN_INT (!TARGET_ARM)),
7511 UNSPEC_TLS);
7512 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7513
7514 emit_insn (gen_tlscall (x, labelno));
7515 if (!reg)
7516 reg = gen_reg_rtx (SImode);
7517 else
7518 gcc_assert (REGNO (reg) != 0);
7519
7520 emit_move_insn (reg, reg0);
7521
7522 return reg;
7523 }
7524
7525 rtx
7526 legitimize_tls_address (rtx x, rtx reg)
7527 {
7528 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7529 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7530
7531 switch (model)
7532 {
7533 case TLS_MODEL_GLOBAL_DYNAMIC:
7534 if (TARGET_GNU2_TLS)
7535 {
7536 reg = arm_tls_descseq_addr (x, reg);
7537
7538 tp = arm_load_tp (NULL_RTX);
7539
7540 dest = gen_rtx_PLUS (Pmode, tp, reg);
7541 }
7542 else
7543 {
7544 /* Original scheme */
7545 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7546 dest = gen_reg_rtx (Pmode);
7547 emit_libcall_block (insns, dest, ret, x);
7548 }
7549 return dest;
7550
7551 case TLS_MODEL_LOCAL_DYNAMIC:
7552 if (TARGET_GNU2_TLS)
7553 {
7554 reg = arm_tls_descseq_addr (x, reg);
7555
7556 tp = arm_load_tp (NULL_RTX);
7557
7558 dest = gen_rtx_PLUS (Pmode, tp, reg);
7559 }
7560 else
7561 {
7562 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7563
7564 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7565 share the LDM result with other LD model accesses. */
7566 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7567 UNSPEC_TLS);
7568 dest = gen_reg_rtx (Pmode);
7569 emit_libcall_block (insns, dest, ret, eqv);
7570
7571 /* Load the addend. */
7572 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7573 GEN_INT (TLS_LDO32)),
7574 UNSPEC_TLS);
7575 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7576 dest = gen_rtx_PLUS (Pmode, dest, addend);
7577 }
7578 return dest;
7579
7580 case TLS_MODEL_INITIAL_EXEC:
7581 labelno = GEN_INT (pic_labelno++);
7582 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7583 label = gen_rtx_CONST (VOIDmode, label);
7584 sum = gen_rtx_UNSPEC (Pmode,
7585 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7586 GEN_INT (TARGET_ARM ? 8 : 4)),
7587 UNSPEC_TLS);
7588 reg = load_tls_operand (sum, reg);
7589
7590 if (TARGET_ARM)
7591 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7592 else if (TARGET_THUMB2)
7593 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7594 else
7595 {
7596 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7597 emit_move_insn (reg, gen_const_mem (SImode, reg));
7598 }
7599
7600 tp = arm_load_tp (NULL_RTX);
7601
7602 return gen_rtx_PLUS (Pmode, tp, reg);
7603
7604 case TLS_MODEL_LOCAL_EXEC:
7605 tp = arm_load_tp (NULL_RTX);
7606
7607 reg = gen_rtx_UNSPEC (Pmode,
7608 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7609 UNSPEC_TLS);
7610 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7611
7612 return gen_rtx_PLUS (Pmode, tp, reg);
7613
7614 default:
7615 abort ();
7616 }
7617 }
7618
7619 /* Try machine-dependent ways of modifying an illegitimate address
7620 to be legitimate. If we find one, return the new, valid address. */
7621 rtx
7622 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7623 {
7624 if (arm_tls_referenced_p (x))
7625 {
7626 rtx addend = NULL;
7627
7628 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7629 {
7630 addend = XEXP (XEXP (x, 0), 1);
7631 x = XEXP (XEXP (x, 0), 0);
7632 }
7633
7634 if (GET_CODE (x) != SYMBOL_REF)
7635 return x;
7636
7637 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7638
7639 x = legitimize_tls_address (x, NULL_RTX);
7640
7641 if (addend)
7642 {
7643 x = gen_rtx_PLUS (SImode, x, addend);
7644 orig_x = x;
7645 }
7646 else
7647 return x;
7648 }
7649
7650 if (!TARGET_ARM)
7651 {
7652 /* TODO: legitimize_address for Thumb2. */
7653 if (TARGET_THUMB2)
7654 return x;
7655 return thumb_legitimize_address (x, orig_x, mode);
7656 }
7657
7658 if (GET_CODE (x) == PLUS)
7659 {
7660 rtx xop0 = XEXP (x, 0);
7661 rtx xop1 = XEXP (x, 1);
7662
7663 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7664 xop0 = force_reg (SImode, xop0);
7665
7666 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7667 && !symbol_mentioned_p (xop1))
7668 xop1 = force_reg (SImode, xop1);
7669
7670 if (ARM_BASE_REGISTER_RTX_P (xop0)
7671 && CONST_INT_P (xop1))
7672 {
7673 HOST_WIDE_INT n, low_n;
7674 rtx base_reg, val;
7675 n = INTVAL (xop1);
7676
7677 /* VFP addressing modes actually allow greater offsets, but for
7678 now we just stick with the lowest common denominator. */
7679 if (mode == DImode
7680 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7681 {
7682 low_n = n & 0x0f;
7683 n &= ~0x0f;
7684 if (low_n > 4)
7685 {
7686 n += 16;
7687 low_n -= 16;
7688 }
7689 }
7690 else
7691 {
7692 low_n = ((mode) == TImode ? 0
7693 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7694 n -= low_n;
7695 }
7696
7697 base_reg = gen_reg_rtx (SImode);
7698 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7699 emit_move_insn (base_reg, val);
7700 x = plus_constant (Pmode, base_reg, low_n);
7701 }
7702 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7703 x = gen_rtx_PLUS (SImode, xop0, xop1);
7704 }
7705
7706 /* XXX We don't allow MINUS any more -- see comment in
7707 arm_legitimate_address_outer_p (). */
7708 else if (GET_CODE (x) == MINUS)
7709 {
7710 rtx xop0 = XEXP (x, 0);
7711 rtx xop1 = XEXP (x, 1);
7712
7713 if (CONSTANT_P (xop0))
7714 xop0 = force_reg (SImode, xop0);
7715
7716 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7717 xop1 = force_reg (SImode, xop1);
7718
7719 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7720 x = gen_rtx_MINUS (SImode, xop0, xop1);
7721 }
7722
7723 /* Make sure to take full advantage of the pre-indexed addressing mode
7724 with absolute addresses, which often allows the base register to be
7725 shared between multiple adjacent memory references, and might even
7726 allow the minipool to be avoided entirely. */
7727 else if (CONST_INT_P (x) && optimize > 0)
7728 {
7729 unsigned int bits;
7730 HOST_WIDE_INT mask, base, index;
7731 rtx base_reg;
7732
7733 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7734 use an 8-bit index. So let's use a 12-bit index for SImode only and
7735 hope that arm_gen_constant will enable ldrb to use more bits. */
7736 bits = (mode == SImode) ? 12 : 8;
7737 mask = (1 << bits) - 1;
7738 base = INTVAL (x) & ~mask;
7739 index = INTVAL (x) & mask;
7740 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7741 {
7742 /* It'll most probably be more efficient to generate the base
7743 with more bits set and use a negative index instead. */
7744 base |= mask;
7745 index -= mask;
7746 }
7747 base_reg = force_reg (SImode, GEN_INT (base));
7748 x = plus_constant (Pmode, base_reg, index);
7749 }
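/* Worked example (illustrative, not from the original source): for an
   SImode access to the absolute address 0x20001234, bits = 12, so
   base = 0x20001000 and index = 0x234; the base is built once in a
   register and the access becomes  ldr rX, [rBASE, #0x234] , letting
   neighbouring absolute addresses reuse rBASE.  */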
7750
7751 if (flag_pic)
7752 {
7753 /* We need to find and carefully transform any SYMBOL and LABEL
7754 references; so go back to the original address expression. */
7755 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7756
7757 if (new_x != orig_x)
7758 x = new_x;
7759 }
7760
7761 return x;
7762 }
7763
7764
7765 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7766 to be legitimate. If we find one, return the new, valid address. */
7767 rtx
7768 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7769 {
7770 if (GET_CODE (x) == PLUS
7771 && CONST_INT_P (XEXP (x, 1))
7772 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7773 || INTVAL (XEXP (x, 1)) < 0))
7774 {
7775 rtx xop0 = XEXP (x, 0);
7776 rtx xop1 = XEXP (x, 1);
7777 HOST_WIDE_INT offset = INTVAL (xop1);
7778
7779 /* Try and fold the offset into a biasing of the base register and
7780 then offsetting that. Don't do this when optimizing for space
7781 since it can cause too many CSEs. */
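/* Worked example (illustrative, not from the original source): at -Os,
   an SImode access at  base + 132  (just past ldr's 0..124 range) takes
   the middle branch below: delta = 31 * 4 = 124, so the address is
   rewritten as  (base + 8) + 124 , where the +8 bias goes into a fresh
   register and #124 is a legitimate Thumb-1 ldr offset.  */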
7782 if (optimize_size && offset >= 0
7783 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7784 {
7785 HOST_WIDE_INT delta;
7786
7787 if (offset >= 256)
7788 delta = offset - (256 - GET_MODE_SIZE (mode));
7789 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7790 delta = 31 * GET_MODE_SIZE (mode);
7791 else
7792 delta = offset & (~31 * GET_MODE_SIZE (mode));
7793
7794 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7795 NULL_RTX);
7796 x = plus_constant (Pmode, xop0, delta);
7797 }
7798 else if (offset < 0 && offset > -256)
7799 /* Small negative offsets are best done with a subtract before the
7800 dereference; forcing these into a register normally takes two
7801 instructions. */
7802 x = force_operand (x, NULL_RTX);
7803 else
7804 {
7805 /* For the remaining cases, force the constant into a register. */
7806 xop1 = force_reg (SImode, xop1);
7807 x = gen_rtx_PLUS (SImode, xop0, xop1);
7808 }
7809 }
7810 else if (GET_CODE (x) == PLUS
7811 && s_register_operand (XEXP (x, 1), SImode)
7812 && !s_register_operand (XEXP (x, 0), SImode))
7813 {
7814 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7815
7816 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7817 }
7818
7819 if (flag_pic)
7820 {
7821 /* We need to find and carefully transform any SYMBOL and LABEL
7822 references; so go back to the original address expression. */
7823 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7824
7825 if (new_x != orig_x)
7826 x = new_x;
7827 }
7828
7829 return x;
7830 }
7831
7832 bool
7833 arm_legitimize_reload_address (rtx *p,
7834 machine_mode mode,
7835 int opnum, int type,
7836 int ind_levels ATTRIBUTE_UNUSED)
7837 {
7838 /* We must recognize output that we have already generated ourselves. */
7839 if (GET_CODE (*p) == PLUS
7840 && GET_CODE (XEXP (*p, 0)) == PLUS
7841 && REG_P (XEXP (XEXP (*p, 0), 0))
7842 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7843 && CONST_INT_P (XEXP (*p, 1)))
7844 {
7845 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7846 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7847 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7848 return true;
7849 }
7850
7851 if (GET_CODE (*p) == PLUS
7852 && REG_P (XEXP (*p, 0))
7853 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7854 /* If the base register is equivalent to a constant, let the generic
7855 code handle it. Otherwise we will run into problems if a future
7856 reload pass decides to rematerialize the constant. */
7857 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7858 && CONST_INT_P (XEXP (*p, 1)))
7859 {
7860 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7861 HOST_WIDE_INT low, high;
7862
7863 /* Detect coprocessor load/stores. */
7864 bool coproc_p = ((TARGET_HARD_FLOAT
7865 && TARGET_VFP
7866 && (mode == SFmode || mode == DFmode))
7867 || (TARGET_REALLY_IWMMXT
7868 && VALID_IWMMXT_REG_MODE (mode))
7869 || (TARGET_NEON
7870 && (VALID_NEON_DREG_MODE (mode)
7871 || VALID_NEON_QREG_MODE (mode))));
7872
7873 /* In some cases, bail out when the offset is not word-aligned (low two bits set). */
7874 if ((val & 0x3) != 0
7875 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7876 && (coproc_p
7877 /* For DI, and DF under soft-float: */
7878 || ((mode == DImode || mode == DFmode)
7879 /* Without ldrd, we use stm/ldm, which does not
7880 fare well with unaligned offsets. */
7881 && (! TARGET_LDRD
7882 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7883 || TARGET_THUMB2))))
7884 return false;
7885
7886 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7887 where the (reg+high) part gets turned into a reload add insn,
7888 we try to decompose the index into high/low values that can often
7889 also lead to better reload CSE.
7890 For example:
7891 ldr r0, [r2, #4100] // Offset too large
7892 ldr r1, [r2, #4104] // Offset too large
7893
7894 is best reloaded as:
7895 add t1, r2, #4096
7896 ldr r0, [t1, #4]
7897 add t2, r2, #4096
7898 ldr r1, [t2, #8]
7899
7900 which post-reload CSE can simplify in most cases to eliminate the
7901 second add instruction:
7902 add t1, r2, #4096
7903 ldr r0, [t1, #4]
7904 ldr r1, [t1, #8]
7905
7906 The idea here is that we want to split out the bits of the constant
7907 as a mask, rather than by subtracting the maximum offset that the
7908 respective type of load/store used can handle.
7909
7910 Even when the overall offset is positive, it can pay off to choose a
7911 negative low part; sometimes this yields a high-part immediate that
7912 can be constructed with fewer instructions.
7913 For example:
7914 ldr r0, [r2, #0x3FFFFC]
7915
7916 This is best reloaded as:
7917 add t1, r2, #0x400000
7918 ldr r0, [t1, #-4]
7919
7920 The trick for spotting this for a load insn with N bits of offset
7921 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7922 negative offset that makes bit N and all the bits below it zero
7923 in the remainder part.
7924
7925 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7926 to sign-magnitude addressing (i.e. a separate +/- bit, or 1's complement),
7927 which is used by most ARM load/store instructions. */
7928
7929 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7930 (((VAL) & ((1 << (N)) - 1)) \
7931 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7932 : 0)
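/* Worked example (illustrative, not part of the original comment): for the
   0x3FFFFC case above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) takes the
   low 13 bits (0x1ffc) and computes (0x1ffc ^ 0x1000) - 0x1000 = -4; the
   remaining high part is 0x3FFFFC - (-4) = 0x400000, a single valid ARM
   immediate.  A value whose low N bits are already clear, e.g. 0x1000
   with N = 12, yields 0.  */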
7933
7934 if (coproc_p)
7935 {
7936 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7937
7938 /* NEON quad-word load/stores are made of two double-word accesses,
7939 so the valid index range is reduced by 8. Treat as 9-bit range if
7940 we go over it. */
7941 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7942 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7943 }
7944 else if (GET_MODE_SIZE (mode) == 8)
7945 {
7946 if (TARGET_LDRD)
7947 low = (TARGET_THUMB2
7948 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7949 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7950 else
7951 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7952 to access doublewords. The supported load/store offsets are
7953 -8, -4, and 4, which we try to produce here. */
7954 low = ((val & 0xf) ^ 0x8) - 0x8;
7955 }
7956 else if (GET_MODE_SIZE (mode) < 8)
7957 {
7958 /* NEON element load/stores do not have an offset. */
7959 if (TARGET_NEON_FP16 && mode == HFmode)
7960 return false;
7961
7962 if (TARGET_THUMB2)
7963 {
7964 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7965 Try the wider 12-bit range first, and re-try if the result
7966 is out of range. */
7967 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7968 if (low < -255)
7969 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7970 }
7971 else
7972 {
7973 if (mode == HImode || mode == HFmode)
7974 {
7975 if (arm_arch4)
7976 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7977 else
7978 {
7979 /* The storehi/movhi_bytes fallbacks can use only
7980 [-4094,+4094] of the full ldrb/strb index range. */
7981 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7982 if (low == 4095 || low == -4095)
7983 return false;
7984 }
7985 }
7986 else
7987 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7988 }
7989 }
7990 else
7991 return false;
7992
7993 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7994 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7995 - (unsigned HOST_WIDE_INT) 0x80000000);
7996 /* Check for overflow or zero. */
7997 if (low == 0 || high == 0 || (high + low != val))
7998 return false;
7999
8000 /* Reload the high part into a base reg; leave the low part
8001 in the mem.
8002 Note that replacing this gen_rtx_PLUS with plus_constant is
8003 wrong in this case because we rely on the
8004 (plus (plus reg c1) c2) structure being preserved so that
8005 XEXP (*p, 0) in push_reload below uses the correct term. */
8006 *p = gen_rtx_PLUS (GET_MODE (*p),
8007 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8008 GEN_INT (high)),
8009 GEN_INT (low));
8010 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8011 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8012 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8013 return true;
8014 }
8015
8016 return false;
8017 }
8018
8019 rtx
8020 thumb_legitimize_reload_address (rtx *x_p,
8021 machine_mode mode,
8022 int opnum, int type,
8023 int ind_levels ATTRIBUTE_UNUSED)
8024 {
8025 rtx x = *x_p;
8026
8027 if (GET_CODE (x) == PLUS
8028 && GET_MODE_SIZE (mode) < 4
8029 && REG_P (XEXP (x, 0))
8030 && XEXP (x, 0) == stack_pointer_rtx
8031 && CONST_INT_P (XEXP (x, 1))
8032 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8033 {
8034 rtx orig_x = x;
8035
8036 x = copy_rtx (x);
8037 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8038 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8039 return x;
8040 }
8041
8042 /* If both registers are hi-regs, then it's better to reload the
8043 entire expression rather than each register individually. That
8044 only requires one reload register rather than two. */
8045 if (GET_CODE (x) == PLUS
8046 && REG_P (XEXP (x, 0))
8047 && REG_P (XEXP (x, 1))
8048 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8049 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8050 {
8051 rtx orig_x = x;
8052
8053 x = copy_rtx (x);
8054 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8055 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8056 return x;
8057 }
8058
8059 return NULL;
8060 }
8061
8062 /* Return TRUE if X contains any TLS symbol references. */
8063
8064 bool
8065 arm_tls_referenced_p (rtx x)
8066 {
8067 if (! TARGET_HAVE_TLS)
8068 return false;
8069
8070 subrtx_iterator::array_type array;
8071 FOR_EACH_SUBRTX (iter, array, x, ALL)
8072 {
8073 const_rtx x = *iter;
8074 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8075 return true;
8076
8077 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8078 TLS offsets, not real symbol references. */
8079 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8080 iter.skip_subrtxes ();
8081 }
8082 return false;
8083 }
8084
8085 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8086
8087 On the ARM, allow any integer (invalid ones are removed later by insn
8088 patterns), nice doubles and symbol_refs that refer to the function's
8089 constant pool XXX.
8090
8091 When generating PIC, allow anything. */
8092
8093 static bool
8094 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8095 {
8096 /* At present, we have no support for Neon structure constants, so forbid
8097 them here. It might be possible to handle simple cases like 0 and -1
8098 in future. */
8099 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8100 return false;
8101
8102 return flag_pic || !label_mentioned_p (x);
8103 }
8104
8105 static bool
8106 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8107 {
8108 return (CONST_INT_P (x)
8109 || CONST_DOUBLE_P (x)
8110 || CONSTANT_ADDRESS_P (x)
8111 || flag_pic);
8112 }
8113
8114 static bool
8115 arm_legitimate_constant_p (machine_mode mode, rtx x)
8116 {
8117 return (!arm_cannot_force_const_mem (mode, x)
8118 && (TARGET_32BIT
8119 ? arm_legitimate_constant_p_1 (mode, x)
8120 : thumb_legitimate_constant_p (mode, x)));
8121 }
8122
8123 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8124
8125 static bool
8126 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8127 {
8128 rtx base, offset;
8129
8130 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8131 {
8132 split_const (x, &base, &offset);
8133 if (GET_CODE (base) == SYMBOL_REF
8134 && !offset_within_block_p (base, INTVAL (offset)))
8135 return true;
8136 }
8137 return arm_tls_referenced_p (x);
8138 }
8139 \f
8140 #define REG_OR_SUBREG_REG(X) \
8141 (REG_P (X) \
8142 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8143
8144 #define REG_OR_SUBREG_RTX(X) \
8145 (REG_P (X) ? (X) : SUBREG_REG (X))
8146
8147 static inline int
8148 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8149 {
8150 machine_mode mode = GET_MODE (x);
8151 int total, words;
8152
8153 switch (code)
8154 {
8155 case ASHIFT:
8156 case ASHIFTRT:
8157 case LSHIFTRT:
8158 case ROTATERT:
8159 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8160
8161 case PLUS:
8162 case MINUS:
8163 case COMPARE:
8164 case NEG:
8165 case NOT:
8166 return COSTS_N_INSNS (1);
8167
8168 case MULT:
8169 if (CONST_INT_P (XEXP (x, 1)))
8170 {
8171 int cycles = 0;
8172 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8173
8174 while (i)
8175 {
8176 i >>= 2;
8177 cycles++;
8178 }
8179 return COSTS_N_INSNS (2) + cycles;
8180 }
8181 return COSTS_N_INSNS (1) + 16;
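/* Worked example (illustrative, not from the original source): a multiply
   by the constant 10 (binary 1010) takes two iterations of the loop above
   (10 >> 2 = 2, then 2 >> 2 = 0), giving COSTS_N_INSNS (2) + 2.  */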
8182
8183 case SET:
8184 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8185 the mode. */
8186 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8187 return (COSTS_N_INSNS (words)
8188 + 4 * ((MEM_P (SET_SRC (x)))
8189 + MEM_P (SET_DEST (x))));
8190
8191 case CONST_INT:
8192 if (outer == SET)
8193 {
8194 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8195 return 0;
8196 if (thumb_shiftable_const (INTVAL (x)))
8197 return COSTS_N_INSNS (2);
8198 return COSTS_N_INSNS (3);
8199 }
8200 else if ((outer == PLUS || outer == COMPARE)
8201 && INTVAL (x) < 256 && INTVAL (x) > -256)
8202 return 0;
8203 else if ((outer == IOR || outer == XOR || outer == AND)
8204 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8205 return COSTS_N_INSNS (1);
8206 else if (outer == AND)
8207 {
8208 int i;
8209 /* This duplicates the tests in the andsi3 expander. */
8210 for (i = 9; i <= 31; i++)
8211 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8212 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8213 return COSTS_N_INSNS (2);
8214 }
8215 else if (outer == ASHIFT || outer == ASHIFTRT
8216 || outer == LSHIFTRT)
8217 return 0;
8218 return COSTS_N_INSNS (2);
8219
8220 case CONST:
8221 case CONST_DOUBLE:
8222 case LABEL_REF:
8223 case SYMBOL_REF:
8224 return COSTS_N_INSNS (3);
8225
8226 case UDIV:
8227 case UMOD:
8228 case DIV:
8229 case MOD:
8230 return 100;
8231
8232 case TRUNCATE:
8233 return 99;
8234
8235 case AND:
8236 case XOR:
8237 case IOR:
8238 /* XXX guess. */
8239 return 8;
8240
8241 case MEM:
8242 /* XXX another guess. */
8243 /* Memory costs quite a lot for the first word, but subsequent words
8244 load at the equivalent of a single insn each. */
8245 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8246 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8247 ? 4 : 0));
8248
8249 case IF_THEN_ELSE:
8250 /* XXX a guess. */
8251 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8252 return 14;
8253 return 2;
8254
8255 case SIGN_EXTEND:
8256 case ZERO_EXTEND:
8257 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8258 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8259
8260 if (mode == SImode)
8261 return total;
8262
8263 if (arm_arch6)
8264 return total + COSTS_N_INSNS (1);
8265
8266 /* Assume a two-shift sequence. Increase the cost slightly so
8267 we prefer actual shifts over an extend operation. */
8268 return total + 1 + COSTS_N_INSNS (2);
8269
8270 default:
8271 return 99;
8272 }
8273 }
8274
8275 static inline bool
8276 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8277 {
8278 machine_mode mode = GET_MODE (x);
8279 enum rtx_code subcode;
8280 rtx operand;
8281 enum rtx_code code = GET_CODE (x);
8282 *total = 0;
8283
8284 switch (code)
8285 {
8286 case MEM:
8287 /* Memory costs quite a lot for the first word, but subsequent words
8288 load at the equivalent of a single insn each. */
8289 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8290 return true;
8291
8292 case DIV:
8293 case MOD:
8294 case UDIV:
8295 case UMOD:
8296 if (TARGET_HARD_FLOAT && mode == SFmode)
8297 *total = COSTS_N_INSNS (2);
8298 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8299 *total = COSTS_N_INSNS (4);
8300 else
8301 *total = COSTS_N_INSNS (20);
8302 return false;
8303
8304 case ROTATE:
8305 if (REG_P (XEXP (x, 1)))
8306 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8307 else if (!CONST_INT_P (XEXP (x, 1)))
8308 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8309
8310 /* Fall through */
8311 case ROTATERT:
8312 if (mode != SImode)
8313 {
8314 *total += COSTS_N_INSNS (4);
8315 return true;
8316 }
8317
8318 /* Fall through */
8319 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8320 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8321 if (mode == DImode)
8322 {
8323 *total += COSTS_N_INSNS (3);
8324 return true;
8325 }
8326
8327 *total += COSTS_N_INSNS (1);
8328 /* Increase the cost of complex shifts because they aren't any faster,
8329 and they reduce dual-issue opportunities. */
8330 if (arm_tune_cortex_a9
8331 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8332 ++*total;
8333
8334 return true;
8335
8336 case MINUS:
8337 if (mode == DImode)
8338 {
8339 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8340 if (CONST_INT_P (XEXP (x, 0))
8341 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8342 {
8343 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8344 return true;
8345 }
8346
8347 if (CONST_INT_P (XEXP (x, 1))
8348 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8349 {
8350 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8351 return true;
8352 }
8353
8354 return false;
8355 }
8356
8357 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8358 {
8359 if (TARGET_HARD_FLOAT
8360 && (mode == SFmode
8361 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8362 {
8363 *total = COSTS_N_INSNS (1);
8364 if (CONST_DOUBLE_P (XEXP (x, 0))
8365 && arm_const_double_rtx (XEXP (x, 0)))
8366 {
8367 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8368 return true;
8369 }
8370
8371 if (CONST_DOUBLE_P (XEXP (x, 1))
8372 && arm_const_double_rtx (XEXP (x, 1)))
8373 {
8374 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8375 return true;
8376 }
8377
8378 return false;
8379 }
8380 *total = COSTS_N_INSNS (20);
8381 return false;
8382 }
8383
8384 *total = COSTS_N_INSNS (1);
8385 if (CONST_INT_P (XEXP (x, 0))
8386 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8387 {
8388 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8389 return true;
8390 }
8391
8392 subcode = GET_CODE (XEXP (x, 1));
8393 if (subcode == ASHIFT || subcode == ASHIFTRT
8394 || subcode == LSHIFTRT
8395 || subcode == ROTATE || subcode == ROTATERT)
8396 {
8397 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8398 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8399 return true;
8400 }
8401
8402 /* A shift as a part of RSB costs no more than RSB itself. */
8403 if (GET_CODE (XEXP (x, 0)) == MULT
8404 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8405 {
8406 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8407 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8408 return true;
8409 }
8410
8411 if (subcode == MULT
8412 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8413 {
8414 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8415 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8416 return true;
8417 }
8418
8419 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8420 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8421 {
8422 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8423 if (REG_P (XEXP (XEXP (x, 1), 0))
8424 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8425 *total += COSTS_N_INSNS (1);
8426
8427 return true;
8428 }
8429
8430 /* Fall through */
8431
8432 case PLUS:
8433 if (code == PLUS && arm_arch6 && mode == SImode
8434 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8435 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8436 {
8437 *total = COSTS_N_INSNS (1);
8438 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8439 0, speed);
8440 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8441 return true;
8442 }
8443
8444 /* MLA: All arguments must be registers. We filter out
8445 multiplication by a power of two, so that we fall through to
8446 the code below. */
8447 if (GET_CODE (XEXP (x, 0)) == MULT
8448 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8449 {
8450 /* The cost comes from the cost of the multiply. */
8451 return false;
8452 }
8453
8454 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8455 {
8456 if (TARGET_HARD_FLOAT
8457 && (mode == SFmode
8458 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8459 {
8460 *total = COSTS_N_INSNS (1);
8461 if (CONST_DOUBLE_P (XEXP (x, 1))
8462 && arm_const_double_rtx (XEXP (x, 1)))
8463 {
8464 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8465 return true;
8466 }
8467
8468 return false;
8469 }
8470
8471 *total = COSTS_N_INSNS (20);
8472 return false;
8473 }
8474
8475 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8476 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8477 {
8478 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8479 if (REG_P (XEXP (XEXP (x, 0), 0))
8480 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8481 *total += COSTS_N_INSNS (1);
8482 return true;
8483 }
8484
8485 /* Fall through */
8486
8487 case AND: case XOR: case IOR:
8488
8489 /* Normally the frame registers will be split into reg+const during
8490 reload, so it is a bad idea to combine them with other instructions,
8491 since then they might not be moved outside of loops. As a compromise
8492 we allow integration with ops that have a constant as their second
8493 operand. */
8494 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8495 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8496 && !CONST_INT_P (XEXP (x, 1)))
8497 *total = COSTS_N_INSNS (1);
8498
8499 if (mode == DImode)
8500 {
8501 *total += COSTS_N_INSNS (2);
8502 if (CONST_INT_P (XEXP (x, 1))
8503 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8504 {
8505 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8506 return true;
8507 }
8508
8509 return false;
8510 }
8511
8512 *total += COSTS_N_INSNS (1);
8513 if (CONST_INT_P (XEXP (x, 1))
8514 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8515 {
8516 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8517 return true;
8518 }
8519 subcode = GET_CODE (XEXP (x, 0));
8520 if (subcode == ASHIFT || subcode == ASHIFTRT
8521 || subcode == LSHIFTRT
8522 || subcode == ROTATE || subcode == ROTATERT)
8523 {
8524 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8525 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8526 return true;
8527 }
8528
8529 if (subcode == MULT
8530 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8531 {
8532 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8533 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8534 return true;
8535 }
8536
8537 if (subcode == UMIN || subcode == UMAX
8538 || subcode == SMIN || subcode == SMAX)
8539 {
8540 *total = COSTS_N_INSNS (3);
8541 return true;
8542 }
8543
8544 return false;
8545
8546 case MULT:
8547 /* This should have been handled by the CPU specific routines. */
8548 gcc_unreachable ();
8549
8550 case TRUNCATE:
8551 if (arm_arch3m && mode == SImode
8552 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8554 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8555 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8556 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8557 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8558 {
8559 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8560 return true;
8561 }
8562 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8563 return false;
8564
8565 case NEG:
8566 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8567 {
8568 if (TARGET_HARD_FLOAT
8569 && (mode == SFmode
8570 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8571 {
8572 *total = COSTS_N_INSNS (1);
8573 return false;
8574 }
8575 *total = COSTS_N_INSNS (2);
8576 return false;
8577 }
8578
8579 /* Fall through */
8580 case NOT:
8581 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8582 if (mode == SImode && code == NOT)
8583 {
8584 subcode = GET_CODE (XEXP (x, 0));
8585 if (subcode == ASHIFT || subcode == ASHIFTRT
8586 || subcode == LSHIFTRT
8587 || subcode == ROTATE || subcode == ROTATERT
8588 || (subcode == MULT
8589 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8590 {
8591 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8592 /* Register shifts cost an extra cycle. */
8593 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8594 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8595 subcode, 1, speed);
8596 return true;
8597 }
8598 }
8599
8600 return false;
8601
8602 case IF_THEN_ELSE:
8603 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8604 {
8605 *total = COSTS_N_INSNS (4);
8606 return true;
8607 }
8608
8609 operand = XEXP (x, 0);
8610
8611 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8612 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8613 && REG_P (XEXP (operand, 0))
8614 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8615 *total += COSTS_N_INSNS (1);
8616 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8617 + rtx_cost (XEXP (x, 2), code, 2, speed));
8618 return true;
8619
8620 case NE:
8621 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8622 {
8623 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8624 return true;
8625 }
8626 goto scc_insn;
8627
8628 case GE:
8629 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8630 && mode == SImode && XEXP (x, 1) == const0_rtx)
8631 {
8632 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8633 return true;
8634 }
8635 goto scc_insn;
8636
8637 case LT:
8638 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8639 && mode == SImode && XEXP (x, 1) == const0_rtx)
8640 {
8641 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8642 return true;
8643 }
8644 goto scc_insn;
8645
8646 case EQ:
8647 case GT:
8648 case LE:
8649 case GEU:
8650 case LTU:
8651 case GTU:
8652 case LEU:
8653 case UNORDERED:
8654 case ORDERED:
8655 case UNEQ:
8656 case UNGE:
8657 case UNLT:
8658 case UNGT:
8659 case UNLE:
8660 scc_insn:
8661 /* SCC insns. If the comparison has already been performed, they
8662 cost 2 instructions. Otherwise they need an additional
8663 comparison before them. */
8664 *total = COSTS_N_INSNS (2);
8665 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8666 {
8667 return true;
8668 }
8669
8670 /* Fall through */
8671 case COMPARE:
8672 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8673 {
8674 *total = 0;
8675 return true;
8676 }
8677
8678 *total += COSTS_N_INSNS (1);
8679 if (CONST_INT_P (XEXP (x, 1))
8680 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8681 {
8682 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8683 return true;
8684 }
8685
8686 subcode = GET_CODE (XEXP (x, 0));
8687 if (subcode == ASHIFT || subcode == ASHIFTRT
8688 || subcode == LSHIFTRT
8689 || subcode == ROTATE || subcode == ROTATERT)
8690 {
8691 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8692 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8693 return true;
8694 }
8695
8696 if (subcode == MULT
8697 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8698 {
8699 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8701 return true;
8702 }
8703
8704 return false;
8705
8706 case UMIN:
8707 case UMAX:
8708 case SMIN:
8709 case SMAX:
8710 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8711 if (!CONST_INT_P (XEXP (x, 1))
8712 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8713 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8714 return true;
8715
8716 case ABS:
8717 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8718 {
8719 if (TARGET_HARD_FLOAT
8720 && (mode == SFmode
8721 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8722 {
8723 *total = COSTS_N_INSNS (1);
8724 return false;
8725 }
8726 *total = COSTS_N_INSNS (20);
8727 return false;
8728 }
8729 *total = COSTS_N_INSNS (1);
8730 if (mode == DImode)
8731 *total += COSTS_N_INSNS (3);
8732 return false;
8733
8734 case SIGN_EXTEND:
8735 case ZERO_EXTEND:
8736 *total = 0;
8737 if (GET_MODE_CLASS (mode) == MODE_INT)
8738 {
8739 rtx op = XEXP (x, 0);
8740 machine_mode opmode = GET_MODE (op);
8741
8742 if (mode == DImode)
8743 *total += COSTS_N_INSNS (1);
8744
8745 if (opmode != SImode)
8746 {
8747 if (MEM_P (op))
8748 {
8749 /* If !arm_arch4, we use one of the extendhisi2_mem
8750 or movhi_bytes patterns for HImode. For a QImode
8751 sign extension, we first zero-extend from memory
8752 and then perform a shift sequence. */
8753 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8754 *total += COSTS_N_INSNS (2);
8755 }
8756 else if (arm_arch6)
8757 *total += COSTS_N_INSNS (1);
8758
8759 /* We don't have the necessary insn, so we need to perform some
8760 other operation. */
8761 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8762 /* An and with constant 255. */
8763 *total += COSTS_N_INSNS (1);
8764 else
8765 /* A shift sequence. Increase costs slightly to avoid
8766 combining two shifts into an extend operation. */
8767 *total += COSTS_N_INSNS (2) + 1;
8768 }
8769
8770 return false;
8771 }
8772
8773 switch (GET_MODE (XEXP (x, 0)))
8774 {
8775 case V8QImode:
8776 case V4HImode:
8777 case V2SImode:
8778 case V4QImode:
8779 case V2HImode:
8780 *total = COSTS_N_INSNS (1);
8781 return false;
8782
8783 default:
8784 gcc_unreachable ();
8785 }
8786 gcc_unreachable ();
8787
8788 case ZERO_EXTRACT:
8789 case SIGN_EXTRACT:
8790 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8791 return true;
8792
8793 case CONST_INT:
8794 if (const_ok_for_arm (INTVAL (x))
8795 || const_ok_for_arm (~INTVAL (x)))
8796 *total = COSTS_N_INSNS (1);
8797 else
8798 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8799 INTVAL (x), NULL_RTX,
8800 NULL_RTX, 0, 0));
8801 return true;
8802
8803 case CONST:
8804 case LABEL_REF:
8805 case SYMBOL_REF:
8806 *total = COSTS_N_INSNS (3);
8807 return true;
8808
8809 case HIGH:
8810 *total = COSTS_N_INSNS (1);
8811 return true;
8812
8813 case LO_SUM:
8814 *total = COSTS_N_INSNS (1);
8815 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8816 return true;
8817
8818 case CONST_DOUBLE:
8819 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8820 && (mode == SFmode || !TARGET_VFP_SINGLE))
8821 *total = COSTS_N_INSNS (1);
8822 else
8823 *total = COSTS_N_INSNS (4);
8824 return true;
8825
8826 case SET:
8827 /* The vec_extract patterns accept memory operands that require an
8828 address reload. Account for the cost of that reload to give the
8829 auto-inc-dec pass an incentive to try to replace them. */
8830 if (TARGET_NEON && MEM_P (SET_DEST (x))
8831 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8832 {
8833 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8834 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8835 *total += COSTS_N_INSNS (1);
8836 return true;
8837 }
8838 /* Likewise for the vec_set patterns. */
8839 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8840 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8841 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8842 {
8843 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8844 *total = rtx_cost (mem, code, 0, speed);
8845 if (!neon_vector_mem_operand (mem, 2, true))
8846 *total += COSTS_N_INSNS (1);
8847 return true;
8848 }
8849 return false;
8850
8851 case UNSPEC:
8852 /* We make this cost as high as our memory cost so that it can
8853 be hoisted out of loops. */
8854 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8855 {
8856 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8857 }
8858 return true;
8859
8860 case CONST_VECTOR:
8861 if (TARGET_NEON
8862 && TARGET_HARD_FLOAT
8863 && outer == SET
8864 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8865 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8866 *total = COSTS_N_INSNS (1);
8867 else
8868 *total = COSTS_N_INSNS (4);
8869 return true;
8870
8871 default:
8872 *total = COSTS_N_INSNS (4);
8873 return false;
8874 }
8875 }
8876
8877 /* Estimate the size cost of Thumb-1 instructions.
8878 For now most of the code is copied from thumb1_rtx_costs. We need more
8879 fine-grained tuning when we have more related test cases. */
8880 static inline int
8881 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8882 {
8883 machine_mode mode = GET_MODE (x);
8884 int words;
8885
8886 switch (code)
8887 {
8888 case ASHIFT:
8889 case ASHIFTRT:
8890 case LSHIFTRT:
8891 case ROTATERT:
8892 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8893
8894 case PLUS:
8895 case MINUS:
8896 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8897 shiftsub1 patterns generated by RTL expansion, especially when
8898 expanding multiplication. */
8899 if ((GET_CODE (XEXP (x, 0)) == MULT
8900 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8901 || (GET_CODE (XEXP (x, 1)) == MULT
8902 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8903 return COSTS_N_INSNS (2);
8904 /* Deliberately fall through for ordinary RTXes. */
8905 case COMPARE:
8906 case NEG:
8907 case NOT:
8908 return COSTS_N_INSNS (1);
8909
8910 case MULT:
8911 if (CONST_INT_P (XEXP (x, 1)))
8912 {
8913 /* The Thumb-1 mul instruction can't operate on a constant; we must
8914 load it into a register first. */
8915 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8916 /* For the targets which have a very small and high-latency multiply
8917 unit, we prefer to synthesize the mult with up to 5 instructions,
8918 giving a good balance between size and performance. */
8919 if (arm_arch6m && arm_m_profile_small_mul)
8920 return COSTS_N_INSNS (5);
8921 else
8922 return COSTS_N_INSNS (1) + const_size;
8923 }
8924 return COSTS_N_INSNS (1);
8925
8926 case SET:
8927 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8928 the mode. */
8929 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8930 return COSTS_N_INSNS (words)
8931 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8932 || satisfies_constraint_K (SET_SRC (x))
8933 /* thumb1_movdi_insn. */
8934 || ((words > 1) && MEM_P (SET_SRC (x))));
8935
8936 case CONST_INT:
8937 if (outer == SET)
8938 {
8939 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8940 return COSTS_N_INSNS (1);
8941 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8942 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8943 return COSTS_N_INSNS (2);
8944 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8945 if (thumb_shiftable_const (INTVAL (x)))
8946 return COSTS_N_INSNS (2);
8947 return COSTS_N_INSNS (3);
8948 }
8949 else if ((outer == PLUS || outer == COMPARE)
8950 && INTVAL (x) < 256 && INTVAL (x) > -256)
8951 return 0;
8952 else if ((outer == IOR || outer == XOR || outer == AND)
8953 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8954 return COSTS_N_INSNS (1);
8955 else if (outer == AND)
8956 {
8957 int i;
8958 /* This duplicates the tests in the andsi3 expander. */
8959 for (i = 9; i <= 31; i++)
8960 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8961 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8962 return COSTS_N_INSNS (2);
8963 }
8964 else if (outer == ASHIFT || outer == ASHIFTRT
8965 || outer == LSHIFTRT)
8966 return 0;
8967 return COSTS_N_INSNS (2);
8968
8969 case CONST:
8970 case CONST_DOUBLE:
8971 case LABEL_REF:
8972 case SYMBOL_REF:
8973 return COSTS_N_INSNS (3);
8974
8975 case UDIV:
8976 case UMOD:
8977 case DIV:
8978 case MOD:
8979 return 100;
8980
8981 case TRUNCATE:
8982 return 99;
8983
8984 case AND:
8985 case XOR:
8986 case IOR:
8987 return COSTS_N_INSNS (1);
8988
8989 case MEM:
8990 return (COSTS_N_INSNS (1)
8991 + COSTS_N_INSNS (1)
8992 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8993 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8994 ? COSTS_N_INSNS (1) : 0));
8995
8996 case IF_THEN_ELSE:
8997 /* XXX a guess. */
8998 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8999 return 14;
9000 return 2;
9001
9002 case ZERO_EXTEND:
9003 /* XXX still guessing. */
9004 switch (GET_MODE (XEXP (x, 0)))
9005 {
9006 case QImode:
9007 return (1 + (mode == DImode ? 4 : 0)
9008 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9009
9010 case HImode:
9011 return (4 + (mode == DImode ? 4 : 0)
9012 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9013
9014 case SImode:
9015 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9016
9017 default:
9018 return 99;
9019 }
9020
9021 default:
9022 return 99;
9023 }
9024 }
9025
9026 /* RTX costs when optimizing for size. */
9027 static bool
9028 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9029 int *total)
9030 {
9031 machine_mode mode = GET_MODE (x);
9032 if (TARGET_THUMB1)
9033 {
9034 *total = thumb1_size_rtx_costs (x, code, outer_code);
9035 return true;
9036 }
9037
9038 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9039 switch (code)
9040 {
9041 case MEM:
9042 /* A memory access costs 1 insn if the mode is small, or the address is
9043 a single register, otherwise it costs one insn per word. */
9044 if (REG_P (XEXP (x, 0)))
9045 *total = COSTS_N_INSNS (1);
9046 else if (flag_pic
9047 && GET_CODE (XEXP (x, 0)) == PLUS
9048 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9049 /* This will be split into two instructions.
9050 See arm.md:calculate_pic_address. */
9051 *total = COSTS_N_INSNS (2);
9052 else
9053 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9054 return true;
9055
9056 case DIV:
9057 case MOD:
9058 case UDIV:
9059 case UMOD:
9060 /* Needs a libcall, so it costs about this. */
9061 *total = COSTS_N_INSNS (2);
9062 return false;
9063
9064 case ROTATE:
9065 if (mode == SImode && REG_P (XEXP (x, 1)))
9066 {
9067 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9068 return true;
9069 }
9070 /* Fall through */
9071 case ROTATERT:
9072 case ASHIFT:
9073 case LSHIFTRT:
9074 case ASHIFTRT:
9075 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9076 {
9077 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9078 return true;
9079 }
9080 else if (mode == SImode)
9081 {
9082 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9083 /* Slightly disparage register shifts, but not by much. */
9084 if (!CONST_INT_P (XEXP (x, 1)))
9085 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9086 return true;
9087 }
9088
9089 /* Needs a libcall. */
9090 *total = COSTS_N_INSNS (2);
9091 return false;
9092
9093 case MINUS:
9094 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9095 && (mode == SFmode || !TARGET_VFP_SINGLE))
9096 {
9097 *total = COSTS_N_INSNS (1);
9098 return false;
9099 }
9100
9101 if (mode == SImode)
9102 {
9103 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9104 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9105
9106 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9107 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9108 || subcode1 == ROTATE || subcode1 == ROTATERT
9109 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9110 || subcode1 == ASHIFTRT)
9111 {
9112 /* It's just the cost of the two operands. */
9113 *total = 0;
9114 return false;
9115 }
9116
9117 *total = COSTS_N_INSNS (1);
9118 return false;
9119 }
9120
9121 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9122 return false;
9123
9124 case PLUS:
9125 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9126 && (mode == SFmode || !TARGET_VFP_SINGLE))
9127 {
9128 *total = COSTS_N_INSNS (1);
9129 return false;
9130 }
9131
9132 /* A shift as a part of ADD costs nothing. */
9133 if (GET_CODE (XEXP (x, 0)) == MULT
9134 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9135 {
9136 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9137 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9138 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9139 return true;
9140 }
9141
9142 /* Fall through */
9143 case AND: case XOR: case IOR:
9144 if (mode == SImode)
9145 {
9146 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9147
9148 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9149 || subcode == LSHIFTRT || subcode == ASHIFTRT
9150 || (code == AND && subcode == NOT))
9151 {
9152 /* It's just the cost of the two operands. */
9153 *total = 0;
9154 return false;
9155 }
9156 }
9157
9158 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9159 return false;
9160
9161 case MULT:
9162 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9163 return false;
9164
9165 case NEG:
9166 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9167 && (mode == SFmode || !TARGET_VFP_SINGLE))
9168 {
9169 *total = COSTS_N_INSNS (1);
9170 return false;
9171 }
9172
9173 /* Fall through */
9174 case NOT:
9175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9176
9177 return false;
9178
9179 case IF_THEN_ELSE:
9180 *total = 0;
9181 return false;
9182
9183 case COMPARE:
9184 if (cc_register (XEXP (x, 0), VOIDmode))
9185 *total = 0;
9186 else
9187 *total = COSTS_N_INSNS (1);
9188 return false;
9189
9190 case ABS:
9191 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9192 && (mode == SFmode || !TARGET_VFP_SINGLE))
9193 *total = COSTS_N_INSNS (1);
9194 else
9195 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9196 return false;
9197
9198 case SIGN_EXTEND:
9199 case ZERO_EXTEND:
9200 return arm_rtx_costs_1 (x, outer_code, total, 0);
9201
9202 case CONST_INT:
9203 if (const_ok_for_arm (INTVAL (x)))
9204 /* A multiplication by a constant requires another instruction
9205 to load the constant into a register. */
9206 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9207 ? 1 : 0);
9208 else if (const_ok_for_arm (~INTVAL (x)))
9209 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9210 else if (const_ok_for_arm (-INTVAL (x)))
9211 {
9212 if (outer_code == COMPARE || outer_code == PLUS
9213 || outer_code == MINUS)
9214 *total = 0;
9215 else
9216 *total = COSTS_N_INSNS (1);
9217 }
9218 else
9219 *total = COSTS_N_INSNS (2);
9220 return true;
9221
9222 case CONST:
9223 case LABEL_REF:
9224 case SYMBOL_REF:
9225 *total = COSTS_N_INSNS (2);
9226 return true;
9227
9228 case CONST_DOUBLE:
9229 *total = COSTS_N_INSNS (4);
9230 return true;
9231
9232 case CONST_VECTOR:
9233 if (TARGET_NEON
9234 && TARGET_HARD_FLOAT
9235 && outer_code == SET
9236 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9237 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9238 *total = COSTS_N_INSNS (1);
9239 else
9240 *total = COSTS_N_INSNS (4);
9241 return true;
9242
9243 case HIGH:
9244 case LO_SUM:
9245 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9246 cost of these slightly. */
9247 *total = COSTS_N_INSNS (1) + 1;
9248 return true;
9249
9250 case SET:
9251 return false;
9252
9253 default:
9254 if (mode != VOIDmode)
9255 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9256 else
9257 *total = COSTS_N_INSNS (4); /* Who knows? */
9258 return false;
9259 }
9260 }
9261
9262 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9263 operand, then return the operand that is being shifted. If the shift
9264 is not by a constant, then set SHIFT_REG to point to the operand.
9265 Return NULL if OP is not a shifter operand. */
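/* For example (illustrative only): for (ashift (reg A) (const_int 2)) or
   (mult (reg A) (const_int 4)) this returns (reg A) and leaves SHIFT_REG
   untouched, whereas for (ashift (reg A) (reg B)) it returns (reg A) and
   sets *SHIFT_REG to (reg B).  */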
9266 static rtx
9267 shifter_op_p (rtx op, rtx *shift_reg)
9268 {
9269 enum rtx_code code = GET_CODE (op);
9270
9271 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9272 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9273 return XEXP (op, 0);
9274 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9275 return XEXP (op, 0);
9276 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9277 || code == ASHIFTRT)
9278 {
9279 if (!CONST_INT_P (XEXP (op, 1)))
9280 *shift_reg = XEXP (op, 1);
9281 return XEXP (op, 0);
9282 }
9283
9284 return NULL;
9285 }
9286
9287 static bool
9288 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9289 {
9290 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9291 gcc_assert (GET_CODE (x) == UNSPEC);
9292
9293 switch (XINT (x, 1))
9294 {
9295 case UNSPEC_UNALIGNED_LOAD:
9296 /* We can only do unaligned loads into the integer unit, and we can't
9297 use LDM or LDRD. */
9298 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9299 if (speed_p)
9300 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9301 + extra_cost->ldst.load_unaligned);
9302
9303 #ifdef NOT_YET
9304 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9305 ADDR_SPACE_GENERIC, speed_p);
9306 #endif
9307 return true;
9308
9309 case UNSPEC_UNALIGNED_STORE:
9310 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9311 if (speed_p)
9312 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9313 + extra_cost->ldst.store_unaligned);
9314
9315 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9316 #ifdef NOT_YET
9317 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9318 ADDR_SPACE_GENERIC, speed_p);
9319 #endif
9320 return true;
9321
9322 case UNSPEC_VRINTZ:
9323 case UNSPEC_VRINTP:
9324 case UNSPEC_VRINTM:
9325 case UNSPEC_VRINTR:
9326 case UNSPEC_VRINTX:
9327 case UNSPEC_VRINTA:
9328 *cost = COSTS_N_INSNS (1);
9329 if (speed_p)
9330 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9331
9332 return true;
9333 default:
9334 *cost = COSTS_N_INSNS (2);
9335 break;
9336 }
9337 return false;
9338 }
9339
9340 /* Cost of a libcall. We assume one insn per argument, an amount for the
9341 call (one insn for -Os) and then one for processing the result. */
9342 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
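/* For example (purely illustrative), a two-operand libcall such as a
   DImode division is costed as LIBCALL_COST (2), i.e. COSTS_N_INSNS (20)
   when optimizing for speed and COSTS_N_INSNS (4) when optimizing for
   size.  */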
9343
9344 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9345 do \
9346 { \
9347 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9348 if (shift_op != NULL \
9349 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9350 { \
9351 if (shift_reg) \
9352 { \
9353 if (speed_p) \
9354 *cost += extra_cost->alu.arith_shift_reg; \
9355 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9356 } \
9357 else if (speed_p) \
9358 *cost += extra_cost->alu.arith_shift; \
9359 \
9360 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9361 + rtx_cost (XEXP (x, 1 - IDX), \
9362 OP, 1, speed_p)); \
9363 return true; \
9364 } \
9365 } \
9366 while (0);
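/* Illustrative note: the macro above matches narrow-mode expressions such
   as (plus:HI (ashift:HI (reg) (const_int 1)) (reg)), charging an
   arithmetic-with-shift operation plus the costs of the shifted operand
   and of the other operand of the PLUS or MINUS.  */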
9367
9368 /* RTX costs. Make an estimate of the cost of executing the operation
9369 X, which is contained within an operation with code OUTER_CODE.
9370 SPEED_P indicates whether the cost desired is the performance cost,
9371 or the size cost. The estimate is stored in COST and the return
9372 value is TRUE if the cost calculation is final, or FALSE if the
9373 caller should recurse through the operands of X to add additional
9374 costs.
9375
9376 We currently make no attempt to model the size savings of Thumb-2
9377 16-bit instructions. At the normal points in compilation where
9378 this code is called we have no measure of whether the condition
9379 flags are live or not, and thus no realistic way to determine what
9380 the size will eventually be. */
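/* Illustrative example of that convention: for a simple SImode PLUS of
   two registers the code below sets *COST to COSTS_N_INSNS (1), adds
   extra_cost->alu.arith when SPEED_P, and returns FALSE so that the
   generic costing code adds the costs of the two register operands.  */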
9381 static bool
9382 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9383 const struct cpu_cost_table *extra_cost,
9384 int *cost, bool speed_p)
9385 {
9386 machine_mode mode = GET_MODE (x);
9387
9388 if (TARGET_THUMB1)
9389 {
9390 if (speed_p)
9391 *cost = thumb1_rtx_costs (x, code, outer_code);
9392 else
9393 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9394 return true;
9395 }
9396
9397 switch (code)
9398 {
9399 case SET:
9400 *cost = 0;
9401 /* SET RTXs don't have a mode so we get it from the destination. */
9402 mode = GET_MODE (SET_DEST (x));
9403
9404 if (REG_P (SET_SRC (x))
9405 && REG_P (SET_DEST (x)))
9406 {
9407 /* Assume that most copies can be done with a single insn,
9408 unless we don't have HW FP, in which case everything
9409 larger than word mode will require two insns. */
9410 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9411 && GET_MODE_SIZE (mode) > 4)
9412 || mode == DImode)
9413 ? 2 : 1);
9414 /* Conditional register moves can be encoded
9415 in 16 bits in Thumb mode. */
9416 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9417 *cost >>= 1;
9418
9419 return true;
9420 }
9421
9422 if (CONST_INT_P (SET_SRC (x)))
9423 {
9424 /* Handle CONST_INT here, since the value doesn't have a mode
9425 and we would otherwise be unable to work out the true cost. */
9426 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9427 outer_code = SET;
9428 /* Slightly lower the cost of setting a core reg to a constant.
9429 This helps break up chains and allows for better scheduling. */
9430 if (REG_P (SET_DEST (x))
9431 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9432 *cost -= 1;
9433 x = SET_SRC (x);
9434 /* Moves of an immediate in the range [0, 255] can be encoded in
9435 16 bits in Thumb mode. */
9436 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9437 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9438 *cost >>= 1;
9439 goto const_int_cost;
9440 }
9441
9442 return false;
9443
9444 case MEM:
9445 /* A memory access costs 1 insn if the mode is small, or the address is
9446 a single register, otherwise it costs one insn per word. */
9447 if (REG_P (XEXP (x, 0)))
9448 *cost = COSTS_N_INSNS (1);
9449 else if (flag_pic
9450 && GET_CODE (XEXP (x, 0)) == PLUS
9451 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9452 /* This will be split into two instructions.
9453 See arm.md:calculate_pic_address. */
9454 *cost = COSTS_N_INSNS (2);
9455 else
9456 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9457
9458 /* For speed optimizations, add the costs of the address and
9459 accessing memory. */
9460 if (speed_p)
9461 #ifdef NOT_YET
9462 *cost += (extra_cost->ldst.load
9463 + arm_address_cost (XEXP (x, 0), mode,
9464 ADDR_SPACE_GENERIC, speed_p));
9465 #else
9466 *cost += extra_cost->ldst.load;
9467 #endif
9468 return true;
9469
9470 case PARALLEL:
9471 {
9472 /* Calculations of LDM costs are complex. We assume an initial cost
9473 (ldm_1st) which will load the number of registers mentioned in
9474 ldm_regs_per_insn_1st registers; then each additional
9475 ldm_regs_per_insn_subsequent registers cost one more insn. The
9476 formula for N regs is thus:
9477
9478 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9479 + ldm_regs_per_insn_subsequent - 1)
9480 / ldm_regs_per_insn_subsequent).
9481
9482 Additional costs may also be added for addressing. A similar
9483 formula is used for STM. */
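/* Worked example (illustrative): with ldm_regs_per_insn_1st == 2,
   ldm_regs_per_insn_subsequent == 2 and N == 5 registers, the formula
   above gives ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */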
9484
9485 bool is_ldm = load_multiple_operation (x, SImode);
9486 bool is_stm = store_multiple_operation (x, SImode);
9487
9488 *cost = COSTS_N_INSNS (1);
9489
9490 if (is_ldm || is_stm)
9491 {
9492 if (speed_p)
9493 {
9494 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9495 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9496 ? extra_cost->ldst.ldm_regs_per_insn_1st
9497 : extra_cost->ldst.stm_regs_per_insn_1st;
9498 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9499 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9500 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9501
9502 *cost += regs_per_insn_1st
9503 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9504 + regs_per_insn_sub - 1)
9505 / regs_per_insn_sub);
9506 return true;
9507 }
9508
9509 }
9510 return false;
9511 }
9512 case DIV:
9513 case UDIV:
9514 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9515 && (mode == SFmode || !TARGET_VFP_SINGLE))
9516 *cost = COSTS_N_INSNS (speed_p
9517 ? extra_cost->fp[mode != SFmode].div : 1);
9518 else if (mode == SImode && TARGET_IDIV)
9519 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9520 else
9521 *cost = LIBCALL_COST (2);
9522 return false; /* All arguments must be in registers. */
9523
9524 case MOD:
9525 case UMOD:
9526 *cost = LIBCALL_COST (2);
9527 return false; /* All arguments must be in registers. */
9528
9529 case ROTATE:
9530 if (mode == SImode && REG_P (XEXP (x, 1)))
9531 {
9532 *cost = (COSTS_N_INSNS (2)
9533 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9534 if (speed_p)
9535 *cost += extra_cost->alu.shift_reg;
9536 return true;
9537 }
9538 /* Fall through */
9539 case ROTATERT:
9540 case ASHIFT:
9541 case LSHIFTRT:
9542 case ASHIFTRT:
9543 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9544 {
9545 *cost = (COSTS_N_INSNS (3)
9546 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9547 if (speed_p)
9548 *cost += 2 * extra_cost->alu.shift;
9549 return true;
9550 }
9551 else if (mode == SImode)
9552 {
9553 *cost = (COSTS_N_INSNS (1)
9554 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9555 /* Slightly disparage register shifts at -Os, but not by much. */
9556 if (!CONST_INT_P (XEXP (x, 1)))
9557 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9558 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9559 return true;
9560 }
9561 else if (GET_MODE_CLASS (mode) == MODE_INT
9562 && GET_MODE_SIZE (mode) < 4)
9563 {
9564 if (code == ASHIFT)
9565 {
9566 *cost = (COSTS_N_INSNS (1)
9567 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9568 /* Slightly disparage register shifts at -Os, but not by
9569 much. */
9570 if (!CONST_INT_P (XEXP (x, 1)))
9571 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9572 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9573 }
9574 else if (code == LSHIFTRT || code == ASHIFTRT)
9575 {
9576 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9577 {
9578 /* Can use SBFX/UBFX. */
9579 *cost = COSTS_N_INSNS (1);
9580 if (speed_p)
9581 *cost += extra_cost->alu.bfx;
9582 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9583 }
9584 else
9585 {
9586 *cost = COSTS_N_INSNS (2);
9587 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9588 if (speed_p)
9589 {
9590 if (CONST_INT_P (XEXP (x, 1)))
9591 *cost += 2 * extra_cost->alu.shift;
9592 else
9593 *cost += (extra_cost->alu.shift
9594 + extra_cost->alu.shift_reg);
9595 }
9596 else
9597 /* Slightly disparage register shifts. */
9598 *cost += !CONST_INT_P (XEXP (x, 1));
9599 }
9600 }
9601 else /* Rotates. */
9602 {
9603 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9604 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9605 if (speed_p)
9606 {
9607 if (CONST_INT_P (XEXP (x, 1)))
9608 *cost += (2 * extra_cost->alu.shift
9609 + extra_cost->alu.log_shift);
9610 else
9611 *cost += (extra_cost->alu.shift
9612 + extra_cost->alu.shift_reg
9613 + extra_cost->alu.log_shift_reg);
9614 }
9615 }
9616 return true;
9617 }
9618
9619 *cost = LIBCALL_COST (2);
9620 return false;
9621
9622 case BSWAP:
9623 if (arm_arch6)
9624 {
9625 if (mode == SImode)
9626 {
9627 *cost = COSTS_N_INSNS (1);
9628 if (speed_p)
9629 *cost += extra_cost->alu.rev;
9630
9631 return false;
9632 }
9633 }
9634 else
9635 {
9636 /* No rev instruction available. Look at arm_legacy_rev
9637 and thumb_legacy_rev for the form of RTL used then. */
9638 if (TARGET_THUMB)
9639 {
9640 *cost = COSTS_N_INSNS (10);
9641
9642 if (speed_p)
9643 {
9644 *cost += 6 * extra_cost->alu.shift;
9645 *cost += 3 * extra_cost->alu.logical;
9646 }
9647 }
9648 else
9649 {
9650 *cost = COSTS_N_INSNS (5);
9651
9652 if (speed_p)
9653 {
9654 *cost += 2 * extra_cost->alu.shift;
9655 *cost += extra_cost->alu.arith_shift;
9656 *cost += 2 * extra_cost->alu.logical;
9657 }
9658 }
9659 return true;
9660 }
9661 return false;
9662
9663 case MINUS:
9664 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9665 && (mode == SFmode || !TARGET_VFP_SINGLE))
9666 {
9667 *cost = COSTS_N_INSNS (1);
9668 if (GET_CODE (XEXP (x, 0)) == MULT
9669 || GET_CODE (XEXP (x, 1)) == MULT)
9670 {
9671 rtx mul_op0, mul_op1, sub_op;
9672
9673 if (speed_p)
9674 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9675
9676 if (GET_CODE (XEXP (x, 0)) == MULT)
9677 {
9678 mul_op0 = XEXP (XEXP (x, 0), 0);
9679 mul_op1 = XEXP (XEXP (x, 0), 1);
9680 sub_op = XEXP (x, 1);
9681 }
9682 else
9683 {
9684 mul_op0 = XEXP (XEXP (x, 1), 0);
9685 mul_op1 = XEXP (XEXP (x, 1), 1);
9686 sub_op = XEXP (x, 0);
9687 }
9688
9689 /* The first operand of the multiply may be optionally
9690 negated. */
9691 if (GET_CODE (mul_op0) == NEG)
9692 mul_op0 = XEXP (mul_op0, 0);
9693
9694 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9695 + rtx_cost (mul_op1, code, 0, speed_p)
9696 + rtx_cost (sub_op, code, 0, speed_p));
9697
9698 return true;
9699 }
9700
9701 if (speed_p)
9702 *cost += extra_cost->fp[mode != SFmode].addsub;
9703 return false;
9704 }
9705
9706 if (mode == SImode)
9707 {
9708 rtx shift_by_reg = NULL;
9709 rtx shift_op;
9710 rtx non_shift_op;
9711
9712 *cost = COSTS_N_INSNS (1);
9713
9714 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9715 if (shift_op == NULL)
9716 {
9717 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9718 non_shift_op = XEXP (x, 0);
9719 }
9720 else
9721 non_shift_op = XEXP (x, 1);
9722
9723 if (shift_op != NULL)
9724 {
9725 if (shift_by_reg != NULL)
9726 {
9727 if (speed_p)
9728 *cost += extra_cost->alu.arith_shift_reg;
9729 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9730 }
9731 else if (speed_p)
9732 *cost += extra_cost->alu.arith_shift;
9733
9734 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9735 + rtx_cost (non_shift_op, code, 0, speed_p));
9736 return true;
9737 }
9738
9739 if (arm_arch_thumb2
9740 && GET_CODE (XEXP (x, 1)) == MULT)
9741 {
9742 /* MLS. */
9743 if (speed_p)
9744 *cost += extra_cost->mult[0].add;
9745 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9746 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9747 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9748 return true;
9749 }
9750
9751 if (CONST_INT_P (XEXP (x, 0)))
9752 {
9753 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9754 INTVAL (XEXP (x, 0)), NULL_RTX,
9755 NULL_RTX, 1, 0);
9756 *cost = COSTS_N_INSNS (insns);
9757 if (speed_p)
9758 *cost += insns * extra_cost->alu.arith;
9759 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9760 return true;
9761 }
9762 else if (speed_p)
9763 *cost += extra_cost->alu.arith;
9764
9765 return false;
9766 }
9767
9768 if (GET_MODE_CLASS (mode) == MODE_INT
9769 && GET_MODE_SIZE (mode) < 4)
9770 {
9771 rtx shift_op, shift_reg;
9772 shift_reg = NULL;
9773
9774 /* We check both sides of the MINUS for shifter operands since,
9775 unlike PLUS, it's not commutative. */
9776
9777 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9778 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9779
9780 /* Slightly disparage, as we might need to widen the result. */
9781 *cost = 1 + COSTS_N_INSNS (1);
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9784
9785 if (CONST_INT_P (XEXP (x, 0)))
9786 {
9787 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9788 return true;
9789 }
9790
9791 return false;
9792 }
9793
9794 if (mode == DImode)
9795 {
9796 *cost = COSTS_N_INSNS (2);
9797
9798 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9799 {
9800 rtx op1 = XEXP (x, 1);
9801
9802 if (speed_p)
9803 *cost += 2 * extra_cost->alu.arith;
9804
9805 if (GET_CODE (op1) == ZERO_EXTEND)
9806 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9807 else
9808 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9809 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9810 0, speed_p);
9811 return true;
9812 }
9813 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9814 {
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9817 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9818 0, speed_p)
9819 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9820 return true;
9821 }
9822 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9823 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9824 {
9825 if (speed_p)
9826 *cost += (extra_cost->alu.arith
9827 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9828 ? extra_cost->alu.arith
9829 : extra_cost->alu.arith_shift));
9830 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9831 + rtx_cost (XEXP (XEXP (x, 1), 0),
9832 GET_CODE (XEXP (x, 1)), 0, speed_p));
9833 return true;
9834 }
9835
9836 if (speed_p)
9837 *cost += 2 * extra_cost->alu.arith;
9838 return false;
9839 }
9840
9841 /* Vector mode? */
9842
9843 *cost = LIBCALL_COST (2);
9844 return false;
9845
9846 case PLUS:
9847 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9848 && (mode == SFmode || !TARGET_VFP_SINGLE))
9849 {
9850 *cost = COSTS_N_INSNS (1);
9851 if (GET_CODE (XEXP (x, 0)) == MULT)
9852 {
9853 rtx mul_op0, mul_op1, add_op;
9854
9855 if (speed_p)
9856 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9857
9858 mul_op0 = XEXP (XEXP (x, 0), 0);
9859 mul_op1 = XEXP (XEXP (x, 0), 1);
9860 add_op = XEXP (x, 1);
9861
9862 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9863 + rtx_cost (mul_op1, code, 0, speed_p)
9864 + rtx_cost (add_op, code, 0, speed_p));
9865
9866 return true;
9867 }
9868
9869 if (speed_p)
9870 *cost += extra_cost->fp[mode != SFmode].addsub;
9871 return false;
9872 }
9873 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9874 {
9875 *cost = LIBCALL_COST (2);
9876 return false;
9877 }
9878
9879 /* Narrow modes can be synthesized in SImode, but the range
9880 of useful sub-operations is limited. Check for shift operations
9881 on one of the operands. Only left shifts can be used in the
9882 narrow modes. */
9883 if (GET_MODE_CLASS (mode) == MODE_INT
9884 && GET_MODE_SIZE (mode) < 4)
9885 {
9886 rtx shift_op, shift_reg;
9887 shift_reg = NULL;
9888
9889 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9890
9891 if (CONST_INT_P (XEXP (x, 1)))
9892 {
9893 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9894 INTVAL (XEXP (x, 1)), NULL_RTX,
9895 NULL_RTX, 1, 0);
9896 *cost = COSTS_N_INSNS (insns);
9897 if (speed_p)
9898 *cost += insns * extra_cost->alu.arith;
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9902 return true;
9903 }
9904
9905 /* Slightly penalize a narrow operation as the result may
9906 need widening. */
9907 *cost = 1 + COSTS_N_INSNS (1);
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith;
9910
9911 return false;
9912 }
9913
9914 if (mode == SImode)
9915 {
9916 rtx shift_op, shift_reg;
9917
9918 *cost = COSTS_N_INSNS (1);
9919 if (TARGET_INT_SIMD
9920 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9921 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9922 {
9923 /* UXTA[BH] or SXTA[BH]. */
9924 if (speed_p)
9925 *cost += extra_cost->alu.extend_arith;
9926 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9927 speed_p)
9928 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9929 return true;
9930 }
9931
9932 shift_reg = NULL;
9933 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9934 if (shift_op != NULL)
9935 {
9936 if (shift_reg)
9937 {
9938 if (speed_p)
9939 *cost += extra_cost->alu.arith_shift_reg;
9940 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9941 }
9942 else if (speed_p)
9943 *cost += extra_cost->alu.arith_shift;
9944
9945 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9946 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9947 return true;
9948 }
9949 if (GET_CODE (XEXP (x, 0)) == MULT)
9950 {
9951 rtx mul_op = XEXP (x, 0);
9952
9953 *cost = COSTS_N_INSNS (1);
9954
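/* Illustrative note: the (deliberately long) test below recognizes the
   accumulate forms that SMLA[BT][BT] handles directly, i.e. a PLUS of a
   multiply whose operands are each either a SIGN_EXTEND or an arithmetic
   right shift by 16 selecting the top half-word.  */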
9955 if (TARGET_DSP_MULTIPLY
9956 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9957 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9958 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9961 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9962 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9963 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9964 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9965 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9966 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9967 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9968 == 16))))))
9969 {
9970 /* SMLA[BT][BT]. */
9971 if (speed_p)
9972 *cost += extra_cost->mult[0].extend_add;
9973 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9978 return true;
9979 }
9980
9981 if (speed_p)
9982 *cost += extra_cost->mult[0].add;
9983 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9984 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9985 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9986 return true;
9987 }
9988 if (CONST_INT_P (XEXP (x, 1)))
9989 {
9990 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9991 INTVAL (XEXP (x, 1)), NULL_RTX,
9992 NULL_RTX, 1, 0);
9993 *cost = COSTS_N_INSNS (insns);
9994 if (speed_p)
9995 *cost += insns * extra_cost->alu.arith;
9996 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9997 return true;
9998 }
9999 else if (speed_p)
10000 *cost += extra_cost->alu.arith;
10001
10002 return false;
10003 }
10004
10005 if (mode == DImode)
10006 {
10007 if (arm_arch3m
10008 && GET_CODE (XEXP (x, 0)) == MULT
10009 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10011 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10012 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10013 {
10014 *cost = COSTS_N_INSNS (1);
10015 if (speed_p)
10016 *cost += extra_cost->mult[1].extend_add;
10017 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10020 ZERO_EXTEND, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10022 return true;
10023 }
10024
10025 *cost = COSTS_N_INSNS (2);
10026
10027 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10028 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10029 {
10030 if (speed_p)
10031 *cost += (extra_cost->alu.arith
10032 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10033 ? extra_cost->alu.arith
10034 : extra_cost->alu.arith_shift));
10035
10036 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10037 speed_p)
10038 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10039 return true;
10040 }
10041
10042 if (speed_p)
10043 *cost += 2 * extra_cost->alu.arith;
10044 return false;
10045 }
10046
10047 /* Vector mode? */
10048 *cost = LIBCALL_COST (2);
10049 return false;
10050 case IOR:
10051 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10052 {
10053 *cost = COSTS_N_INSNS (1);
10054 if (speed_p)
10055 *cost += extra_cost->alu.rev;
10056
10057 return true;
10058 }
10059 /* Fall through. */
10060 case AND: case XOR:
10061 if (mode == SImode)
10062 {
10063 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10064 rtx op0 = XEXP (x, 0);
10065 rtx shift_op, shift_reg;
10066
10067 *cost = COSTS_N_INSNS (1);
10068
10069 if (subcode == NOT
10070 && (code == AND
10071 || (code == IOR && TARGET_THUMB2)))
10072 op0 = XEXP (op0, 0);
10073
10074 shift_reg = NULL;
10075 shift_op = shifter_op_p (op0, &shift_reg);
10076 if (shift_op != NULL)
10077 {
10078 if (shift_reg)
10079 {
10080 if (speed_p)
10081 *cost += extra_cost->alu.log_shift_reg;
10082 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10083 }
10084 else if (speed_p)
10085 *cost += extra_cost->alu.log_shift;
10086
10087 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10088 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10089 return true;
10090 }
10091
10092 if (CONST_INT_P (XEXP (x, 1)))
10093 {
10094 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10095 INTVAL (XEXP (x, 1)), NULL_RTX,
10096 NULL_RTX, 1, 0);
10097
10098 *cost = COSTS_N_INSNS (insns);
10099 if (speed_p)
10100 *cost += insns * extra_cost->alu.logical;
10101 *cost += rtx_cost (op0, code, 0, speed_p);
10102 return true;
10103 }
10104
10105 if (speed_p)
10106 *cost += extra_cost->alu.logical;
10107 *cost += (rtx_cost (op0, code, 0, speed_p)
10108 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10109 return true;
10110 }
10111
10112 if (mode == DImode)
10113 {
10114 rtx op0 = XEXP (x, 0);
10115 enum rtx_code subcode = GET_CODE (op0);
10116
10117 *cost = COSTS_N_INSNS (2);
10118
10119 if (subcode == NOT
10120 && (code == AND
10121 || (code == IOR && TARGET_THUMB2)))
10122 op0 = XEXP (op0, 0);
10123
10124 if (GET_CODE (op0) == ZERO_EXTEND)
10125 {
10126 if (speed_p)
10127 *cost += 2 * extra_cost->alu.logical;
10128
10129 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10130 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10131 return true;
10132 }
10133 else if (GET_CODE (op0) == SIGN_EXTEND)
10134 {
10135 if (speed_p)
10136 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10137
10138 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10139 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10140 return true;
10141 }
10142
10143 if (speed_p)
10144 *cost += 2 * extra_cost->alu.logical;
10145
10146 return true;
10147 }
10148 /* Vector mode? */
10149
10150 *cost = LIBCALL_COST (2);
10151 return false;
10152
10153 case MULT:
10154 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10155 && (mode == SFmode || !TARGET_VFP_SINGLE))
10156 {
10157 rtx op0 = XEXP (x, 0);
10158
10159 *cost = COSTS_N_INSNS (1);
10160
10161 if (GET_CODE (op0) == NEG)
10162 op0 = XEXP (op0, 0);
10163
10164 if (speed_p)
10165 *cost += extra_cost->fp[mode != SFmode].mult;
10166
10167 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10168 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10169 return true;
10170 }
10171 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10172 {
10173 *cost = LIBCALL_COST (2);
10174 return false;
10175 }
10176
10177 if (mode == SImode)
10178 {
10179 *cost = COSTS_N_INSNS (1);
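/* Illustrative note: the test below recognizes 16x16->32 multiplies in
   which each operand is either a SIGN_EXTEND or an arithmetic right
   shift by 16 selecting the top half-word, i.e. the forms that
   SMUL[TB][TB] can consume directly.  */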
10180 if (TARGET_DSP_MULTIPLY
10181 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10182 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10183 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10185 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10186 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10187 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10188 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10189 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10190 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10191 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10192 && (INTVAL (XEXP (XEXP (x, 1), 1))
10193 == 16))))))
10194 {
10195 /* SMUL[TB][TB]. */
10196 if (speed_p)
10197 *cost += extra_cost->mult[0].extend;
10198 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10199 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10200 return true;
10201 }
10202 if (speed_p)
10203 *cost += extra_cost->mult[0].simple;
10204 return false;
10205 }
10206
10207 if (mode == DImode)
10208 {
10209 if (arm_arch3m
10210 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10211 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10212 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10213 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10214 {
10215 *cost = COSTS_N_INSNS (1);
10216 if (speed_p)
10217 *cost += extra_cost->mult[1].extend;
10218 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10219 ZERO_EXTEND, 0, speed_p)
10220 + rtx_cost (XEXP (XEXP (x, 1), 0),
10221 ZERO_EXTEND, 0, speed_p));
10222 return true;
10223 }
10224
10225 *cost = LIBCALL_COST (2);
10226 return false;
10227 }
10228
10229 /* Vector mode? */
10230 *cost = LIBCALL_COST (2);
10231 return false;
10232
10233 case NEG:
10234 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10235 && (mode == SFmode || !TARGET_VFP_SINGLE))
10236 {
10237 *cost = COSTS_N_INSNS (1);
10238 if (speed_p)
10239 *cost += extra_cost->fp[mode != SFmode].neg;
10240
10241 return false;
10242 }
10243 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10244 {
10245 *cost = LIBCALL_COST (1);
10246 return false;
10247 }
10248
10249 if (mode == SImode)
10250 {
10251 if (GET_CODE (XEXP (x, 0)) == ABS)
10252 {
10253 *cost = COSTS_N_INSNS (2);
10254 /* Assume the non-flag-changing variant. */
10255 if (speed_p)
10256 *cost += (extra_cost->alu.log_shift
10257 + extra_cost->alu.arith_shift);
10258 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10259 return true;
10260 }
10261
10262 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10263 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10264 {
10265 *cost = COSTS_N_INSNS (2);
10266 /* No extra cost for MOV imm and MVN imm. */
10267 /* If the comparison op is using the flags, there's no further
10268 cost, otherwise we need to add the cost of the comparison. */
10269 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10270 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10271 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10272 {
10273 *cost += (COSTS_N_INSNS (1)
10274 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10275 speed_p)
10276 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10277 speed_p));
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith;
10280 }
10281 return true;
10282 }
10283 *cost = COSTS_N_INSNS (1);
10284 if (speed_p)
10285 *cost += extra_cost->alu.arith;
10286 return false;
10287 }
10288
10289 if (GET_MODE_CLASS (mode) == MODE_INT
10290 && GET_MODE_SIZE (mode) < 4)
10291 {
10292 /* Slightly disparage, as we might need an extend operation. */
10293 *cost = 1 + COSTS_N_INSNS (1);
10294 if (speed_p)
10295 *cost += extra_cost->alu.arith;
10296 return false;
10297 }
10298
10299 if (mode == DImode)
10300 {
10301 *cost = COSTS_N_INSNS (2);
10302 if (speed_p)
10303 *cost += 2 * extra_cost->alu.arith;
10304 return false;
10305 }
10306
10307 /* Vector mode? */
10308 *cost = LIBCALL_COST (1);
10309 return false;
10310
10311 case NOT:
10312 if (mode == SImode)
10313 {
10314 rtx shift_op;
10315 rtx shift_reg = NULL;
10316
10317 *cost = COSTS_N_INSNS (1);
10318 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10319
10320 if (shift_op)
10321 {
10322 if (shift_reg != NULL)
10323 {
10324 if (speed_p)
10325 *cost += extra_cost->alu.log_shift_reg;
10326 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10327 }
10328 else if (speed_p)
10329 *cost += extra_cost->alu.log_shift;
10330 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10331 return true;
10332 }
10333
10334 if (speed_p)
10335 *cost += extra_cost->alu.logical;
10336 return false;
10337 }
10338 if (mode == DImode)
10339 {
10340 *cost = COSTS_N_INSNS (2);
10341 return false;
10342 }
10343
10344 /* Vector mode? */
10345
10346 *cost += LIBCALL_COST (1);
10347 return false;
10348
10349 case IF_THEN_ELSE:
10350 {
10351 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10352 {
10353 *cost = COSTS_N_INSNS (4);
10354 return true;
10355 }
10356 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10357 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10358
10359 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10360 /* Assume that if one arm of the if_then_else is a register, it will
10361 be tied with the result and the conditional insn will be
10362 eliminated. */
10363 if (REG_P (XEXP (x, 1)))
10364 *cost += op2cost;
10365 else if (REG_P (XEXP (x, 2)))
10366 *cost += op1cost;
10367 else
10368 {
10369 if (speed_p)
10370 {
10371 if (extra_cost->alu.non_exec_costs_exec)
10372 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10373 else
10374 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10375 }
10376 else
10377 *cost += op1cost + op2cost;
10378 }
10379 }
10380 return true;
10381
10382 case COMPARE:
10383 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10384 *cost = 0;
10385 else
10386 {
10387 machine_mode op0mode;
10388 /* We'll mostly assume that the cost of a compare is the cost of the
10389 LHS. However, there are some notable exceptions. */
10390
10391 /* Floating point compares are never done as side-effects. */
10392 op0mode = GET_MODE (XEXP (x, 0));
10393 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10394 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10395 {
10396 *cost = COSTS_N_INSNS (1);
10397 if (speed_p)
10398 *cost += extra_cost->fp[op0mode != SFmode].compare;
10399
10400 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10401 {
10402 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10403 return true;
10404 }
10405
10406 return false;
10407 }
10408 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10409 {
10410 *cost = LIBCALL_COST (2);
10411 return false;
10412 }
10413
10414 /* DImode compares normally take two insns. */
10415 if (op0mode == DImode)
10416 {
10417 *cost = COSTS_N_INSNS (2);
10418 if (speed_p)
10419 *cost += 2 * extra_cost->alu.arith;
10420 return false;
10421 }
10422
10423 if (op0mode == SImode)
10424 {
10425 rtx shift_op;
10426 rtx shift_reg;
10427
10428 if (XEXP (x, 1) == const0_rtx
10429 && !(REG_P (XEXP (x, 0))
10430 || (GET_CODE (XEXP (x, 0)) == SUBREG
10431 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10432 {
10433 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10434
10435 /* Multiply operations that set the flags are often
10436 significantly more expensive. */
10437 if (speed_p
10438 && GET_CODE (XEXP (x, 0)) == MULT
10439 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10440 *cost += extra_cost->mult[0].flag_setting;
10441
10442 if (speed_p
10443 && GET_CODE (XEXP (x, 0)) == PLUS
10444 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10445 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10446 0), 1), mode))
10447 *cost += extra_cost->mult[0].flag_setting;
10448 return true;
10449 }
10450
10451 shift_reg = NULL;
10452 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10453 if (shift_op != NULL)
10454 {
10455 *cost = COSTS_N_INSNS (1);
10456 if (shift_reg != NULL)
10457 {
10458 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10459 if (speed_p)
10460 *cost += extra_cost->alu.arith_shift_reg;
10461 }
10462 else if (speed_p)
10463 *cost += extra_cost->alu.arith_shift;
10464 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10465 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10466 return true;
10467 }
10468
10469 *cost = COSTS_N_INSNS (1);
10470 if (speed_p)
10471 *cost += extra_cost->alu.arith;
10472 if (CONST_INT_P (XEXP (x, 1))
10473 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10474 {
10475 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10476 return true;
10477 }
10478 return false;
10479 }
10480
10481 /* Vector mode? */
10482
10483 *cost = LIBCALL_COST (2);
10484 return false;
10485 }
10486 return true;
10487
10488 case EQ:
10489 case NE:
10490 case LT:
10491 case LE:
10492 case GT:
10493 case GE:
10494 case LTU:
10495 case LEU:
10496 case GEU:
10497 case GTU:
10498 case ORDERED:
10499 case UNORDERED:
10500 case UNEQ:
10501 case UNLE:
10502 case UNLT:
10503 case UNGE:
10504 case UNGT:
10505 case LTGT:
10506 if (outer_code == SET)
10507 {
10508 /* Is it a store-flag operation? */
10509 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10510 && XEXP (x, 1) == const0_rtx)
10511 {
10512 /* Thumb also needs an IT insn. */
10513 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10514 return true;
10515 }
10516 if (XEXP (x, 1) == const0_rtx)
10517 {
10518 switch (code)
10519 {
10520 case LT:
10521 /* LSR Rd, Rn, #31. */
10522 *cost = COSTS_N_INSNS (1);
10523 if (speed_p)
10524 *cost += extra_cost->alu.shift;
10525 break;
10526
10527 case EQ:
10528 /* RSBS T1, Rn, #0
10529 ADC Rd, Rn, T1. */
10530
10531 case NE:
10532 /* SUBS T1, Rn, #1
10533 SBC Rd, Rn, T1. */
10534 *cost = COSTS_N_INSNS (2);
10535 break;
10536
10537 case LE:
10538 /* RSBS T1, Rn, Rn, LSR #31
10539 ADC Rd, Rn, T1. */
10540 *cost = COSTS_N_INSNS (2);
10541 if (speed_p)
10542 *cost += extra_cost->alu.arith_shift;
10543 break;
10544
10545 case GT:
10546 /* RSB Rd, Rn, Rn, ASR #1
10547 LSR Rd, Rd, #31. */
10548 *cost = COSTS_N_INSNS (2);
10549 if (speed_p)
10550 *cost += (extra_cost->alu.arith_shift
10551 + extra_cost->alu.shift);
10552 break;
10553
10554 case GE:
10555 /* ASR Rd, Rn, #31
10556 ADD Rd, Rn, #1. */
10557 *cost = COSTS_N_INSNS (2);
10558 if (speed_p)
10559 *cost += extra_cost->alu.shift;
10560 break;
10561
10562 default:
10563 /* Remaining cases are either meaningless or would take
10564 three insns anyway. */
10565 *cost = COSTS_N_INSNS (3);
10566 break;
10567 }
10568 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10569 return true;
10570 }
10571 else
10572 {
10573 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10574 if (CONST_INT_P (XEXP (x, 1))
10575 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10576 {
10577 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10578 return true;
10579 }
10580
10581 return false;
10582 }
10583 }
10584 /* Not directly inside a set. If it involves the condition code
10585 register it must be the condition for a branch, cond_exec or
10586 I_T_E operation. Since the comparison is performed elsewhere
10587 this is just the control part which has no additional
10588 cost. */
10589 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10590 && XEXP (x, 1) == const0_rtx)
10591 {
10592 *cost = 0;
10593 return true;
10594 }
10595 return false;
10596
10597 case ABS:
10598 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10599 && (mode == SFmode || !TARGET_VFP_SINGLE))
10600 {
10601 *cost = COSTS_N_INSNS (1);
10602 if (speed_p)
10603 *cost += extra_cost->fp[mode != SFmode].neg;
10604
10605 return false;
10606 }
10607 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10608 {
10609 *cost = LIBCALL_COST (1);
10610 return false;
10611 }
10612
10613 if (mode == SImode)
10614 {
10615 *cost = COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10618 return false;
10619 }
10620 /* Vector mode? */
10621 *cost = LIBCALL_COST (1);
10622 return false;
10623
10624 case SIGN_EXTEND:
10625 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10626 && MEM_P (XEXP (x, 0)))
10627 {
10628 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10629
10630 if (mode == DImode)
10631 *cost += COSTS_N_INSNS (1);
10632
10633 if (!speed_p)
10634 return true;
10635
10636 if (GET_MODE (XEXP (x, 0)) == SImode)
10637 *cost += extra_cost->ldst.load;
10638 else
10639 *cost += extra_cost->ldst.load_sign_extend;
10640
10641 if (mode == DImode)
10642 *cost += extra_cost->alu.shift;
10643
10644 return true;
10645 }
10646
10647 /* Widening from less than 32 bits requires an extend operation. */
10648 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10649 {
10650 /* We have SXTB/SXTH. */
10651 *cost = COSTS_N_INSNS (1);
10652 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10653 if (speed_p)
10654 *cost += extra_cost->alu.extend;
10655 }
10656 else if (GET_MODE (XEXP (x, 0)) != SImode)
10657 {
10658 /* Needs two shifts. */
10659 *cost = COSTS_N_INSNS (2);
10660 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10661 if (speed_p)
10662 *cost += 2 * extra_cost->alu.shift;
10663 }
10664
10665 /* Widening beyond 32 bits requires one more insn. */
10666 if (mode == DImode)
10667 {
10668 *cost += COSTS_N_INSNS (1);
10669 if (speed_p)
10670 *cost += extra_cost->alu.shift;
10671 }
10672
10673 return true;
10674
10675 case ZERO_EXTEND:
10676 if ((arm_arch4
10677 || GET_MODE (XEXP (x, 0)) == SImode
10678 || GET_MODE (XEXP (x, 0)) == QImode)
10679 && MEM_P (XEXP (x, 0)))
10680 {
10681 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10682
10683 if (mode == DImode)
10684 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10685
10686 return true;
10687 }
10688
10689 /* Widening from less than 32 bits requires an extend operation. */
10690 if (GET_MODE (XEXP (x, 0)) == QImode)
10691 {
10692 /* UXTB can be a shorter instruction in Thumb2, but it might
10693 be slower than the AND Rd, Rn, #255 alternative. When
10694 optimizing for speed it should never be slower to use
10695 AND, and we don't really model 16-bit vs 32-bit insns
10696 here. */
10697 *cost = COSTS_N_INSNS (1);
10698 if (speed_p)
10699 *cost += extra_cost->alu.logical;
10700 }
10701 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10702 {
10703 /* We have UXTB/UXTH. */
10704 *cost = COSTS_N_INSNS (1);
10705 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10706 if (speed_p)
10707 *cost += extra_cost->alu.extend;
10708 }
10709 else if (GET_MODE (XEXP (x, 0)) != SImode)
10710 {
10711 /* Needs two shifts. It's marginally preferable to use
10712 shifts rather than two BIC instructions as the second
10713 shift may merge with a subsequent insn as a shifter
10714 op. */
10715 *cost = COSTS_N_INSNS (2);
10716 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10717 if (speed_p)
10718 *cost += 2 * extra_cost->alu.shift;
10719 }
10720 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10721 *cost = COSTS_N_INSNS (1);
10722
10723 /* Widening beyond 32 bits requires one more insn. */
10724 if (mode == DImode)
10725 {
10726 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10727 }
10728
10729 return true;
10730
10731 case CONST_INT:
10732 *cost = 0;
10733 /* CONST_INT has no mode, so we cannot tell for sure how many
10734 insns are really going to be needed. The best we can do is
10735 look at the value passed. If it fits in SImode, then assume
10736 that's the mode it will be used for. Otherwise assume it
10737 will be used in DImode. */
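/* E.g. (illustrative): 0x1ffffffff does not fit in SImode, so it is
   treated as a DImode constant and costed below as its low and high
   32-bit halves.  */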
10738 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10739 mode = SImode;
10740 else
10741 mode = DImode;
10742
10743 /* Avoid blowing up in arm_gen_constant (). */
10744 if (!(outer_code == PLUS
10745 || outer_code == AND
10746 || outer_code == IOR
10747 || outer_code == XOR
10748 || outer_code == MINUS))
10749 outer_code = SET;
10750
10751 const_int_cost:
10752 if (mode == SImode)
10753 {
10754 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10755 INTVAL (x), NULL, NULL,
10756 0, 0));
10757 /* Extra costs? */
10758 }
10759 else
10760 {
10761 *cost += COSTS_N_INSNS (arm_gen_constant
10762 (outer_code, SImode, NULL,
10763 trunc_int_for_mode (INTVAL (x), SImode),
10764 NULL, NULL, 0, 0)
10765 + arm_gen_constant (outer_code, SImode, NULL,
10766 INTVAL (x) >> 32, NULL,
10767 NULL, 0, 0));
10768 /* Extra costs? */
10769 }
10770
10771 return true;
10772
10773 case CONST:
10774 case LABEL_REF:
10775 case SYMBOL_REF:
10776 if (speed_p)
10777 {
10778 if (arm_arch_thumb2 && !flag_pic)
10779 *cost = COSTS_N_INSNS (2);
10780 else
10781 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10782 }
10783 else
10784 *cost = COSTS_N_INSNS (2);
10785
10786 if (flag_pic)
10787 {
10788 *cost += COSTS_N_INSNS (1);
10789 if (speed_p)
10790 *cost += extra_cost->alu.arith;
10791 }
10792
10793 return true;
10794
10795 case CONST_FIXED:
10796 *cost = COSTS_N_INSNS (4);
10797 /* Fixme. */
10798 return true;
10799
10800 case CONST_DOUBLE:
10801 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10802 && (mode == SFmode || !TARGET_VFP_SINGLE))
10803 {
10804 if (vfp3_const_double_rtx (x))
10805 {
10806 *cost = COSTS_N_INSNS (1);
10807 if (speed_p)
10808 *cost += extra_cost->fp[mode == DFmode].fpconst;
10809 return true;
10810 }
10811
10812 if (speed_p)
10813 {
10814 *cost = COSTS_N_INSNS (1);
10815 if (mode == DFmode)
10816 *cost += extra_cost->ldst.loadd;
10817 else
10818 *cost += extra_cost->ldst.loadf;
10819 }
10820 else
10821 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10822
10823 return true;
10824 }
10825 *cost = COSTS_N_INSNS (4);
10826 return true;
10827
10828 case CONST_VECTOR:
10829 /* Fixme. */
10830 if (TARGET_NEON
10831 && TARGET_HARD_FLOAT
10832 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10833 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10834 *cost = COSTS_N_INSNS (1);
10835 else
10836 *cost = COSTS_N_INSNS (4);
10837 return true;
10838
10839 case HIGH:
10840 case LO_SUM:
10841 *cost = COSTS_N_INSNS (1);
10842 /* When optimizing for size, we prefer constant pool entries to
10843 MOVW/MOVT pairs, so bump the cost of these slightly. */
10844 if (!speed_p)
10845 *cost += 1;
10846 return true;
10847
10848 case CLZ:
10849 *cost = COSTS_N_INSNS (1);
10850 if (speed_p)
10851 *cost += extra_cost->alu.clz;
10852 return false;
10853
10854 case SMIN:
10855 if (XEXP (x, 1) == const0_rtx)
10856 {
10857 *cost = COSTS_N_INSNS (1);
10858 if (speed_p)
10859 *cost += extra_cost->alu.log_shift;
10860 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10861 return true;
10862 }
10863 /* Fall through. */
10864 case SMAX:
10865 case UMIN:
10866 case UMAX:
10867 *cost = COSTS_N_INSNS (2);
10868 return false;
10869
10870 case TRUNCATE:
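/* Illustrative note: the pattern matched below is the high word of a
   widening 32x32->64 multiply, e.g. (truncate:SI (ashiftrt:DI (mult:DI
   (sign_extend:DI (reg)) (sign_extend:DI (reg))) (const_int 32))),
   essentially a SMULL/UMULL of which only the high result register is
   used.  */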
10871 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10872 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10873 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10874 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10875 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10876 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10877 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10878 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10879 == ZERO_EXTEND))))
10880 {
10881 *cost = COSTS_N_INSNS (1);
10882 if (speed_p)
10883 *cost += extra_cost->mult[1].extend;
10884 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10885 speed_p)
10886 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10887 0, speed_p));
10888 return true;
10889 }
10890 *cost = LIBCALL_COST (1);
10891 return false;
10892
10893 case UNSPEC:
10894 return arm_unspec_cost (x, outer_code, speed_p, cost);
10895
10896 case PC:
10897 /* Reading the PC is like reading any other register. Writing it
10898 is more expensive, but we take that into account elsewhere. */
10899 *cost = 0;
10900 return true;
10901
10902 case ZERO_EXTRACT:
10903 /* TODO: Simple zero_extract of bottom bits using AND. */
10904 /* Fall through. */
10905 case SIGN_EXTRACT:
10906 if (arm_arch6
10907 && mode == SImode
10908 && CONST_INT_P (XEXP (x, 1))
10909 && CONST_INT_P (XEXP (x, 2)))
10910 {
10911 *cost = COSTS_N_INSNS (1);
10912 if (speed_p)
10913 *cost += extra_cost->alu.bfx;
10914 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10915 return true;
10916 }
10917 /* Without UBFX/SBFX, need to resort to shift operations. */
10918 *cost = COSTS_N_INSNS (2);
10919 if (speed_p)
10920 *cost += 2 * extra_cost->alu.shift;
10921 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10922 return true;
10923
10924 case FLOAT_EXTEND:
10925 if (TARGET_HARD_FLOAT)
10926 {
10927 *cost = COSTS_N_INSNS (1);
10928 if (speed_p)
10929 *cost += extra_cost->fp[mode == DFmode].widen;
10930 if (!TARGET_FPU_ARMV8
10931 && GET_MODE (XEXP (x, 0)) == HFmode)
10932 {
10933 /* Pre v8, widening HF->DF is a two-step process, first
10934 widening to SFmode. */
10935 *cost += COSTS_N_INSNS (1);
10936 if (speed_p)
10937 *cost += extra_cost->fp[0].widen;
10938 }
10939 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10940 return true;
10941 }
10942
10943 *cost = LIBCALL_COST (1);
10944 return false;
10945
10946 case FLOAT_TRUNCATE:
10947 if (TARGET_HARD_FLOAT)
10948 {
10949 *cost = COSTS_N_INSNS (1);
10950 if (speed_p)
10951 *cost += extra_cost->fp[mode == DFmode].narrow;
10952 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10953 return true;
10954 /* Vector modes? */
10955 }
10956 *cost = LIBCALL_COST (1);
10957 return false;
10958
10959 case FMA:
10960 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10961 {
10962 rtx op0 = XEXP (x, 0);
10963 rtx op1 = XEXP (x, 1);
10964 rtx op2 = XEXP (x, 2);
10965
10966 *cost = COSTS_N_INSNS (1);
10967
10968 /* vfms or vfnma. */
10969 if (GET_CODE (op0) == NEG)
10970 op0 = XEXP (op0, 0);
10971
10972 /* vfnms or vfnma. */
10973 if (GET_CODE (op2) == NEG)
10974 op2 = XEXP (op2, 0);
10975
10976 *cost += rtx_cost (op0, FMA, 0, speed_p);
10977 *cost += rtx_cost (op1, FMA, 1, speed_p);
10978 *cost += rtx_cost (op2, FMA, 2, speed_p);
10979
10980 if (speed_p)
10981 *cost += extra_cost->fp[mode == DFmode].fma;
10982
10983 return true;
10984 }
10985
10986 *cost = LIBCALL_COST (3);
10987 return false;
10988
10989 case FIX:
10990 case UNSIGNED_FIX:
10991 if (TARGET_HARD_FLOAT)
10992 {
10993 if (GET_MODE_CLASS (mode) == MODE_INT)
10994 {
10995 *cost = COSTS_N_INSNS (1);
10996 if (speed_p)
10997 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10998 /* Strip off the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x, 0)) == FIX)
11000 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11001 else
11002 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11003 /* ??? Increase the cost to deal with transferring from
11004 FP -> CORE registers? */
11005 return true;
11006 }
11007 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11008 && TARGET_FPU_ARMV8)
11009 {
11010 *cost = COSTS_N_INSNS (1);
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].roundint;
11013 return false;
11014 }
11015 /* Vector costs? */
11016 }
11017 *cost = LIBCALL_COST (1);
11018 return false;
11019
11020 case FLOAT:
11021 case UNSIGNED_FLOAT:
11022 if (TARGET_HARD_FLOAT)
11023 {
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11026 *cost = COSTS_N_INSNS (1);
11027 if (speed_p)
11028 *cost += extra_cost->fp[mode == DFmode].fromint;
11029 return false;
11030 }
11031 *cost = LIBCALL_COST (1);
11032 return false;
11033
11034 case CALL:
11035 *cost = COSTS_N_INSNS (1);
11036 return true;
11037
11038 case ASM_OPERANDS:
11039 {
11040 /* Just a guess: the number of instructions in the asm
11041 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11042 though (see PR60663). */
11043 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11044 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11045
11046 *cost = COSTS_N_INSNS (asm_length + num_operands);
11047 return true;
11048 }
11049 default:
11050 if (mode != VOIDmode)
11051 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11052 else
11053 *cost = COSTS_N_INSNS (4); /* Who knows? */
11054 return false;
11055 }
11056 }
11057
11058 #undef HANDLE_NARROW_SHIFT_ARITH
11059
11060 /* Top-level RTX cost function; dispatches to the size or speed cost implementations as appropriate for the current tuning. */
11061 static bool
11062 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11063 int *total, bool speed)
11064 {
11065 bool result;
11066
11067 if (TARGET_OLD_RTX_COSTS
11068 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11069 {
11070 /* Old way. (Deprecated.) */
11071 if (!speed)
11072 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code, total);
11074 else
11075 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11076 (enum rtx_code) outer_code, total,
11077 speed);
11078 }
11079 else
11080 {
11081 /* New way. */
11082 if (current_tune->insn_extra_cost)
11083 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11084 (enum rtx_code) outer_code,
11085 current_tune->insn_extra_cost,
11086 total, speed);
11087 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11088 && current_tune->insn_extra_cost != NULL */
11089 else
11090 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11091 (enum rtx_code) outer_code,
11092 &generic_extra_costs, total, speed);
11093 }
11094
11095 if (dump_file && (dump_flags & TDF_DETAILS))
11096 {
11097 print_rtl_single (dump_file, x);
11098 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11099 *total, result ? "final" : "partial");
11100 }
11101 return result;
11102 }
11103
11104 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11105 supported on any "slowmul" cores, so it can be ignored. */
11106
11107 static bool
11108 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11109 int *total, bool speed)
11110 {
11111 machine_mode mode = GET_MODE (x);
11112
11113 if (TARGET_THUMB)
11114 {
11115 *total = thumb1_rtx_costs (x, code, outer_code);
11116 return true;
11117 }
11118
11119 switch (code)
11120 {
11121 case MULT:
11122 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11123 || mode == DImode)
11124 {
11125 *total = COSTS_N_INSNS (20);
11126 return false;
11127 }
11128
11129 if (CONST_INT_P (XEXP (x, 1)))
11130 {
11131 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11132 & (unsigned HOST_WIDE_INT) 0xffffffff);
11133 int cost, const_ok = const_ok_for_arm (i);
11134 int j, booth_unit_size;
11135
11136 /* Tune as appropriate. */
11137 cost = const_ok ? 4 : 8;
11138 booth_unit_size = 2;
11139 for (j = 0; i && j < 32; j += booth_unit_size)
11140 {
11141 i >>= booth_unit_size;
11142 cost++;
11143 }
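	    /* For example, with this 2-bit Booth step a small constant such
	       as 0x5 adds two iterations to COST, while one using all 32 bits
	       (e.g. 0xffffffff) adds sixteen.  */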
11144
11145 *total = COSTS_N_INSNS (cost);
11146 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11147 return true;
11148 }
11149
11150 *total = COSTS_N_INSNS (20);
11151 return false;
11152
11153 default:
11154 return arm_rtx_costs_1 (x, outer_code, total, speed);
11155 }
11156 }
11157
11158
11159 /* RTX cost for cores with a fast multiply unit (M variants). */
11160
11161 static bool
11162 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11163 int *total, bool speed)
11164 {
11165 machine_mode mode = GET_MODE (x);
11166
11167 if (TARGET_THUMB1)
11168 {
11169 *total = thumb1_rtx_costs (x, code, outer_code);
11170 return true;
11171 }
11172
11173 /* ??? Should Thumb-2 use different costs? */
11174 switch (code)
11175 {
11176 case MULT:
11177 /* There is no point basing this on the tuning, since it is always the
11178 fast variant if it exists at all. */
11179 if (mode == DImode
11180 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11181 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11182 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11183 {
11184 *total = COSTS_N_INSNS (2);
11185 return false;
11186 }
11187
11188
11189 if (mode == DImode)
11190 {
11191 *total = COSTS_N_INSNS (5);
11192 return false;
11193 }
11194
11195 if (CONST_INT_P (XEXP (x, 1)))
11196 {
11197 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11198 & (unsigned HOST_WIDE_INT) 0xffffffff);
11199 int cost, const_ok = const_ok_for_arm (i);
11200 int j, booth_unit_size;
11201
11202 /* Tune as appropriate. */
11203 cost = const_ok ? 4 : 8;
11204 booth_unit_size = 8;
11205 for (j = 0; i && j < 32; j += booth_unit_size)
11206 {
11207 i >>= booth_unit_size;
11208 cost++;
11209 }
11210
11211 *total = COSTS_N_INSNS (cost);
11212 return false;
11213 }
11214
11215 if (mode == SImode)
11216 {
11217 *total = COSTS_N_INSNS (4);
11218 return false;
11219 }
11220
11221 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11222 {
11223 if (TARGET_HARD_FLOAT
11224 && (mode == SFmode
11225 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11226 {
11227 *total = COSTS_N_INSNS (1);
11228 return false;
11229 }
11230 }
11231
11232 /* Requires a lib call */
11233 *total = COSTS_N_INSNS (20);
11234 return false;
11235
11236 default:
11237 return arm_rtx_costs_1 (x, outer_code, total, speed);
11238 }
11239 }
11240
11241
11242 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11243 so it can be ignored. */
11244
11245 static bool
11246 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11247 int *total, bool speed)
11248 {
11249 machine_mode mode = GET_MODE (x);
11250
11251 if (TARGET_THUMB)
11252 {
11253 *total = thumb1_rtx_costs (x, code, outer_code);
11254 return true;
11255 }
11256
11257 switch (code)
11258 {
11259 case COMPARE:
11260 if (GET_CODE (XEXP (x, 0)) != MULT)
11261 return arm_rtx_costs_1 (x, outer_code, total, speed);
11262
11263 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11264 will stall until the multiplication is complete. */
11265 *total = COSTS_N_INSNS (3);
11266 return false;
11267
11268 case MULT:
11269 /* There is no point basing this on the tuning, since it is always the
11270 fast variant if it exists at all. */
11271 if (mode == DImode
11272 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11273 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11274 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11275 {
11276 *total = COSTS_N_INSNS (2);
11277 return false;
11278 }
11279
11280
11281 if (mode == DImode)
11282 {
11283 *total = COSTS_N_INSNS (5);
11284 return false;
11285 }
11286
11287 if (CONST_INT_P (XEXP (x, 1)))
11288 {
11289 /* If operand 1 is a constant we can more accurately
11290 calculate the cost of the multiply. The multiplier can
11291 retire 15 bits on the first cycle and a further 12 on the
11292 second. We do, of course, have to load the constant into
11293 a register first. */
11294 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11295 /* There's a general overhead of one cycle. */
11296 int cost = 1;
11297 unsigned HOST_WIDE_INT masked_const;
11298
11299 if (i & 0x80000000)
11300 i = ~i;
11301
11302 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11303
11304 masked_const = i & 0xffff8000;
11305 if (masked_const != 0)
11306 {
11307 cost++;
11308 masked_const = i & 0xf8000000;
11309 if (masked_const != 0)
11310 cost++;
11311 }
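	  /* As an illustration: after the conditional inversion above, a
	     constant that fits in the low 15 bits leaves COST at 1, one that
	     fits in the low 27 bits costs 2, and anything wider costs 3.  */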
11312 *total = COSTS_N_INSNS (cost);
11313 return false;
11314 }
11315
11316 if (mode == SImode)
11317 {
11318 *total = COSTS_N_INSNS (3);
11319 return false;
11320 }
11321
11322 /* Requires a lib call */
11323 *total = COSTS_N_INSNS (20);
11324 return false;
11325
11326 default:
11327 return arm_rtx_costs_1 (x, outer_code, total, speed);
11328 }
11329 }
11330
11331
11332 /* RTX costs for 9e (and later) cores. */
11333
11334 static bool
11335 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11336 int *total, bool speed)
11337 {
11338 machine_mode mode = GET_MODE (x);
11339
11340 if (TARGET_THUMB1)
11341 {
11342 switch (code)
11343 {
11344 case MULT:
11345 /* Small multiply: 32 cycles for an integer multiply inst. */
11346 if (arm_arch6m && arm_m_profile_small_mul)
11347 *total = COSTS_N_INSNS (32);
11348 else
11349 *total = COSTS_N_INSNS (3);
11350 return true;
11351
11352 default:
11353 *total = thumb1_rtx_costs (x, code, outer_code);
11354 return true;
11355 }
11356 }
11357
11358 switch (code)
11359 {
11360 case MULT:
11361 /* There is no point basing this on the tuning, since it is always the
11362 fast variant if it exists at all. */
11363 if (mode == DImode
11364 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11365 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11366 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11367 {
11368 *total = COSTS_N_INSNS (2);
11369 return false;
11370 }
11371
11372
11373 if (mode == DImode)
11374 {
11375 *total = COSTS_N_INSNS (5);
11376 return false;
11377 }
11378
11379 if (mode == SImode)
11380 {
11381 *total = COSTS_N_INSNS (2);
11382 return false;
11383 }
11384
11385 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11386 {
11387 if (TARGET_HARD_FLOAT
11388 && (mode == SFmode
11389 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11390 {
11391 *total = COSTS_N_INSNS (1);
11392 return false;
11393 }
11394 }
11395
11396 *total = COSTS_N_INSNS (20);
11397 return false;
11398
11399 default:
11400 return arm_rtx_costs_1 (x, outer_code, total, speed);
11401 }
11402 }
11403 /* All address computations that can be done are free, but rtx cost returns
11404 the same for practically all of them. So we weight the different types
11405 of address here in the following order (most preferred first):
11406 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
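/* For instance, (post_inc (reg)) weighs 0, (plus (reg) (const_int 4))
   weighs 2, a bare (reg) weighs 6 and a (mem ...) address weighs 10.  */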
11407 static inline int
11408 arm_arm_address_cost (rtx x)
11409 {
11410 enum rtx_code c = GET_CODE (x);
11411
11412 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11413 return 0;
11414 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11415 return 10;
11416
11417 if (c == PLUS)
11418 {
11419 if (CONST_INT_P (XEXP (x, 1)))
11420 return 2;
11421
11422 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11423 return 3;
11424
11425 return 4;
11426 }
11427
11428 return 6;
11429 }
11430
11431 static inline int
11432 arm_thumb_address_cost (rtx x)
11433 {
11434 enum rtx_code c = GET_CODE (x);
11435
11436 if (c == REG)
11437 return 1;
11438 if (c == PLUS
11439 && REG_P (XEXP (x, 0))
11440 && CONST_INT_P (XEXP (x, 1)))
11441 return 1;
11442
11443 return 2;
11444 }
11445
11446 static int
11447 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11448 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11449 {
11450 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11451 }
11452
11453 /* Adjust cost hook for XScale. */
11454 static bool
11455 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11456 {
11457 /* Some true dependencies can have a higher cost depending
11458 on precisely how certain input operands are used. */
11459 if (REG_NOTE_KIND(link) == 0
11460 && recog_memoized (insn) >= 0
11461 && recog_memoized (dep) >= 0)
11462 {
11463 int shift_opnum = get_attr_shift (insn);
11464 enum attr_type attr_type = get_attr_type (dep);
11465
11466 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11467 operand for INSN. If we have a shifted input operand and the
11468 instruction we depend on is another ALU instruction, then we may
11469 have to account for an additional stall. */
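	  /* A hypothetical example: if DEP is "mov r2, r3, lsl #1" and INSN
	     is "add r0, r1, r2, lsl #2", then DEP writes the shifted operand
	     r2 of INSN and the dependency cost is set to 2 below.  */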
11470 if (shift_opnum != 0
11471 && (attr_type == TYPE_ALU_SHIFT_IMM
11472 || attr_type == TYPE_ALUS_SHIFT_IMM
11473 || attr_type == TYPE_LOGIC_SHIFT_IMM
11474 || attr_type == TYPE_LOGICS_SHIFT_IMM
11475 || attr_type == TYPE_ALU_SHIFT_REG
11476 || attr_type == TYPE_ALUS_SHIFT_REG
11477 || attr_type == TYPE_LOGIC_SHIFT_REG
11478 || attr_type == TYPE_LOGICS_SHIFT_REG
11479 || attr_type == TYPE_MOV_SHIFT
11480 || attr_type == TYPE_MVN_SHIFT
11481 || attr_type == TYPE_MOV_SHIFT_REG
11482 || attr_type == TYPE_MVN_SHIFT_REG))
11483 {
11484 rtx shifted_operand;
11485 int opno;
11486
11487 /* Get the shifted operand. */
11488 extract_insn (insn);
11489 shifted_operand = recog_data.operand[shift_opnum];
11490
11491 /* Iterate over all the operands in DEP. If we write an operand
11492 that overlaps with SHIFTED_OPERAND, then we have to increase the
11493 cost of this dependency. */
11494 extract_insn (dep);
11495 preprocess_constraints (dep);
11496 for (opno = 0; opno < recog_data.n_operands; opno++)
11497 {
11498 /* We can ignore strict inputs. */
11499 if (recog_data.operand_type[opno] == OP_IN)
11500 continue;
11501
11502 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11503 shifted_operand))
11504 {
11505 *cost = 2;
11506 return false;
11507 }
11508 }
11509 }
11510 }
11511 return true;
11512 }
11513
11514 /* Adjust cost hook for Cortex A9. */
11515 static bool
11516 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11517 {
11518 switch (REG_NOTE_KIND (link))
11519 {
11520 case REG_DEP_ANTI:
11521 *cost = 0;
11522 return false;
11523
11524 case REG_DEP_TRUE:
11525 case REG_DEP_OUTPUT:
11526 if (recog_memoized (insn) >= 0
11527 && recog_memoized (dep) >= 0)
11528 {
11529 if (GET_CODE (PATTERN (insn)) == SET)
11530 {
11531 if (GET_MODE_CLASS
11532 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11533 || GET_MODE_CLASS
11534 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11535 {
11536 enum attr_type attr_type_insn = get_attr_type (insn);
11537 enum attr_type attr_type_dep = get_attr_type (dep);
11538
11539 /* By default all dependencies of the form
11540 s0 = s0 <op> s1
11541 s0 = s0 <op> s2
11542 have an extra latency of 1 cycle because
11543 of the input and output dependency in this
11544 case. However, this gets modeled as a true
11545 dependency, hence all these checks. */
11546 if (REG_P (SET_DEST (PATTERN (insn)))
11547 && REG_P (SET_DEST (PATTERN (dep)))
11548 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11549 SET_DEST (PATTERN (dep))))
11550 {
11551 /* FMACS is a special case where the dependent
11552 instruction can be issued 3 cycles before
11553 the normal latency in case of an output
11554 dependency. */
11555 if ((attr_type_insn == TYPE_FMACS
11556 || attr_type_insn == TYPE_FMACD)
11557 && (attr_type_dep == TYPE_FMACS
11558 || attr_type_dep == TYPE_FMACD))
11559 {
11560 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11561 *cost = insn_default_latency (dep) - 3;
11562 else
11563 *cost = insn_default_latency (dep);
11564 return false;
11565 }
11566 else
11567 {
11568 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11569 *cost = insn_default_latency (dep) + 1;
11570 else
11571 *cost = insn_default_latency (dep);
11572 }
11573 return false;
11574 }
11575 }
11576 }
11577 }
11578 break;
11579
11580 default:
11581 gcc_unreachable ();
11582 }
11583
11584 return true;
11585 }
11586
11587 /* Adjust cost hook for FA726TE. */
11588 static bool
11589 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11590 {
11591 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11592 has a penalty of 3. */
11593 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11594 && recog_memoized (insn) >= 0
11595 && recog_memoized (dep) >= 0
11596 && get_attr_conds (dep) == CONDS_SET)
11597 {
11598 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11599 if (get_attr_conds (insn) == CONDS_USE
11600 && get_attr_type (insn) != TYPE_BRANCH)
11601 {
11602 *cost = 3;
11603 return false;
11604 }
11605
11606 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11607 || get_attr_conds (insn) == CONDS_USE)
11608 {
11609 *cost = 0;
11610 return false;
11611 }
11612 }
11613
11614 return true;
11615 }
11616
11617 /* Implement TARGET_REGISTER_MOVE_COST.
11618
11619 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11620 it is typically more expensive than a single memory access. We set
11621 the cost to less than two memory accesses so that floating
11622 point to integer conversion does not go through memory. */
11623
11624 int
11625 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11626 reg_class_t from, reg_class_t to)
11627 {
11628 if (TARGET_32BIT)
11629 {
11630 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11631 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11632 return 15;
11633 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11634 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11635 return 4;
11636 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11637 return 20;
11638 else
11639 return 2;
11640 }
11641 else
11642 {
11643 if (from == HI_REGS || to == HI_REGS)
11644 return 4;
11645 else
11646 return 2;
11647 }
11648 }
11649
11650 /* Implement TARGET_MEMORY_MOVE_COST. */
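/* For example, when TARGET_32BIT the cost is a flat 10; for Thumb-1 an
   SImode (4-byte) move costs 8 to or from LO_REGS and 16 for any other
   class, while sub-word moves cost 8 regardless of class.  */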
11651
11652 int
11653 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11654 bool in ATTRIBUTE_UNUSED)
11655 {
11656 if (TARGET_32BIT)
11657 return 10;
11658 else
11659 {
11660 if (GET_MODE_SIZE (mode) < 4)
11661 return 8;
11662 else
11663 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11664 }
11665 }
11666
11667 /* Vectorizer cost model implementation. */
11668
11669 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11670 static int
11671 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11672 tree vectype,
11673 int misalign ATTRIBUTE_UNUSED)
11674 {
11675 unsigned elements;
11676
11677 switch (type_of_cost)
11678 {
11679 case scalar_stmt:
11680 return current_tune->vec_costs->scalar_stmt_cost;
11681
11682 case scalar_load:
11683 return current_tune->vec_costs->scalar_load_cost;
11684
11685 case scalar_store:
11686 return current_tune->vec_costs->scalar_store_cost;
11687
11688 case vector_stmt:
11689 return current_tune->vec_costs->vec_stmt_cost;
11690
11691 case vector_load:
11692 return current_tune->vec_costs->vec_align_load_cost;
11693
11694 case vector_store:
11695 return current_tune->vec_costs->vec_store_cost;
11696
11697 case vec_to_scalar:
11698 return current_tune->vec_costs->vec_to_scalar_cost;
11699
11700 case scalar_to_vec:
11701 return current_tune->vec_costs->scalar_to_vec_cost;
11702
11703 case unaligned_load:
11704 return current_tune->vec_costs->vec_unalign_load_cost;
11705
11706 case unaligned_store:
11707 return current_tune->vec_costs->vec_unalign_store_cost;
11708
11709 case cond_branch_taken:
11710 return current_tune->vec_costs->cond_taken_branch_cost;
11711
11712 case cond_branch_not_taken:
11713 return current_tune->vec_costs->cond_not_taken_branch_cost;
11714
11715 case vec_perm:
11716 case vec_promote_demote:
11717 return current_tune->vec_costs->vec_stmt_cost;
11718
11719 case vec_construct:
11720 elements = TYPE_VECTOR_SUBPARTS (vectype);
11721 return elements / 2 + 1;
11722
11723 default:
11724 gcc_unreachable ();
11725 }
11726 }
11727
11728 /* Implement targetm.vectorize.add_stmt_cost. */
11729
11730 static unsigned
11731 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11732 struct _stmt_vec_info *stmt_info, int misalign,
11733 enum vect_cost_model_location where)
11734 {
11735 unsigned *cost = (unsigned *) data;
11736 unsigned retval = 0;
11737
11738 if (flag_vect_cost_model)
11739 {
11740 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11741 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11742
11743 /* Statements in an inner loop relative to the loop being
11744 vectorized are weighted more heavily. The value here is
11745 arbitrary and could potentially be improved with analysis. */
11746 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11747 count *= 50; /* FIXME. */
11748
11749 retval = (unsigned) (count * stmt_cost);
11750 cost[where] += retval;
11751 }
11752
11753 return retval;
11754 }
11755
11756 /* Return true if and only if this insn can dual-issue only as older. */
11757 static bool
11758 cortexa7_older_only (rtx_insn *insn)
11759 {
11760 if (recog_memoized (insn) < 0)
11761 return false;
11762
11763 switch (get_attr_type (insn))
11764 {
11765 case TYPE_ALU_DSP_REG:
11766 case TYPE_ALU_SREG:
11767 case TYPE_ALUS_SREG:
11768 case TYPE_LOGIC_REG:
11769 case TYPE_LOGICS_REG:
11770 case TYPE_ADC_REG:
11771 case TYPE_ADCS_REG:
11772 case TYPE_ADR:
11773 case TYPE_BFM:
11774 case TYPE_REV:
11775 case TYPE_MVN_REG:
11776 case TYPE_SHIFT_IMM:
11777 case TYPE_SHIFT_REG:
11778 case TYPE_LOAD_BYTE:
11779 case TYPE_LOAD1:
11780 case TYPE_STORE1:
11781 case TYPE_FFARITHS:
11782 case TYPE_FADDS:
11783 case TYPE_FFARITHD:
11784 case TYPE_FADDD:
11785 case TYPE_FMOV:
11786 case TYPE_F_CVT:
11787 case TYPE_FCMPS:
11788 case TYPE_FCMPD:
11789 case TYPE_FCONSTS:
11790 case TYPE_FCONSTD:
11791 case TYPE_FMULS:
11792 case TYPE_FMACS:
11793 case TYPE_FMULD:
11794 case TYPE_FMACD:
11795 case TYPE_FDIVS:
11796 case TYPE_FDIVD:
11797 case TYPE_F_MRC:
11798 case TYPE_F_MRRC:
11799 case TYPE_F_FLAG:
11800 case TYPE_F_LOADS:
11801 case TYPE_F_STORES:
11802 return true;
11803 default:
11804 return false;
11805 }
11806 }
11807
11808 /* Return true if and only if this insn can dual-issue as younger. */
11809 static bool
11810 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11811 {
11812 if (recog_memoized (insn) < 0)
11813 {
11814 if (verbose > 5)
11815 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11816 return false;
11817 }
11818
11819 switch (get_attr_type (insn))
11820 {
11821 case TYPE_ALU_IMM:
11822 case TYPE_ALUS_IMM:
11823 case TYPE_LOGIC_IMM:
11824 case TYPE_LOGICS_IMM:
11825 case TYPE_EXTEND:
11826 case TYPE_MVN_IMM:
11827 case TYPE_MOV_IMM:
11828 case TYPE_MOV_REG:
11829 case TYPE_MOV_SHIFT:
11830 case TYPE_MOV_SHIFT_REG:
11831 case TYPE_BRANCH:
11832 case TYPE_CALL:
11833 return true;
11834 default:
11835 return false;
11836 }
11837 }
11838
11839
11840 /* Look for an instruction that can dual issue only as an older
11841 instruction, and move it in front of any instructions that can
11842 dual-issue as younger, while preserving the relative order of all
11843 other instructions in the ready list. This is a heuristic to help
11844 dual-issue in later cycles, by postponing issue of more flexible
11845 instructions. This heuristic may affect dual issue opportunities
11846 in the current cycle. */
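/* As a worked example, if the ready list in issue order is
   { A (younger), X, B (older-only) }, then B is hoisted and the list
   becomes { B, A, X }, postponing the more flexible A.  */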
11847 static void
11848 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11849 int *n_readyp, int clock)
11850 {
11851 int i;
11852 int first_older_only = -1, first_younger = -1;
11853
11854 if (verbose > 5)
11855 fprintf (file,
11856 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11857 clock,
11858 *n_readyp);
11859
11860 /* Traverse the ready list from the head (the instruction to issue
11861 first), looking for the first instruction that can issue as
11862 younger and the first instruction that can dual-issue only as
11863 older. */
11864 for (i = *n_readyp - 1; i >= 0; i--)
11865 {
11866 rtx_insn *insn = ready[i];
11867 if (cortexa7_older_only (insn))
11868 {
11869 first_older_only = i;
11870 if (verbose > 5)
11871 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11872 break;
11873 }
11874 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11875 first_younger = i;
11876 }
11877
11878 /* Nothing to reorder because either no younger insn found or insn
11879 that can dual-issue only as older appears before any insn that
11880 can dual-issue as younger. */
11881 if (first_younger == -1)
11882 {
11883 if (verbose > 5)
11884 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11885 return;
11886 }
11887
11888 /* Nothing to reorder because no older-only insn in the ready list. */
11889 if (first_older_only == -1)
11890 {
11891 if (verbose > 5)
11892 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11893 return;
11894 }
11895
11896 /* Move first_older_only insn before first_younger. */
11897 if (verbose > 5)
11898 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11899 INSN_UID (ready[first_older_only]),
11900 INSN_UID (ready[first_younger]));
11901 rtx_insn *first_older_only_insn = ready[first_older_only];
11902 for (i = first_older_only; i < first_younger; i++)
11903 {
11904 ready[i] = ready[i+1];
11905 }
11906
11907 ready[i] = first_older_only_insn;
11908 return;
11909 }
11910
11911 /* Implement TARGET_SCHED_REORDER. */
11912 static int
11913 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11914 int clock)
11915 {
11916 switch (arm_tune)
11917 {
11918 case cortexa7:
11919 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11920 break;
11921 default:
11922 /* Do nothing for other cores. */
11923 break;
11924 }
11925
11926 return arm_issue_rate ();
11927 }
11928
11929 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11930 It corrects the value of COST based on the relationship between
11931 INSN and DEP through the dependence LINK. It returns the new
11932 value. There is a per-core adjust_cost hook to adjust scheduler costs
11933 and the per-core hook can choose to completely override the generic
11934 adjust_cost function. Only put bits of code into arm_adjust_cost that
11935 are common across all cores. */
11936 static int
11937 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11938 {
11939 rtx i_pat, d_pat;
11940
11941 /* When generating Thumb-1 code, we want to place flag-setting operations
11942 close to a conditional branch which depends on them, so that we can
11943 omit the comparison. */
11944 if (TARGET_THUMB1
11945 && REG_NOTE_KIND (link) == 0
11946 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11947 && recog_memoized (dep) >= 0
11948 && get_attr_conds (dep) == CONDS_SET)
11949 return 0;
11950
11951 if (current_tune->sched_adjust_cost != NULL)
11952 {
11953 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11954 return cost;
11955 }
11956
11957 /* XXX Is this strictly true? */
11958 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11959 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11960 return 0;
11961
11962 /* Call insns don't incur a stall, even if they follow a load. */
11963 if (REG_NOTE_KIND (link) == 0
11964 && CALL_P (insn))
11965 return 1;
11966
11967 if ((i_pat = single_set (insn)) != NULL
11968 && MEM_P (SET_SRC (i_pat))
11969 && (d_pat = single_set (dep)) != NULL
11970 && MEM_P (SET_DEST (d_pat)))
11971 {
11972 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11973 /* This is a load after a store; there is no conflict if the load reads
11974 from a cached area. Assume that loads from the stack, and from the
11975 constant pool are cached, and that others will miss. This is a
11976 hack. */
11977
11978 if ((GET_CODE (src_mem) == SYMBOL_REF
11979 && CONSTANT_POOL_ADDRESS_P (src_mem))
11980 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11981 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11982 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11983 return 1;
11984 }
11985
11986 return cost;
11987 }
11988
11989 int
11990 arm_max_conditional_execute (void)
11991 {
11992 return max_insns_skipped;
11993 }
11994
11995 static int
11996 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11997 {
11998 if (TARGET_32BIT)
11999 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12000 else
12001 return (optimize > 0) ? 2 : 0;
12002 }
12003
12004 static int
12005 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12006 {
12007 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12008 }
12009
12010 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12011 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12012 sequences of non-executed instructions in IT blocks probably take the same
12013 amount of time as executed instructions (and the IT instruction itself takes
12014 space in icache). This function was experimentally determined to give good
12015 results on a popular embedded benchmark. */
12016
12017 static int
12018 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12019 {
12020 return (TARGET_32BIT && speed_p) ? 1
12021 : arm_default_branch_cost (speed_p, predictable_p);
12022 }
12023
12024 static bool fp_consts_inited = false;
12025
12026 static REAL_VALUE_TYPE value_fp0;
12027
12028 static void
12029 init_fp_table (void)
12030 {
12031 REAL_VALUE_TYPE r;
12032
12033 r = REAL_VALUE_ATOF ("0", DFmode);
12034 value_fp0 = r;
12035 fp_consts_inited = true;
12036 }
12037
12038 /* Return TRUE if rtx X is a valid immediate FP constant. */
12039 int
12040 arm_const_double_rtx (rtx x)
12041 {
12042 REAL_VALUE_TYPE r;
12043
12044 if (!fp_consts_inited)
12045 init_fp_table ();
12046
12047 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12048 if (REAL_VALUE_MINUS_ZERO (r))
12049 return 0;
12050
12051 if (REAL_VALUES_EQUAL (r, value_fp0))
12052 return 1;
12053
12054 return 0;
12055 }
12056
12057 /* VFPv3 has a fairly wide range of representable immediates, formed from
12058 "quarter-precision" floating-point values. These can be evaluated using this
12059 formula (with ^ for exponentiation):
12060
12061 -1^s * n * 2^-r
12062
12063 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12064 16 <= n <= 31 and 0 <= r <= 7.
12065
12066 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12067
12068 - A (most-significant) is the sign bit.
12069 - BCD are the exponent (encoded as r XOR 3).
12070 - EFGH are the mantissa (encoded as n - 16).
12071 */
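/* As a worked example, 1.0 is represented with s = 0, n = 16, r = 4
   (i.e. 16 * 2^-4), giving the index byte
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70.  */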
12072
12073 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12074 fconst[sd] instruction, or -1 if X isn't suitable. */
12075 static int
12076 vfp3_const_double_index (rtx x)
12077 {
12078 REAL_VALUE_TYPE r, m;
12079 int sign, exponent;
12080 unsigned HOST_WIDE_INT mantissa, mant_hi;
12081 unsigned HOST_WIDE_INT mask;
12082 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12083 bool fail;
12084
12085 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12086 return -1;
12087
12088 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12089
12090 /* We can't represent these things, so detect them first. */
12091 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12092 return -1;
12093
12094 /* Extract sign, exponent and mantissa. */
12095 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12096 r = real_value_abs (&r);
12097 exponent = REAL_EXP (&r);
12098 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12099 highest (sign) bit, with a fixed binary point at bit point_pos.
12100 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12101 bits for the mantissa, this may fail (low bits would be lost). */
12102 real_ldexp (&m, &r, point_pos - exponent);
12103 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12104 mantissa = w.elt (0);
12105 mant_hi = w.elt (1);
12106
12107 /* If there are bits set in the low part of the mantissa, we can't
12108 represent this value. */
12109 if (mantissa != 0)
12110 return -1;
12111
12112 /* Now make it so that mantissa contains the most-significant bits, and move
12113 the point_pos to indicate that the least-significant bits have been
12114 discarded. */
12115 point_pos -= HOST_BITS_PER_WIDE_INT;
12116 mantissa = mant_hi;
12117
12118 /* We can permit four significant bits of mantissa only, plus a high bit
12119 which is always 1. */
12120 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12121 if ((mantissa & mask) != 0)
12122 return -1;
12123
12124 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12125 mantissa >>= point_pos - 5;
12126
12127 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12128 floating-point immediate zero with Neon using an integer-zero load, but
12129 that case is handled elsewhere.) */
12130 if (mantissa == 0)
12131 return -1;
12132
12133 gcc_assert (mantissa >= 16 && mantissa <= 31);
12134
12135 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12136 normalized significands are in the range [1, 2). (Our mantissa is shifted
12137 left 4 places at this point relative to normalized IEEE754 values). GCC
12138 internally uses [0.5, 1) (see real.c), so the exponent returned from
12139 REAL_EXP must be altered. */
12140 exponent = 5 - exponent;
12141
12142 if (exponent < 0 || exponent > 7)
12143 return -1;
12144
12145 /* Sign, mantissa and exponent are now in the correct form to plug into the
12146 formula described in the comment above. */
12147 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12148 }
12149
12150 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12151 int
12152 vfp3_const_double_rtx (rtx x)
12153 {
12154 if (!TARGET_VFP3)
12155 return 0;
12156
12157 return vfp3_const_double_index (x) != -1;
12158 }
12159
12160 /* Recognize immediates which can be used in various Neon instructions. Legal
12161 immediates are described by the following table (for VMVN variants, the
12162 bitwise inverse of the constant shown is recognized. In either case, VMOV
12163 is output and the correct instruction to use for a given constant is chosen
12164 by the assembler). The constant shown is replicated across all elements of
12165 the destination vector.
12166
12167 insn elems variant constant (binary)
12168 ---- ----- ------- -----------------
12169 vmov i32 0 00000000 00000000 00000000 abcdefgh
12170 vmov i32 1 00000000 00000000 abcdefgh 00000000
12171 vmov i32 2 00000000 abcdefgh 00000000 00000000
12172 vmov i32 3 abcdefgh 00000000 00000000 00000000
12173 vmov i16 4 00000000 abcdefgh
12174 vmov i16 5 abcdefgh 00000000
12175 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12176 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12177 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12178 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12179 vmvn i16 10 00000000 abcdefgh
12180 vmvn i16 11 abcdefgh 00000000
12181 vmov i32 12 00000000 00000000 abcdefgh 11111111
12182 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12183 vmov i32 14 00000000 abcdefgh 11111111 11111111
12184 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12185 vmov i8 16 abcdefgh
12186 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12187 eeeeeeee ffffffff gggggggg hhhhhhhh
12188 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12189 vmov f32 19 00000000 00000000 00000000 00000000
12190
12191 For case 18, B = !b. Representable values are exactly those accepted by
12192 vfp3_const_double_index, but are output as floating-point numbers rather
12193 than indices.
12194
12195 For case 19, we will change it to vmov.i32 when assembling.
12196
12197 Variants 0-5 (inclusive) may also be used as immediates for the second
12198 operand of VORR/VBIC instructions.
12199
12200 The INVERSE argument causes the bitwise inverse of the given operand to be
12201 recognized instead (used for recognizing legal immediates for the VAND/VORN
12202 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12203 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12204 output, rather than the real insns vbic/vorr).
12205
12206 INVERSE makes no difference to the recognition of float vectors.
12207
12208 The return value is the variant of immediate as shown in the above table, or
12209 -1 if the given value doesn't match any of the listed patterns.
12210 */
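/* For example, a V4SI vector with every element equal to 0x000000ab is
   variant 0 (vmov.i32), one with every element equal to 0xffffffab is
   variant 6 (vmvn.i32), and a vector of identical bytes such as 0x2a is
   variant 16 (vmov.i8).  */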
12211 static int
12212 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12213 rtx *modconst, int *elementwidth)
12214 {
12215 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12216 matches = 1; \
12217 for (i = 0; i < idx; i += (STRIDE)) \
12218 if (!(TEST)) \
12219 matches = 0; \
12220 if (matches) \
12221 { \
12222 immtype = (CLASS); \
12223 elsize = (ELSIZE); \
12224 break; \
12225 }
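/* CHECK compares the byte image of the constant against the pattern for
   one variant from the table above; on a match it records the variant in
   IMMTYPE and the element size in ELSIZE, then leaves the do {} while (0)
   block below.  */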
12226
12227 unsigned int i, elsize = 0, idx = 0, n_elts;
12228 unsigned int innersize;
12229 unsigned char bytes[16];
12230 int immtype = -1, matches;
12231 unsigned int invmask = inverse ? 0xff : 0;
12232 bool vector = GET_CODE (op) == CONST_VECTOR;
12233
12234 if (vector)
12235 {
12236 n_elts = CONST_VECTOR_NUNITS (op);
12237 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12238 }
12239 else
12240 {
12241 n_elts = 1;
12242 if (mode == VOIDmode)
12243 mode = DImode;
12244 innersize = GET_MODE_SIZE (mode);
12245 }
12246
12247 /* Vectors of float constants. */
12248 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12249 {
12250 rtx el0 = CONST_VECTOR_ELT (op, 0);
12251 REAL_VALUE_TYPE r0;
12252
12253 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12254 return -1;
12255
12256 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12257
12258 for (i = 1; i < n_elts; i++)
12259 {
12260 rtx elt = CONST_VECTOR_ELT (op, i);
12261 REAL_VALUE_TYPE re;
12262
12263 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12264
12265 if (!REAL_VALUES_EQUAL (r0, re))
12266 return -1;
12267 }
12268
12269 if (modconst)
12270 *modconst = CONST_VECTOR_ELT (op, 0);
12271
12272 if (elementwidth)
12273 *elementwidth = 0;
12274
12275 if (el0 == CONST0_RTX (GET_MODE (el0)))
12276 return 19;
12277 else
12278 return 18;
12279 }
12280
12281 /* Splat vector constant out into a byte vector. */
12282 for (i = 0; i < n_elts; i++)
12283 {
12284 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12285 unsigned HOST_WIDE_INT elpart;
12286 unsigned int part, parts;
12287
12288 if (CONST_INT_P (el))
12289 {
12290 elpart = INTVAL (el);
12291 parts = 1;
12292 }
12293 else if (CONST_DOUBLE_P (el))
12294 {
12295 elpart = CONST_DOUBLE_LOW (el);
12296 parts = 2;
12297 }
12298 else
12299 gcc_unreachable ();
12300
12301 for (part = 0; part < parts; part++)
12302 {
12303 unsigned int byte;
12304 for (byte = 0; byte < innersize; byte++)
12305 {
12306 bytes[idx++] = (elpart & 0xff) ^ invmask;
12307 elpart >>= BITS_PER_UNIT;
12308 }
12309 if (CONST_DOUBLE_P (el))
12310 elpart = CONST_DOUBLE_HIGH (el);
12311 }
12312 }
12313
12314 /* Sanity check. */
12315 gcc_assert (idx == GET_MODE_SIZE (mode));
12316
12317 do
12318 {
12319 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12320 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12321
12322 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12323 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12324
12325 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12326 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12327
12328 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12329 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12330
12331 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12332
12333 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12334
12335 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12336 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12337
12338 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12339 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12340
12341 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12342 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12343
12344 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12345 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12346
12347 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12348
12349 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12350
12351 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12352 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12353
12354 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12355 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12356
12357 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12359
12360 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12361 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12362
12363 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12364
12365 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12366 && bytes[i] == bytes[(i + 8) % idx]);
12367 }
12368 while (0);
12369
12370 if (immtype == -1)
12371 return -1;
12372
12373 if (elementwidth)
12374 *elementwidth = elsize;
12375
12376 if (modconst)
12377 {
12378 unsigned HOST_WIDE_INT imm = 0;
12379
12380 /* Un-invert bytes of recognized vector, if necessary. */
12381 if (invmask != 0)
12382 for (i = 0; i < idx; i++)
12383 bytes[i] ^= invmask;
12384
12385 if (immtype == 17)
12386 {
12387 /* FIXME: Broken on 32-bit H_W_I hosts. */
12388 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12389
12390 for (i = 0; i < 8; i++)
12391 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12392 << (i * BITS_PER_UNIT);
12393
12394 *modconst = GEN_INT (imm);
12395 }
12396 else
12397 {
12398 unsigned HOST_WIDE_INT imm = 0;
12399
12400 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12401 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12402
12403 *modconst = GEN_INT (imm);
12404 }
12405 }
12406
12407 return immtype;
12408 #undef CHECK
12409 }
12410
12411 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12412 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12413 float elements), and a modified constant (whatever should be output for a
12414 VMOV) in *MODCONST. */
12415
12416 int
12417 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12418 rtx *modconst, int *elementwidth)
12419 {
12420 rtx tmpconst;
12421 int tmpwidth;
12422 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12423
12424 if (retval == -1)
12425 return 0;
12426
12427 if (modconst)
12428 *modconst = tmpconst;
12429
12430 if (elementwidth)
12431 *elementwidth = tmpwidth;
12432
12433 return 1;
12434 }
12435
12436 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12437 the immediate is valid, write a constant suitable for using as an operand
12438 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12439 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12440
12441 int
12442 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12443 rtx *modconst, int *elementwidth)
12444 {
12445 rtx tmpconst;
12446 int tmpwidth;
12447 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12448
12449 if (retval < 0 || retval > 5)
12450 return 0;
12451
12452 if (modconst)
12453 *modconst = tmpconst;
12454
12455 if (elementwidth)
12456 *elementwidth = tmpwidth;
12457
12458 return 1;
12459 }
12460
12461 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12462 the immediate is valid, write a constant suitable for using as an operand
12463 to VSHR/VSHL to *MODCONST and the corresponding element width to
12464 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12465 which have different limits on the immediate. */
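/* For example, with V8HImode (16-bit elements) a left-shift immediate
   must be in the range 0-15, while a right-shift immediate must be in
   the range 1-16.  */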
12466
12467 int
12468 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12469 rtx *modconst, int *elementwidth,
12470 bool isleftshift)
12471 {
12472 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12473 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12474 unsigned HOST_WIDE_INT last_elt = 0;
12475 unsigned HOST_WIDE_INT maxshift;
12476
12477 /* Split vector constant out into a byte vector. */
12478 for (i = 0; i < n_elts; i++)
12479 {
12480 rtx el = CONST_VECTOR_ELT (op, i);
12481 unsigned HOST_WIDE_INT elpart;
12482
12483 if (CONST_INT_P (el))
12484 elpart = INTVAL (el);
12485 else if (CONST_DOUBLE_P (el))
12486 return 0;
12487 else
12488 gcc_unreachable ();
12489
12490 if (i != 0 && elpart != last_elt)
12491 return 0;
12492
12493 last_elt = elpart;
12494 }
12495
12496 /* Shift less than element size. */
12497 maxshift = innersize * 8;
12498
12499 if (isleftshift)
12500 {
12501 /* Left shift immediate value can be from 0 to <size>-1. */
12502 if (last_elt >= maxshift)
12503 return 0;
12504 }
12505 else
12506 {
12507 /* Right shift immediate value can be from 1 to <size>. */
12508 if (last_elt == 0 || last_elt > maxshift)
12509 return 0;
12510 }
12511
12512 if (elementwidth)
12513 *elementwidth = innersize * 8;
12514
12515 if (modconst)
12516 *modconst = CONST_VECTOR_ELT (op, 0);
12517
12518 return 1;
12519 }
12520
12521 /* Return a string suitable for output of Neon immediate logic operation
12522 MNEM. */
12523
12524 char *
12525 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12526 int inverse, int quad)
12527 {
12528 int width, is_valid;
12529 static char templ[40];
12530
12531 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12532
12533 gcc_assert (is_valid != 0);
12534
12535 if (quad)
12536 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12537 else
12538 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12539
12540 return templ;
12541 }
12542
12543 /* Return a string suitable for output of Neon immediate shift operation
12544 (VSHR or VSHL) MNEM. */
12545
12546 char *
12547 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12548 machine_mode mode, int quad,
12549 bool isleftshift)
12550 {
12551 int width, is_valid;
12552 static char templ[40];
12553
12554 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12555 gcc_assert (is_valid != 0);
12556
12557 if (quad)
12558 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12559 else
12560 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12561
12562 return templ;
12563 }
12564
12565 /* Output a sequence of pairwise operations to implement a reduction.
12566 NOTE: We do "too much work" here, because pairwise operations work on two
12567 registers-worth of operands in one go. Unfortunately I don't think we can
12568 exploit those extra calculations to do the full operation in fewer steps.
12569 Although all vector elements of the result but the first are ignored, we
12570 actually calculate the same result in each of the elements. An alternative
12571 such as initially loading a vector with zero to use as each of the second
12572 operands would use up an additional register and take an extra instruction,
12573 for no particular gain. */
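/* For instance, reducing a V4SI vector takes two pairwise steps
   (i = 2, then i = 1), and a V8QI vector takes three.  */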
12574
12575 void
12576 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12577 rtx (*reduc) (rtx, rtx, rtx))
12578 {
12579 machine_mode inner = GET_MODE_INNER (mode);
12580 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12581 rtx tmpsum = op1;
12582
12583 for (i = parts / 2; i >= 1; i /= 2)
12584 {
12585 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12586 emit_insn (reduc (dest, tmpsum, tmpsum));
12587 tmpsum = dest;
12588 }
12589 }
12590
12591 /* If VALS is a vector constant that can be loaded into a register
12592 using VDUP, generate instructions to do so and return an RTX to
12593 assign to the register. Otherwise return NULL_RTX. */
12594
12595 static rtx
12596 neon_vdup_constant (rtx vals)
12597 {
12598 machine_mode mode = GET_MODE (vals);
12599 machine_mode inner_mode = GET_MODE_INNER (mode);
12600 int n_elts = GET_MODE_NUNITS (mode);
12601 bool all_same = true;
12602 rtx x;
12603 int i;
12604
12605 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12606 return NULL_RTX;
12607
12608 for (i = 0; i < n_elts; ++i)
12609 {
12610 x = XVECEXP (vals, 0, i);
12611 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12612 all_same = false;
12613 }
12614
12615 if (!all_same)
12616 /* The elements are not all the same. We could handle repeating
12617 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12618 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12619 vdup.i16). */
12620 return NULL_RTX;
12621
12622 /* We can load this constant by using VDUP and a constant in a
12623 single ARM register. This will be cheaper than a vector
12624 load. */
12625
12626 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12627 return gen_rtx_VEC_DUPLICATE (mode, x);
12628 }
12629
12630 /* Generate code to load VALS, which is a PARALLEL containing only
12631 constants (for vec_init) or CONST_VECTOR, efficiently into a
12632 register. Returns an RTX to copy into the register, or NULL_RTX
12633 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12634
12635 rtx
12636 neon_make_constant (rtx vals)
12637 {
12638 machine_mode mode = GET_MODE (vals);
12639 rtx target;
12640 rtx const_vec = NULL_RTX;
12641 int n_elts = GET_MODE_NUNITS (mode);
12642 int n_const = 0;
12643 int i;
12644
12645 if (GET_CODE (vals) == CONST_VECTOR)
12646 const_vec = vals;
12647 else if (GET_CODE (vals) == PARALLEL)
12648 {
12649 /* A CONST_VECTOR must contain only CONST_INTs and
12650 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12651 Only store valid constants in a CONST_VECTOR. */
12652 for (i = 0; i < n_elts; ++i)
12653 {
12654 rtx x = XVECEXP (vals, 0, i);
12655 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12656 n_const++;
12657 }
12658 if (n_const == n_elts)
12659 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12660 }
12661 else
12662 gcc_unreachable ();
12663
12664 if (const_vec != NULL
12665 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12666 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12667 return const_vec;
12668 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12669 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12670 pipeline cycle; creating the constant takes one or two ARM
12671 pipeline cycles. */
12672 return target;
12673 else if (const_vec != NULL_RTX)
12674 /* Load from constant pool. On Cortex-A8 this takes two cycles
12675 (for either double or quad vectors). We can not take advantage
12676 of single-cycle VLD1 because we need a PC-relative addressing
12677 mode. */
12678 return const_vec;
12679 else
12680 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12681 We can not construct an initializer. */
12682 return NULL_RTX;
12683 }
12684
12685 /* Initialize vector TARGET to VALS. */
12686
12687 void
12688 neon_expand_vector_init (rtx target, rtx vals)
12689 {
12690 machine_mode mode = GET_MODE (target);
12691 machine_mode inner_mode = GET_MODE_INNER (mode);
12692 int n_elts = GET_MODE_NUNITS (mode);
12693 int n_var = 0, one_var = -1;
12694 bool all_same = true;
12695 rtx x, mem;
12696 int i;
12697
12698 for (i = 0; i < n_elts; ++i)
12699 {
12700 x = XVECEXP (vals, 0, i);
12701 if (!CONSTANT_P (x))
12702 ++n_var, one_var = i;
12703
12704 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12705 all_same = false;
12706 }
12707
12708 if (n_var == 0)
12709 {
12710 rtx constant = neon_make_constant (vals);
12711 if (constant != NULL_RTX)
12712 {
12713 emit_move_insn (target, constant);
12714 return;
12715 }
12716 }
12717
12718 /* Splat a single non-constant element if we can. */
12719 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12720 {
12721 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12722 emit_insn (gen_rtx_SET (VOIDmode, target,
12723 gen_rtx_VEC_DUPLICATE (mode, x)));
12724 return;
12725 }
12726
12727 /* One field is non-constant. Load constant then overwrite varying
12728 field. This is more efficient than using the stack. */
12729 if (n_var == 1)
12730 {
12731 rtx copy = copy_rtx (vals);
12732 rtx index = GEN_INT (one_var);
12733
12734 /* Load constant part of vector, substitute neighboring value for
12735 varying element. */
12736 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12737 neon_expand_vector_init (target, copy);
12738
12739 /* Insert variable. */
12740 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12741 switch (mode)
12742 {
12743 case V8QImode:
12744 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12745 break;
12746 case V16QImode:
12747 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12748 break;
12749 case V4HImode:
12750 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12751 break;
12752 case V8HImode:
12753 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12754 break;
12755 case V2SImode:
12756 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12757 break;
12758 case V4SImode:
12759 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12760 break;
12761 case V2SFmode:
12762 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12763 break;
12764 case V4SFmode:
12765 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12766 break;
12767 case V2DImode:
12768 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12769 break;
12770 default:
12771 gcc_unreachable ();
12772 }
12773 return;
12774 }
12775
12776 /* Construct the vector in memory one field at a time
12777 and load the whole vector. */
12778 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12779 for (i = 0; i < n_elts; i++)
12780 emit_move_insn (adjust_address_nv (mem, inner_mode,
12781 i * GET_MODE_SIZE (inner_mode)),
12782 XVECEXP (vals, 0, i));
12783 emit_move_insn (target, mem);
12784 }
12785
12786 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12787 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12788 reported source locations are bogus. */
12789
12790 static void
12791 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12792 const char *err)
12793 {
12794 HOST_WIDE_INT lane;
12795
12796 gcc_assert (CONST_INT_P (operand));
12797
12798 lane = INTVAL (operand);
12799
12800 if (lane < low || lane >= high)
12801 error (err);
12802 }
12803
12804 /* Bounds-check lanes. */
12805
12806 void
12807 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12808 {
12809 bounds_check (operand, low, high, "lane out of range");
12810 }
12811
12812 /* Bounds-check constants. */
12813
12814 void
12815 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12816 {
12817 bounds_check (operand, low, high, "constant out of range");
12818 }
12819
12820 HOST_WIDE_INT
12821 neon_element_bits (machine_mode mode)
12822 {
12823 if (mode == DImode)
12824 return GET_MODE_BITSIZE (mode);
12825 else
12826 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12827 }
12828
12829 \f
12830 /* Predicates for `match_operand' and `match_operator'. */
12831
12832 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12833 WB is true if full writeback address modes are allowed and is false
12834 if limited writeback address modes (POST_INC and PRE_DEC) are
12835 allowed. */
12836
12837 int
12838 arm_coproc_mem_operand (rtx op, bool wb)
12839 {
12840 rtx ind;
12841
12842 /* Reject eliminable registers. */
12843 if (! (reload_in_progress || reload_completed || lra_in_progress)
12844 && ( reg_mentioned_p (frame_pointer_rtx, op)
12845 || reg_mentioned_p (arg_pointer_rtx, op)
12846 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12847 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12848 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12849 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12850 return FALSE;
12851
12852 /* Constants are converted into offsets from labels. */
12853 if (!MEM_P (op))
12854 return FALSE;
12855
12856 ind = XEXP (op, 0);
12857
12858 if (reload_completed
12859 && (GET_CODE (ind) == LABEL_REF
12860 || (GET_CODE (ind) == CONST
12861 && GET_CODE (XEXP (ind, 0)) == PLUS
12862 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12863 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12864 return TRUE;
12865
12866 /* Match: (mem (reg)). */
12867 if (REG_P (ind))
12868 return arm_address_register_rtx_p (ind, 0);
12869
12870 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12871 acceptable in any case (subject to verification by
12872 arm_address_register_rtx_p). We need WB to be true to accept
12873 PRE_INC and POST_DEC. */
12874 if (GET_CODE (ind) == POST_INC
12875 || GET_CODE (ind) == PRE_DEC
12876 || (wb
12877 && (GET_CODE (ind) == PRE_INC
12878 || GET_CODE (ind) == POST_DEC)))
12879 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12880
12881 if (wb
12882 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12883 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12884 && GET_CODE (XEXP (ind, 1)) == PLUS
12885 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12886 ind = XEXP (ind, 1);
12887
12888 /* Match:
12889 (plus (reg)
12890 (const)). */
12891 if (GET_CODE (ind) == PLUS
12892 && REG_P (XEXP (ind, 0))
12893 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12894 && CONST_INT_P (XEXP (ind, 1))
12895 && INTVAL (XEXP (ind, 1)) > -1024
12896 && INTVAL (XEXP (ind, 1)) < 1024
12897 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12898 return TRUE;
12899
12900 return FALSE;
12901 }
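
/* Worked example (added for clarity; the ranges follow directly from the
   checks above): with WB false the forms accepted include

     (mem (reg rN))
     (mem (post_inc (reg rN)))  and  (mem (pre_dec (reg rN)))
     (mem (plus (reg rN) (const_int imm)))
         with imm a multiple of 4 in the range -1020 .. +1020.

   PRE_INC, POST_DEC, POST_MODIFY and PRE_MODIFY addresses additionally
   require WB to be true.  */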
12902
12903 /* Return TRUE if OP is a memory operand from or to which we can load or
12904 store a vector. TYPE is one of the following values:
12905 0 - Vector load/store (vldr)
12906 1 - Core registers (ldm)
12907 2 - Element/structure loads (vld1)
12908 */
12909 int
12910 neon_vector_mem_operand (rtx op, int type, bool strict)
12911 {
12912 rtx ind;
12913
12914 /* Reject eliminable registers. */
12915 if (! (reload_in_progress || reload_completed)
12916 && ( reg_mentioned_p (frame_pointer_rtx, op)
12917 || reg_mentioned_p (arg_pointer_rtx, op)
12918 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12919 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12920 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12921 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12922 return !strict;
12923
12924 /* Constants are converted into offsets from labels. */
12925 if (!MEM_P (op))
12926 return FALSE;
12927
12928 ind = XEXP (op, 0);
12929
12930 if (reload_completed
12931 && (GET_CODE (ind) == LABEL_REF
12932 || (GET_CODE (ind) == CONST
12933 && GET_CODE (XEXP (ind, 0)) == PLUS
12934 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12935 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12936 return TRUE;
12937
12938 /* Match: (mem (reg)). */
12939 if (REG_P (ind))
12940 return arm_address_register_rtx_p (ind, 0);
12941
12942 /* Allow post-increment with Neon registers. */
12943 if ((type != 1 && GET_CODE (ind) == POST_INC)
12944 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12945 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12946
12947 /* Allow post-increment by register for VLDn. */
12948 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12949 && GET_CODE (XEXP (ind, 1)) == PLUS
12950 && REG_P (XEXP (XEXP (ind, 1), 1)))
12951 return true;
12952
12953 /* Match:
12954 (plus (reg)
12955 (const)). */
12956 if (type == 0
12957 && GET_CODE (ind) == PLUS
12958 && REG_P (XEXP (ind, 0))
12959 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12960 && CONST_INT_P (XEXP (ind, 1))
12961 && INTVAL (XEXP (ind, 1)) > -1024
12962 /* For quad modes, we restrict the constant offset to be slightly less
12963 than what the instruction format permits. We have no such constraint
12964 on double mode offsets. (This must match arm_legitimate_index_p.) */
12965 && (INTVAL (XEXP (ind, 1))
12966 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12967 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12968 return TRUE;
12969
12970 return FALSE;
12971 }
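
/* Worked example (added for clarity): for TYPE 0 the constant-offset form
   above accepts word-aligned offsets from -1020 up to +1020 for double-word
   (D-register) modes, but only up to +1012 for quad-word modes, matching
   the restriction in arm_legitimate_index_p.  */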
12972
12973 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12974 type. */
12975 int
12976 neon_struct_mem_operand (rtx op)
12977 {
12978 rtx ind;
12979
12980 /* Reject eliminable registers. */
12981 if (! (reload_in_progress || reload_completed)
12982 && ( reg_mentioned_p (frame_pointer_rtx, op)
12983 || reg_mentioned_p (arg_pointer_rtx, op)
12984 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12985 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12986 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12987 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12988 return FALSE;
12989
12990 /* Constants are converted into offsets from labels. */
12991 if (!MEM_P (op))
12992 return FALSE;
12993
12994 ind = XEXP (op, 0);
12995
12996 if (reload_completed
12997 && (GET_CODE (ind) == LABEL_REF
12998 || (GET_CODE (ind) == CONST
12999 && GET_CODE (XEXP (ind, 0)) == PLUS
13000 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13001 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13002 return TRUE;
13003
13004 /* Match: (mem (reg)). */
13005 if (REG_P (ind))
13006 return arm_address_register_rtx_p (ind, 0);
13007
13008 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13009 if (GET_CODE (ind) == POST_INC
13010 || GET_CODE (ind) == PRE_DEC)
13011 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13012
13013 return FALSE;
13014 }
13015
13016 /* Return true if X is a register that will be eliminated later on. */
13017 int
13018 arm_eliminable_register (rtx x)
13019 {
13020 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13021 || REGNO (x) == ARG_POINTER_REGNUM
13022 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13023 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13024 }
13025
13026 /* Return GENERAL_REGS if a scratch register is required to reload X
13027 to/from coprocessor registers. Otherwise return NO_REGS. */
13028
13029 enum reg_class
13030 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13031 {
13032 if (mode == HFmode)
13033 {
13034 if (!TARGET_NEON_FP16)
13035 return GENERAL_REGS;
13036 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13037 return NO_REGS;
13038 return GENERAL_REGS;
13039 }
13040
13041 /* The neon move patterns handle all legitimate vector and struct
13042 addresses. */
13043 if (TARGET_NEON
13044 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13045 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13046 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13047 || VALID_NEON_STRUCT_MODE (mode)))
13048 return NO_REGS;
13049
13050 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13051 return NO_REGS;
13052
13053 return GENERAL_REGS;
13054 }
13055
13056 /* Values which must be returned in the most-significant end of the return
13057 register. */
13058
13059 static bool
13060 arm_return_in_msb (const_tree valtype)
13061 {
13062 return (TARGET_AAPCS_BASED
13063 && BYTES_BIG_ENDIAN
13064 && (AGGREGATE_TYPE_P (valtype)
13065 || TREE_CODE (valtype) == COMPLEX_TYPE
13066 || FIXED_POINT_TYPE_P (valtype)));
13067 }
13068
13069 /* Return TRUE if X references a SYMBOL_REF. */
13070 int
13071 symbol_mentioned_p (rtx x)
13072 {
13073 const char * fmt;
13074 int i;
13075
13076 if (GET_CODE (x) == SYMBOL_REF)
13077 return 1;
13078
13079 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13080 are constant offsets, not symbols. */
13081 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13082 return 0;
13083
13084 fmt = GET_RTX_FORMAT (GET_CODE (x));
13085
13086 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13087 {
13088 if (fmt[i] == 'E')
13089 {
13090 int j;
13091
13092 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13093 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13094 return 1;
13095 }
13096 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13097 return 1;
13098 }
13099
13100 return 0;
13101 }
13102
13103 /* Return TRUE if X references a LABEL_REF. */
13104 int
13105 label_mentioned_p (rtx x)
13106 {
13107 const char * fmt;
13108 int i;
13109
13110 if (GET_CODE (x) == LABEL_REF)
13111 return 1;
13112
13113 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13114 instruction, but they are constant offsets, not symbols. */
13115 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13116 return 0;
13117
13118 fmt = GET_RTX_FORMAT (GET_CODE (x));
13119 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13120 {
13121 if (fmt[i] == 'E')
13122 {
13123 int j;
13124
13125 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13126 if (label_mentioned_p (XVECEXP (x, i, j)))
13127 return 1;
13128 }
13129 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13130 return 1;
13131 }
13132
13133 return 0;
13134 }
13135
13136 int
13137 tls_mentioned_p (rtx x)
13138 {
13139 switch (GET_CODE (x))
13140 {
13141 case CONST:
13142 return tls_mentioned_p (XEXP (x, 0));
13143
13144 case UNSPEC:
13145 if (XINT (x, 1) == UNSPEC_TLS)
13146 return 1;
13147
13148 default:
13149 return 0;
13150 }
13151 }
13152
13153 /* Must not copy any rtx that uses a pc-relative address. */
13154
13155 static bool
13156 arm_cannot_copy_insn_p (rtx_insn *insn)
13157 {
13158 /* The tls call insn cannot be copied, as it is paired with a data
13159 word. */
13160 if (recog_memoized (insn) == CODE_FOR_tlscall)
13161 return true;
13162
13163 subrtx_iterator::array_type array;
13164 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13165 {
13166 const_rtx x = *iter;
13167 if (GET_CODE (x) == UNSPEC
13168 && (XINT (x, 1) == UNSPEC_PIC_BASE
13169 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13170 return true;
13171 }
13172 return false;
13173 }
13174
13175 enum rtx_code
13176 minmax_code (rtx x)
13177 {
13178 enum rtx_code code = GET_CODE (x);
13179
13180 switch (code)
13181 {
13182 case SMAX:
13183 return GE;
13184 case SMIN:
13185 return LE;
13186 case UMIN:
13187 return LEU;
13188 case UMAX:
13189 return GEU;
13190 default:
13191 gcc_unreachable ();
13192 }
13193 }
13194
13195 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13196
13197 bool
13198 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13199 int *mask, bool *signed_sat)
13200 {
13201 /* The high bound must be a power of two minus one. */
13202 int log = exact_log2 (INTVAL (hi_bound) + 1);
13203 if (log == -1)
13204 return false;
13205
13206 /* The low bound is either zero (for usat) or one less than the
13207 negation of the high bound (for ssat). */
13208 if (INTVAL (lo_bound) == 0)
13209 {
13210 if (mask)
13211 *mask = log;
13212 if (signed_sat)
13213 *signed_sat = false;
13214
13215 return true;
13216 }
13217
13218 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13219 {
13220 if (mask)
13221 *mask = log + 1;
13222 if (signed_sat)
13223 *signed_sat = true;
13224
13225 return true;
13226 }
13227
13228 return false;
13229 }
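
/* Worked examples (added for clarity; both follow from the code above):
     bounds 0 .. 255     ->  *mask = 8, *signed_sat = false  (usat-style)
     bounds -128 .. 127  ->  *mask = 8, *signed_sat = true   (ssat-style)
   In the signed case exact_log2 (127 + 1) is 7 and the mask is log + 1.  */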
13230
13231 /* Return 1 if memory locations are adjacent. */
13232 int
13233 adjacent_mem_locations (rtx a, rtx b)
13234 {
13235 /* We don't guarantee to preserve the order of these memory refs. */
13236 if (volatile_refs_p (a) || volatile_refs_p (b))
13237 return 0;
13238
13239 if ((REG_P (XEXP (a, 0))
13240 || (GET_CODE (XEXP (a, 0)) == PLUS
13241 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13242 && (REG_P (XEXP (b, 0))
13243 || (GET_CODE (XEXP (b, 0)) == PLUS
13244 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13245 {
13246 HOST_WIDE_INT val0 = 0, val1 = 0;
13247 rtx reg0, reg1;
13248 int val_diff;
13249
13250 if (GET_CODE (XEXP (a, 0)) == PLUS)
13251 {
13252 reg0 = XEXP (XEXP (a, 0), 0);
13253 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13254 }
13255 else
13256 reg0 = XEXP (a, 0);
13257
13258 if (GET_CODE (XEXP (b, 0)) == PLUS)
13259 {
13260 reg1 = XEXP (XEXP (b, 0), 0);
13261 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13262 }
13263 else
13264 reg1 = XEXP (b, 0);
13265
13266 /* Don't accept any offset that will require multiple
13267 instructions to handle, since this would cause the
13268 arith_adjacentmem pattern to output an overlong sequence. */
13269 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13270 return 0;
13271
13272 /* Don't allow an eliminable register: register elimination can make
13273 the offset too large. */
13274 if (arm_eliminable_register (reg0))
13275 return 0;
13276
13277 val_diff = val1 - val0;
13278
13279 if (arm_ld_sched)
13280 {
13281 /* If the target has load delay slots, then there's no benefit
13282 to using an ldm instruction unless the offset is zero and
13283 we are optimizing for size. */
13284 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13285 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13286 && (val_diff == 4 || val_diff == -4));
13287 }
13288
13289 return ((REGNO (reg0) == REGNO (reg1))
13290 && (val_diff == 4 || val_diff == -4));
13291 }
13292
13293 return 0;
13294 }
13295
13296 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13297 for load operations, false for store operations. CONSECUTIVE is true
13298 if the register numbers in the operation must be consecutive in the register
13299 bank. RETURN_PC is true if the value is to be loaded into the PC.
13300 The pattern we are trying to match for load is:
13301 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13302 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13303 :
13304 :
13305 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13306 ]
13307 where
13308 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13309 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13310 3. If consecutive is TRUE, then for kth register being loaded,
13311 REGNO (R_dk) = REGNO (R_d0) + k.
13312 The pattern for store is similar. */
13313 bool
13314 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13315 bool consecutive, bool return_pc)
13316 {
13317 HOST_WIDE_INT count = XVECLEN (op, 0);
13318 rtx reg, mem, addr;
13319 unsigned regno;
13320 unsigned first_regno;
13321 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13322 rtx elt;
13323 bool addr_reg_in_reglist = false;
13324 bool update = false;
13325 int reg_increment;
13326 int offset_adj;
13327 int regs_per_val;
13328
13329 /* If not in SImode, then registers must be consecutive
13330 (e.g., VLDM instructions for DFmode). */
13331 gcc_assert ((mode == SImode) || consecutive);
13332 /* Setting return_pc for stores is illegal. */
13333 gcc_assert (!return_pc || load);
13334
13335 /* Set up the increments and the regs per val based on the mode. */
13336 reg_increment = GET_MODE_SIZE (mode);
13337 regs_per_val = reg_increment / 4;
13338 offset_adj = return_pc ? 1 : 0;
13339
13340 if (count <= 1
13341 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13342 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13343 return false;
13344
13345 /* Check if this is a write-back. */
13346 elt = XVECEXP (op, 0, offset_adj);
13347 if (GET_CODE (SET_SRC (elt)) == PLUS)
13348 {
13349 i++;
13350 base = 1;
13351 update = true;
13352
13353 /* The offset adjustment must be the number of registers being
13354 popped times the size of a single register. */
13355 if (!REG_P (SET_DEST (elt))
13356 || !REG_P (XEXP (SET_SRC (elt), 0))
13357 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13358 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13359 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13360 ((count - 1 - offset_adj) * reg_increment))
13361 return false;
13362 }
13363
13364 i = i + offset_adj;
13365 base = base + offset_adj;
13366 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13367 success depends on the type: VLDM can do just one reg,
13368 LDM must do at least two. */
13369 if ((count <= i) && (mode == SImode))
13370 return false;
13371
13372 elt = XVECEXP (op, 0, i - 1);
13373 if (GET_CODE (elt) != SET)
13374 return false;
13375
13376 if (load)
13377 {
13378 reg = SET_DEST (elt);
13379 mem = SET_SRC (elt);
13380 }
13381 else
13382 {
13383 reg = SET_SRC (elt);
13384 mem = SET_DEST (elt);
13385 }
13386
13387 if (!REG_P (reg) || !MEM_P (mem))
13388 return false;
13389
13390 regno = REGNO (reg);
13391 first_regno = regno;
13392 addr = XEXP (mem, 0);
13393 if (GET_CODE (addr) == PLUS)
13394 {
13395 if (!CONST_INT_P (XEXP (addr, 1)))
13396 return false;
13397
13398 offset = INTVAL (XEXP (addr, 1));
13399 addr = XEXP (addr, 0);
13400 }
13401
13402 if (!REG_P (addr))
13403 return false;
13404
13405 /* Don't allow SP to be loaded unless it is also the base register. It
13406 guarantees that SP is reset correctly when an LDM instruction
13407 is interrupted. Otherwise, we might end up with a corrupt stack. */
13408 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13409 return false;
13410
13411 for (; i < count; i++)
13412 {
13413 elt = XVECEXP (op, 0, i);
13414 if (GET_CODE (elt) != SET)
13415 return false;
13416
13417 if (load)
13418 {
13419 reg = SET_DEST (elt);
13420 mem = SET_SRC (elt);
13421 }
13422 else
13423 {
13424 reg = SET_SRC (elt);
13425 mem = SET_DEST (elt);
13426 }
13427
13428 if (!REG_P (reg)
13429 || GET_MODE (reg) != mode
13430 || REGNO (reg) <= regno
13431 || (consecutive
13432 && (REGNO (reg) !=
13433 (unsigned int) (first_regno + regs_per_val * (i - base))))
13434 /* Don't allow SP to be loaded unless it is also the base register. It
13435 guarantees that SP is reset correctly when an LDM instruction
13436 is interrupted. Otherwise, we might end up with a corrupt stack. */
13437 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13438 || !MEM_P (mem)
13439 || GET_MODE (mem) != mode
13440 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13441 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13442 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13443 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13444 offset + (i - base) * reg_increment))
13445 && (!REG_P (XEXP (mem, 0))
13446 || offset + (i - base) * reg_increment != 0)))
13447 return false;
13448
13449 regno = REGNO (reg);
13450 if (regno == REGNO (addr))
13451 addr_reg_in_reglist = true;
13452 }
13453
13454 if (load)
13455 {
13456 if (update && addr_reg_in_reglist)
13457 return false;
13458
13459 /* For Thumb-1, the address register is always modified, either by
13460 write-back or by an explicit load. If the pattern does not describe an
13461 update, then the address register must be in the list of loaded registers. */
13462 if (TARGET_THUMB1)
13463 return update || addr_reg_in_reglist;
13464 }
13465
13466 return true;
13467 }
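
/* Illustrative example (added for clarity; register numbers are arbitrary):
   a two-word LDM from r3 with no writeback would be matched as

     (parallel [(set (reg:SI r4) (mem:SI (reg:SI r3)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r3)
                                                  (const_int 4))))])

   i.e. offset 0 for the first element, ascending destination register
   numbers, and consecutive offsets in steps of the register size, as
   required by the checks above.  */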
13468
13469 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13470 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13471 instruction. ADD_OFFSET is nonzero if the base address register needs
13472 to be modified with an add instruction before we can use it. */
13473
13474 static bool
13475 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13476 int nops, HOST_WIDE_INT add_offset)
13477 {
13478 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13479 if the offset isn't small enough. The reason 2 ldrs are faster
13480 is because these ARMs are able to do more than one cache access
13481 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13482 whilst the ARM8 has a double bandwidth cache. This means that
13483 these cores can do both an instruction fetch and a data fetch in
13484 a single cycle, so the trick of calculating the address into a
13485 scratch register (one of the result regs) and then doing a load
13486 multiple actually becomes slower (and no smaller in code size).
13487 That is the transformation
13488
13489 ldr rd1, [rbase + offset]
13490 ldr rd2, [rbase + offset + 4]
13491
13492 to
13493
13494 add rd1, rbase, offset
13495 ldmia rd1, {rd1, rd2}
13496
13497 produces worse code -- '3 cycles + any stalls on rd2' instead of
13498 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13499 access per cycle, the first sequence could never complete in less
13500 than 6 cycles, whereas the ldm sequence would only take 5 and
13501 would make better use of sequential accesses if not hitting the
13502 cache.
13503
13504 We cheat here and test 'arm_ld_sched' which we currently know to
13505 only be true for the ARM8, ARM9 and StrongARM. If this ever
13506 changes, then the test below needs to be reworked. */
13507 if (nops == 2 && arm_ld_sched && add_offset != 0)
13508 return false;
13509
13510 /* XScale has load-store double instructions, but they have stricter
13511 alignment requirements than load-store multiple, so we cannot
13512 use them.
13513
13514 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13515 the pipeline until completion.
13516
13517 NREGS CYCLES
13518 1 3
13519 2 4
13520 3 5
13521 4 6
13522
13523 An ldr instruction takes 1-3 cycles, but does not block the
13524 pipeline.
13525
13526 NREGS CYCLES
13527 1 1-3
13528 2 2-6
13529 3 3-9
13530 4 4-12
13531
13532 Best case ldr will always win. However, the more ldr instructions
13533 we issue, the less likely we are to be able to schedule them well.
13534 Using ldr instructions also increases code size.
13535
13536 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13537 for counts of 3 or 4 regs. */
13538 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13539 return false;
13540 return true;
13541 }
13542
13543 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13544 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13545 an array ORDER which describes the sequence to use when accessing the
13546 offsets that produces an ascending order. In this sequence, each
13547 offset must be larger by exactly 4 than the previous one. ORDER[0]
13548 must have been filled in with the lowest offset by the caller.
13549 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13550 we use to verify that ORDER produces an ascending order of registers.
13551 Return true if it was possible to construct such an order, false if
13552 not. */
13553
13554 static bool
13555 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13556 int *unsorted_regs)
13557 {
13558 int i;
13559 for (i = 1; i < nops; i++)
13560 {
13561 int j;
13562
13563 order[i] = order[i - 1];
13564 for (j = 0; j < nops; j++)
13565 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13566 {
13567 /* We must find exactly one offset that is higher than the
13568 previous one by 4. */
13569 if (order[i] != order[i - 1])
13570 return false;
13571 order[i] = j;
13572 }
13573 if (order[i] == order[i - 1])
13574 return false;
13575 /* The register numbers must be ascending. */
13576 if (unsorted_regs != NULL
13577 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13578 return false;
13579 }
13580 return true;
13581 }
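
/* Worked example (added for clarity): with UNSORTED_OFFSETS = {4, 12, 0, 8}
   the caller sets ORDER[0] = 2 (the index of offset 0); the loop above then
   produces ORDER = {2, 0, 3, 1}, i.e. offsets 0, 4, 8, 12 in ascending
   order, each exactly 4 larger than its predecessor.  */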
13582
13583 /* Used to determine in a peephole whether a sequence of load
13584 instructions can be changed into a load-multiple instruction.
13585 NOPS is the number of separate load instructions we are examining. The
13586 first NOPS entries in OPERANDS are the destination registers, the
13587 next NOPS entries are memory operands. If this function is
13588 successful, *BASE is set to the common base register of the memory
13589 accesses; *LOAD_OFFSET is set to the first memory location's offset
13590 from that base register.
13591 REGS is an array filled in with the destination register numbers.
13592 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13593 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13594 the sequence of registers in REGS matches the loads from ascending memory
13595 locations, and the function verifies that the register numbers are
13596 themselves ascending. If CHECK_REGS is false, the register numbers
13597 are stored in the order they are found in the operands. */
13598 static int
13599 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13600 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13601 {
13602 int unsorted_regs[MAX_LDM_STM_OPS];
13603 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13604 int order[MAX_LDM_STM_OPS];
13605 rtx base_reg_rtx = NULL;
13606 int base_reg = -1;
13607 int i, ldm_case;
13608
13609 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13610 easily extended if required. */
13611 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13612
13613 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13614
13615 /* Loop over the operands and check that the memory references are
13616 suitable (i.e. immediate offsets from the same base register). At
13617 the same time, extract the target register, and the memory
13618 offsets. */
13619 for (i = 0; i < nops; i++)
13620 {
13621 rtx reg;
13622 rtx offset;
13623
13624 /* Convert a subreg of a mem into the mem itself. */
13625 if (GET_CODE (operands[nops + i]) == SUBREG)
13626 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13627
13628 gcc_assert (MEM_P (operands[nops + i]));
13629
13630 /* Don't reorder volatile memory references; it doesn't seem worth
13631 looking for the case where the order is ok anyway. */
13632 if (MEM_VOLATILE_P (operands[nops + i]))
13633 return 0;
13634
13635 offset = const0_rtx;
13636
13637 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13638 || (GET_CODE (reg) == SUBREG
13639 && REG_P (reg = SUBREG_REG (reg))))
13640 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13641 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13642 || (GET_CODE (reg) == SUBREG
13643 && REG_P (reg = SUBREG_REG (reg))))
13644 && (CONST_INT_P (offset
13645 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13646 {
13647 if (i == 0)
13648 {
13649 base_reg = REGNO (reg);
13650 base_reg_rtx = reg;
13651 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13652 return 0;
13653 }
13654 else if (base_reg != (int) REGNO (reg))
13655 /* Not addressed from the same base register. */
13656 return 0;
13657
13658 unsorted_regs[i] = (REG_P (operands[i])
13659 ? REGNO (operands[i])
13660 : REGNO (SUBREG_REG (operands[i])));
13661
13662 /* If it isn't an integer register, or if it overwrites the
13663 base register but isn't the last insn in the list, then
13664 we can't do this. */
13665 if (unsorted_regs[i] < 0
13666 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13667 || unsorted_regs[i] > 14
13668 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13669 return 0;
13670
13671 /* Don't allow SP to be loaded unless it is also the base
13672 register. It guarantees that SP is reset correctly when
13673 an LDM instruction is interrupted. Otherwise, we might
13674 end up with a corrupt stack. */
13675 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13676 return 0;
13677
13678 unsorted_offsets[i] = INTVAL (offset);
13679 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13680 order[0] = i;
13681 }
13682 else
13683 /* Not a suitable memory address. */
13684 return 0;
13685 }
13686
13687 /* All the useful information has now been extracted from the
13688 operands into unsorted_regs and unsorted_offsets; additionally,
13689 order[0] has been set to the lowest offset in the list. Sort
13690 the offsets into order, verifying that they are adjacent, and
13691 check that the register numbers are ascending. */
13692 if (!compute_offset_order (nops, unsorted_offsets, order,
13693 check_regs ? unsorted_regs : NULL))
13694 return 0;
13695
13696 if (saved_order)
13697 memcpy (saved_order, order, sizeof order);
13698
13699 if (base)
13700 {
13701 *base = base_reg;
13702
13703 for (i = 0; i < nops; i++)
13704 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13705
13706 *load_offset = unsorted_offsets[order[0]];
13707 }
13708
13709 if (TARGET_THUMB1
13710 && !peep2_reg_dead_p (nops, base_reg_rtx))
13711 return 0;
13712
13713 if (unsorted_offsets[order[0]] == 0)
13714 ldm_case = 1; /* ldmia */
13715 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13716 ldm_case = 2; /* ldmib */
13717 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13718 ldm_case = 3; /* ldmda */
13719 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13720 ldm_case = 4; /* ldmdb */
13721 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13722 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13723 ldm_case = 5;
13724 else
13725 return 0;
13726
13727 if (!multiple_operation_profitable_p (false, nops,
13728 ldm_case == 5
13729 ? unsorted_offsets[order[0]] : 0))
13730 return 0;
13731
13732 return ldm_case;
13733 }
13734
13735 /* Used to determine in a peephole whether a sequence of store instructions can
13736 be changed into a store-multiple instruction.
13737 NOPS is the number of separate store instructions we are examining.
13738 NOPS_TOTAL is the total number of instructions recognized by the peephole
13739 pattern.
13740 The first NOPS entries in OPERANDS are the source registers, the next
13741 NOPS entries are memory operands. If this function is successful, *BASE is
13742 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13743 to the first memory location's offset from that base register. REGS is an
13744 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13745 likewise filled with the corresponding rtx's.
13746 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13747 numbers to an ascending order of stores.
13748 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13749 from ascending memory locations, and the function verifies that the register
13750 numbers are themselves ascending. If CHECK_REGS is false, the register
13751 numbers are stored in the order they are found in the operands. */
13752 static int
13753 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13754 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13755 HOST_WIDE_INT *load_offset, bool check_regs)
13756 {
13757 int unsorted_regs[MAX_LDM_STM_OPS];
13758 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13759 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13760 int order[MAX_LDM_STM_OPS];
13761 int base_reg = -1;
13762 rtx base_reg_rtx = NULL;
13763 int i, stm_case;
13764
13765 /* Write back of base register is currently only supported for Thumb 1. */
13766 int base_writeback = TARGET_THUMB1;
13767
13768 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13769 easily extended if required. */
13770 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13771
13772 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13773
13774 /* Loop over the operands and check that the memory references are
13775 suitable (i.e. immediate offsets from the same base register). At
13776 the same time, extract the target register, and the memory
13777 offsets. */
13778 for (i = 0; i < nops; i++)
13779 {
13780 rtx reg;
13781 rtx offset;
13782
13783 /* Convert a subreg of a mem into the mem itself. */
13784 if (GET_CODE (operands[nops + i]) == SUBREG)
13785 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13786
13787 gcc_assert (MEM_P (operands[nops + i]));
13788
13789 /* Don't reorder volatile memory references; it doesn't seem worth
13790 looking for the case where the order is ok anyway. */
13791 if (MEM_VOLATILE_P (operands[nops + i]))
13792 return 0;
13793
13794 offset = const0_rtx;
13795
13796 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13797 || (GET_CODE (reg) == SUBREG
13798 && REG_P (reg = SUBREG_REG (reg))))
13799 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13800 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13801 || (GET_CODE (reg) == SUBREG
13802 && REG_P (reg = SUBREG_REG (reg))))
13803 && (CONST_INT_P (offset
13804 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13805 {
13806 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13807 ? operands[i] : SUBREG_REG (operands[i]));
13808 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13809
13810 if (i == 0)
13811 {
13812 base_reg = REGNO (reg);
13813 base_reg_rtx = reg;
13814 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13815 return 0;
13816 }
13817 else if (base_reg != (int) REGNO (reg))
13818 /* Not addressed from the same base register. */
13819 return 0;
13820
13821 /* If it isn't an integer register, then we can't do this. */
13822 if (unsorted_regs[i] < 0
13823 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13824 /* The effects are unpredictable if the base register is
13825 both updated and stored. */
13826 || (base_writeback && unsorted_regs[i] == base_reg)
13827 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13828 || unsorted_regs[i] > 14)
13829 return 0;
13830
13831 unsorted_offsets[i] = INTVAL (offset);
13832 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13833 order[0] = i;
13834 }
13835 else
13836 /* Not a suitable memory address. */
13837 return 0;
13838 }
13839
13840 /* All the useful information has now been extracted from the
13841 operands into unsorted_regs and unsorted_offsets; additionally,
13842 order[0] has been set to the lowest offset in the list. Sort
13843 the offsets into order, verifying that they are adjacent, and
13844 check that the register numbers are ascending. */
13845 if (!compute_offset_order (nops, unsorted_offsets, order,
13846 check_regs ? unsorted_regs : NULL))
13847 return 0;
13848
13849 if (saved_order)
13850 memcpy (saved_order, order, sizeof order);
13851
13852 if (base)
13853 {
13854 *base = base_reg;
13855
13856 for (i = 0; i < nops; i++)
13857 {
13858 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13859 if (reg_rtxs)
13860 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13861 }
13862
13863 *load_offset = unsorted_offsets[order[0]];
13864 }
13865
13866 if (TARGET_THUMB1
13867 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13868 return 0;
13869
13870 if (unsorted_offsets[order[0]] == 0)
13871 stm_case = 1; /* stmia */
13872 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13873 stm_case = 2; /* stmib */
13874 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13875 stm_case = 3; /* stmda */
13876 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13877 stm_case = 4; /* stmdb */
13878 else
13879 return 0;
13880
13881 if (!multiple_operation_profitable_p (false, nops, 0))
13882 return 0;
13883
13884 return stm_case;
13885 }
13886 \f
13887 /* Routines for use in generating RTL. */
13888
13889 /* Generate a load-multiple instruction. COUNT is the number of loads in
13890 the instruction; REGS and MEMS are arrays containing the operands.
13891 BASEREG is the base register to be used in addressing the memory operands.
13892 WBACK_OFFSET is nonzero if the instruction should update the base
13893 register. */
13894
13895 static rtx
13896 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13897 HOST_WIDE_INT wback_offset)
13898 {
13899 int i = 0, j;
13900 rtx result;
13901
13902 if (!multiple_operation_profitable_p (false, count, 0))
13903 {
13904 rtx seq;
13905
13906 start_sequence ();
13907
13908 for (i = 0; i < count; i++)
13909 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13910
13911 if (wback_offset != 0)
13912 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13913
13914 seq = get_insns ();
13915 end_sequence ();
13916
13917 return seq;
13918 }
13919
13920 result = gen_rtx_PARALLEL (VOIDmode,
13921 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13922 if (wback_offset != 0)
13923 {
13924 XVECEXP (result, 0, 0)
13925 = gen_rtx_SET (VOIDmode, basereg,
13926 plus_constant (Pmode, basereg, wback_offset));
13927 i = 1;
13928 count++;
13929 }
13930
13931 for (j = 0; i < count; i++, j++)
13932 XVECEXP (result, 0, i)
13933 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13934
13935 return result;
13936 }
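
/* Illustrative example (added for clarity; register numbers are arbitrary):
   for COUNT = 2, REGS = {4, 5} and WBACK_OFFSET = 8, the routine above
   builds, when a multiple operation is profitable,

     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) MEMS[0])
                (set (reg:SI 5) MEMS[1])])

   and otherwise falls back to a plain sequence of single loads followed by
   the base-register update.  */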
13937
13938 /* Generate a store-multiple instruction. COUNT is the number of stores in
13939 the instruction; REGS and MEMS are arrays containing the operands.
13940 BASEREG is the base register to be used in addressing the memory operands.
13941 WBACK_OFFSET is nonzero if the instruction should update the base
13942 register. */
13943
13944 static rtx
13945 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13946 HOST_WIDE_INT wback_offset)
13947 {
13948 int i = 0, j;
13949 rtx result;
13950
13951 if (GET_CODE (basereg) == PLUS)
13952 basereg = XEXP (basereg, 0);
13953
13954 if (!multiple_operation_profitable_p (false, count, 0))
13955 {
13956 rtx seq;
13957
13958 start_sequence ();
13959
13960 for (i = 0; i < count; i++)
13961 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13962
13963 if (wback_offset != 0)
13964 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13965
13966 seq = get_insns ();
13967 end_sequence ();
13968
13969 return seq;
13970 }
13971
13972 result = gen_rtx_PARALLEL (VOIDmode,
13973 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13974 if (wback_offset != 0)
13975 {
13976 XVECEXP (result, 0, 0)
13977 = gen_rtx_SET (VOIDmode, basereg,
13978 plus_constant (Pmode, basereg, wback_offset));
13979 i = 1;
13980 count++;
13981 }
13982
13983 for (j = 0; i < count; i++, j++)
13984 XVECEXP (result, 0, i)
13985 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13986
13987 return result;
13988 }
13989
13990 /* Generate either a load-multiple or a store-multiple instruction. This
13991 function can be used in situations where we can start with a single MEM
13992 rtx and adjust its address upwards.
13993 COUNT is the number of operations in the instruction, not counting a
13994 possible update of the base register. REGS is an array containing the
13995 register operands.
13996 BASEREG is the base register to be used in addressing the memory operands,
13997 which are constructed from BASEMEM.
13998 WRITE_BACK specifies whether the generated instruction should include an
13999 update of the base register.
14000 OFFSETP is used to pass an offset to and from this function; this offset
14001 is not used when constructing the address (instead BASEMEM should have an
14002 appropriate offset in its address); it is used only for setting
14003 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14004
14005 static rtx
14006 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14007 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14008 {
14009 rtx mems[MAX_LDM_STM_OPS];
14010 HOST_WIDE_INT offset = *offsetp;
14011 int i;
14012
14013 gcc_assert (count <= MAX_LDM_STM_OPS);
14014
14015 if (GET_CODE (basereg) == PLUS)
14016 basereg = XEXP (basereg, 0);
14017
14018 for (i = 0; i < count; i++)
14019 {
14020 rtx addr = plus_constant (Pmode, basereg, i * 4);
14021 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14022 offset += 4;
14023 }
14024
14025 if (write_back)
14026 *offsetp = offset;
14027
14028 if (is_load)
14029 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14030 write_back ? 4 * count : 0);
14031 else
14032 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14033 write_back ? 4 * count : 0);
14034 }
14035
14036 rtx
14037 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14038 rtx basemem, HOST_WIDE_INT *offsetp)
14039 {
14040 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14041 offsetp);
14042 }
14043
14044 rtx
14045 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14046 rtx basemem, HOST_WIDE_INT *offsetp)
14047 {
14048 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14049 offsetp);
14050 }
14051
14052 /* Called from a peephole2 expander to turn a sequence of loads into an
14053 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14054 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14055 is true if we can reorder the registers because their subsequent uses
14056 are commutative.
14057 Returns true iff we could generate a new instruction. */
14058
14059 bool
14060 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14061 {
14062 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14063 rtx mems[MAX_LDM_STM_OPS];
14064 int i, j, base_reg;
14065 rtx base_reg_rtx;
14066 HOST_WIDE_INT offset;
14067 int write_back = FALSE;
14068 int ldm_case;
14069 rtx addr;
14070
14071 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14072 &base_reg, &offset, !sort_regs);
14073
14074 if (ldm_case == 0)
14075 return false;
14076
14077 if (sort_regs)
14078 for (i = 0; i < nops - 1; i++)
14079 for (j = i + 1; j < nops; j++)
14080 if (regs[i] > regs[j])
14081 {
14082 int t = regs[i];
14083 regs[i] = regs[j];
14084 regs[j] = t;
14085 }
14086 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14087
14088 if (TARGET_THUMB1)
14089 {
14090 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14091 gcc_assert (ldm_case == 1 || ldm_case == 5);
14092 write_back = TRUE;
14093 }
14094
14095 if (ldm_case == 5)
14096 {
14097 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14098 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14099 offset = 0;
14100 if (!TARGET_THUMB1)
14101 {
14102 base_reg = regs[0];
14103 base_reg_rtx = newbase;
14104 }
14105 }
14106
14107 for (i = 0; i < nops; i++)
14108 {
14109 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14110 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14111 SImode, addr, 0);
14112 }
14113 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14114 write_back ? offset + i * 4 : 0));
14115 return true;
14116 }
14117
14118 /* Called from a peephole2 expander to turn a sequence of stores into an
14119 STM instruction. OPERANDS are the operands found by the peephole matcher;
14120 NOPS indicates how many separate stores we are trying to combine.
14121 Returns true iff we could generate a new instruction. */
14122
14123 bool
14124 gen_stm_seq (rtx *operands, int nops)
14125 {
14126 int i;
14127 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14128 rtx mems[MAX_LDM_STM_OPS];
14129 int base_reg;
14130 rtx base_reg_rtx;
14131 HOST_WIDE_INT offset;
14132 int write_back = FALSE;
14133 int stm_case;
14134 rtx addr;
14135 bool base_reg_dies;
14136
14137 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14138 mem_order, &base_reg, &offset, true);
14139
14140 if (stm_case == 0)
14141 return false;
14142
14143 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14144
14145 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14146 if (TARGET_THUMB1)
14147 {
14148 gcc_assert (base_reg_dies);
14149 write_back = TRUE;
14150 }
14151
14152 if (stm_case == 5)
14153 {
14154 gcc_assert (base_reg_dies);
14155 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14156 offset = 0;
14157 }
14158
14159 addr = plus_constant (Pmode, base_reg_rtx, offset);
14160
14161 for (i = 0; i < nops; i++)
14162 {
14163 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14164 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14165 SImode, addr, 0);
14166 }
14167 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14168 write_back ? offset + i * 4 : 0));
14169 return true;
14170 }
14171
14172 /* Called from a peephole2 expander to turn a sequence of stores that are
14173 preceded by constant loads into an STM instruction. OPERANDS are the
14174 operands found by the peephole matcher; NOPS indicates how many
14175 separate stores we are trying to combine; there are 2 * NOPS
14176 instructions in the peephole.
14177 Returns true iff we could generate a new instruction. */
14178
14179 bool
14180 gen_const_stm_seq (rtx *operands, int nops)
14181 {
14182 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14183 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14184 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14185 rtx mems[MAX_LDM_STM_OPS];
14186 int base_reg;
14187 rtx base_reg_rtx;
14188 HOST_WIDE_INT offset;
14189 int write_back = FALSE;
14190 int stm_case;
14191 rtx addr;
14192 bool base_reg_dies;
14193 int i, j;
14194 HARD_REG_SET allocated;
14195
14196 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14197 mem_order, &base_reg, &offset, false);
14198
14199 if (stm_case == 0)
14200 return false;
14201
14202 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14203
14204 /* If the same register is used more than once, try to find a free
14205 register. */
14206 CLEAR_HARD_REG_SET (allocated);
14207 for (i = 0; i < nops; i++)
14208 {
14209 for (j = i + 1; j < nops; j++)
14210 if (regs[i] == regs[j])
14211 {
14212 rtx t = peep2_find_free_register (0, nops * 2,
14213 TARGET_THUMB1 ? "l" : "r",
14214 SImode, &allocated);
14215 if (t == NULL_RTX)
14216 return false;
14217 reg_rtxs[i] = t;
14218 regs[i] = REGNO (t);
14219 }
14220 }
14221
14222 /* Compute an ordering that maps the register numbers to an ascending
14223 sequence. */
14224 reg_order[0] = 0;
14225 for (i = 0; i < nops; i++)
14226 if (regs[i] < regs[reg_order[0]])
14227 reg_order[0] = i;
14228
14229 for (i = 1; i < nops; i++)
14230 {
14231 int this_order = reg_order[i - 1];
14232 for (j = 0; j < nops; j++)
14233 if (regs[j] > regs[reg_order[i - 1]]
14234 && (this_order == reg_order[i - 1]
14235 || regs[j] < regs[this_order]))
14236 this_order = j;
14237 reg_order[i] = this_order;
14238 }
14239
14240 /* Ensure that registers that must be live after the instruction end
14241 up with the correct value. */
14242 for (i = 0; i < nops; i++)
14243 {
14244 int this_order = reg_order[i];
14245 if ((this_order != mem_order[i]
14246 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14247 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14248 return false;
14249 }
14250
14251 /* Load the constants. */
14252 for (i = 0; i < nops; i++)
14253 {
14254 rtx op = operands[2 * nops + mem_order[i]];
14255 sorted_regs[i] = regs[reg_order[i]];
14256 emit_move_insn (reg_rtxs[reg_order[i]], op);
14257 }
14258
14259 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14260
14261 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14262 if (TARGET_THUMB1)
14263 {
14264 gcc_assert (base_reg_dies);
14265 write_back = TRUE;
14266 }
14267
14268 if (stm_case == 5)
14269 {
14270 gcc_assert (base_reg_dies);
14271 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14272 offset = 0;
14273 }
14274
14275 addr = plus_constant (Pmode, base_reg_rtx, offset);
14276
14277 for (i = 0; i < nops; i++)
14278 {
14279 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14280 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14281 SImode, addr, 0);
14282 }
14283 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14284 write_back ? offset + i * 4 : 0));
14285 return true;
14286 }
14287
14288 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14289 unaligned copies on processors which support unaligned semantics for those
14290 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14291 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14292 An interleave factor of 1 (the minimum) will perform no interleaving.
14293 Load/store multiple are used for aligned addresses where possible. */
14294
14295 static void
14296 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14297 HOST_WIDE_INT length,
14298 unsigned int interleave_factor)
14299 {
14300 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14301 int *regnos = XALLOCAVEC (int, interleave_factor);
14302 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14303 HOST_WIDE_INT i, j;
14304 HOST_WIDE_INT remaining = length, words;
14305 rtx halfword_tmp = NULL, byte_tmp = NULL;
14306 rtx dst, src;
14307 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14308 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14309 HOST_WIDE_INT srcoffset, dstoffset;
14310 HOST_WIDE_INT src_autoinc, dst_autoinc;
14311 rtx mem, addr;
14312
14313 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14314
14315 /* Use hard registers if we have aligned source or destination so we can use
14316 load/store multiple with contiguous registers. */
14317 if (dst_aligned || src_aligned)
14318 for (i = 0; i < interleave_factor; i++)
14319 regs[i] = gen_rtx_REG (SImode, i);
14320 else
14321 for (i = 0; i < interleave_factor; i++)
14322 regs[i] = gen_reg_rtx (SImode);
14323
14324 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14325 src = copy_addr_to_reg (XEXP (srcbase, 0));
14326
14327 srcoffset = dstoffset = 0;
14328
14329 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14330 For copying the last bytes we want to subtract this offset again. */
14331 src_autoinc = dst_autoinc = 0;
14332
14333 for (i = 0; i < interleave_factor; i++)
14334 regnos[i] = i;
14335
14336 /* Copy BLOCK_SIZE_BYTES chunks. */
14337
14338 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14339 {
14340 /* Load words. */
14341 if (src_aligned && interleave_factor > 1)
14342 {
14343 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14344 TRUE, srcbase, &srcoffset));
14345 src_autoinc += UNITS_PER_WORD * interleave_factor;
14346 }
14347 else
14348 {
14349 for (j = 0; j < interleave_factor; j++)
14350 {
14351 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14352 - src_autoinc));
14353 mem = adjust_automodify_address (srcbase, SImode, addr,
14354 srcoffset + j * UNITS_PER_WORD);
14355 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14356 }
14357 srcoffset += block_size_bytes;
14358 }
14359
14360 /* Store words. */
14361 if (dst_aligned && interleave_factor > 1)
14362 {
14363 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14364 TRUE, dstbase, &dstoffset));
14365 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14366 }
14367 else
14368 {
14369 for (j = 0; j < interleave_factor; j++)
14370 {
14371 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14372 - dst_autoinc));
14373 mem = adjust_automodify_address (dstbase, SImode, addr,
14374 dstoffset + j * UNITS_PER_WORD);
14375 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14376 }
14377 dstoffset += block_size_bytes;
14378 }
14379
14380 remaining -= block_size_bytes;
14381 }
14382
14383 /* Copy any whole words left (note these aren't interleaved with any
14384 subsequent halfword/byte load/stores in the interests of simplicity). */
14385
14386 words = remaining / UNITS_PER_WORD;
14387
14388 gcc_assert (words < interleave_factor);
14389
14390 if (src_aligned && words > 1)
14391 {
14392 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14393 &srcoffset));
14394 src_autoinc += UNITS_PER_WORD * words;
14395 }
14396 else
14397 {
14398 for (j = 0; j < words; j++)
14399 {
14400 addr = plus_constant (Pmode, src,
14401 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14402 mem = adjust_automodify_address (srcbase, SImode, addr,
14403 srcoffset + j * UNITS_PER_WORD);
14404 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14405 }
14406 srcoffset += words * UNITS_PER_WORD;
14407 }
14408
14409 if (dst_aligned && words > 1)
14410 {
14411 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14412 &dstoffset));
14413 dst_autoinc += words * UNITS_PER_WORD;
14414 }
14415 else
14416 {
14417 for (j = 0; j < words; j++)
14418 {
14419 addr = plus_constant (Pmode, dst,
14420 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14421 mem = adjust_automodify_address (dstbase, SImode, addr,
14422 dstoffset + j * UNITS_PER_WORD);
14423 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14424 }
14425 dstoffset += words * UNITS_PER_WORD;
14426 }
14427
14428 remaining -= words * UNITS_PER_WORD;
14429
14430 gcc_assert (remaining < 4);
14431
14432 /* Copy a halfword if necessary. */
14433
14434 if (remaining >= 2)
14435 {
14436 halfword_tmp = gen_reg_rtx (SImode);
14437
14438 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14439 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14440 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14441
14442 /* Either write out immediately, or delay until we've loaded the last
14443 byte, depending on interleave factor. */
14444 if (interleave_factor == 1)
14445 {
14446 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14447 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14448 emit_insn (gen_unaligned_storehi (mem,
14449 gen_lowpart (HImode, halfword_tmp)));
14450 halfword_tmp = NULL;
14451 dstoffset += 2;
14452 }
14453
14454 remaining -= 2;
14455 srcoffset += 2;
14456 }
14457
14458 gcc_assert (remaining < 2);
14459
14460 /* Copy last byte. */
14461
14462 if ((remaining & 1) != 0)
14463 {
14464 byte_tmp = gen_reg_rtx (SImode);
14465
14466 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14467 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14468 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14469
14470 if (interleave_factor == 1)
14471 {
14472 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14473 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14474 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14475 byte_tmp = NULL;
14476 dstoffset++;
14477 }
14478
14479 remaining--;
14480 srcoffset++;
14481 }
14482
14483 /* Store last halfword if we haven't done so already. */
14484
14485 if (halfword_tmp)
14486 {
14487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14488 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14489 emit_insn (gen_unaligned_storehi (mem,
14490 gen_lowpart (HImode, halfword_tmp)));
14491 dstoffset += 2;
14492 }
14493
14494 /* Likewise for last byte. */
14495
14496 if (byte_tmp)
14497 {
14498 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14499 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14500 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14501 dstoffset++;
14502 }
14503
14504 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14505 }
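
/* Worked example (added for clarity): copying LENGTH = 11 bytes with
   INTERLEAVE_FACTOR = 2 processes one 8-byte block (two word loads followed
   by two word stores, or ldm/stm when the corresponding side is aligned);
   no whole words then remain, so the tail is finished with one halfword
   copy and one byte copy.  */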
14506
14507 /* From mips_adjust_block_mem:
14508
14509 Helper function for doing a loop-based block operation on memory
14510 reference MEM. Each iteration of the loop will operate on LENGTH
14511 bytes of MEM.
14512
14513 Create a new base register for use within the loop and point it to
14514 the start of MEM. Create a new memory reference that uses this
14515 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14516
14517 static void
14518 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14519 rtx *loop_mem)
14520 {
14521 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14522
14523 /* Although the new mem does not refer to a known location,
14524 it does keep up to LENGTH bytes of alignment. */
14525 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14526 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14527 }
14528
14529 /* From mips_block_move_loop:
14530
14531 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14532 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14533 the memory regions do not overlap. */
14534
14535 static void
14536 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14537 unsigned int interleave_factor,
14538 HOST_WIDE_INT bytes_per_iter)
14539 {
14540 rtx src_reg, dest_reg, final_src, test;
14541 HOST_WIDE_INT leftover;
14542
14543 leftover = length % bytes_per_iter;
14544 length -= leftover;
14545
14546 /* Create registers and memory references for use within the loop. */
14547 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14548 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14549
14550 /* Calculate the value that SRC_REG should have after the last iteration of
14551 the loop. */
14552 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14553 0, 0, OPTAB_WIDEN);
14554
14555 /* Emit the start of the loop. */
14556 rtx_code_label *label = gen_label_rtx ();
14557 emit_label (label);
14558
14559 /* Emit the loop body. */
14560 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14561 interleave_factor);
14562
14563 /* Move on to the next block. */
14564 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14565 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14566
14567 /* Emit the loop condition. */
14568 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14569 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14570
14571 /* Mop up any left-over bytes. */
14572 if (leftover)
14573 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14574 }
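
/* Worked example (added for clarity): for LENGTH = 100 and BYTES_PER_ITER
   = 16 the code above peels LEFTOVER = 4 bytes, emits a loop that copies
   96 bytes in six 16-byte iterations, and then copies the remaining 4
   bytes with a straight (non-looping) sequence.  */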
14575
14576 /* Emit a block move when either the source or destination is unaligned (not
14577 aligned to a four-byte boundary). This may need further tuning depending on
14578 core type, optimize_size setting, etc. */
14579
14580 static int
14581 arm_movmemqi_unaligned (rtx *operands)
14582 {
14583 HOST_WIDE_INT length = INTVAL (operands[2]);
14584
14585 if (optimize_size)
14586 {
14587 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14588 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14589 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14590 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14591 or dst_aligned though: allow more interleaving in those cases since the
14592 resulting code can be smaller. */
14593 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14594 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14595
14596 if (length > 12)
14597 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14598 interleave_factor, bytes_per_iter);
14599 else
14600 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14601 interleave_factor);
14602 }
14603 else
14604 {
14605 /* Note that the loop created by arm_block_move_unaligned_loop may be
14606 subject to loop unrolling, which makes tuning this condition a little
14607 redundant. */
14608 if (length > 32)
14609 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14610 else
14611 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14612 }
14613
14614 return 1;
14615 }
14616
14617 int
14618 arm_gen_movmemqi (rtx *operands)
14619 {
14620 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14621 HOST_WIDE_INT srcoffset, dstoffset;
14622 int i;
14623 rtx src, dst, srcbase, dstbase;
14624 rtx part_bytes_reg = NULL;
14625 rtx mem;
14626
14627 if (!CONST_INT_P (operands[2])
14628 || !CONST_INT_P (operands[3])
14629 || INTVAL (operands[2]) > 64)
14630 return 0;
14631
14632 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14633 return arm_movmemqi_unaligned (operands);
14634
14635 if (INTVAL (operands[3]) & 3)
14636 return 0;
14637
14638 dstbase = operands[0];
14639 srcbase = operands[1];
14640
14641 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14642 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14643
14644 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14645 out_words_to_go = INTVAL (operands[2]) / 4;
14646 last_bytes = INTVAL (operands[2]) & 3;
14647 dstoffset = srcoffset = 0;
14648
14649 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14650 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14651
14652 for (i = 0; in_words_to_go >= 2; i+=4)
14653 {
14654 if (in_words_to_go > 4)
14655 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14656 TRUE, srcbase, &srcoffset));
14657 else
14658 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14659 src, FALSE, srcbase,
14660 &srcoffset));
14661
14662 if (out_words_to_go)
14663 {
14664 if (out_words_to_go > 4)
14665 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14666 TRUE, dstbase, &dstoffset));
14667 else if (out_words_to_go != 1)
14668 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14669 out_words_to_go, dst,
14670 (last_bytes == 0
14671 ? FALSE : TRUE),
14672 dstbase, &dstoffset));
14673 else
14674 {
14675 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14676 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14677 if (last_bytes != 0)
14678 {
14679 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14680 dstoffset += 4;
14681 }
14682 }
14683 }
14684
14685 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14686 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14687 }
14688
14689 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14690 if (out_words_to_go)
14691 {
14692 rtx sreg;
14693
14694 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14695 sreg = copy_to_reg (mem);
14696
14697 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14698 emit_move_insn (mem, sreg);
14699 in_words_to_go--;
14700
14701 gcc_assert (!in_words_to_go); /* Sanity check */
14702 }
14703
14704 if (in_words_to_go)
14705 {
14706 gcc_assert (in_words_to_go > 0);
14707
14708 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14709 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14710 }
14711
14712 gcc_assert (!last_bytes || part_bytes_reg);
14713
14714 if (BYTES_BIG_ENDIAN && last_bytes)
14715 {
14716 rtx tmp = gen_reg_rtx (SImode);
14717
14718 /* The bytes we want are in the top end of the word. */
14719 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14720 GEN_INT (8 * (4 - last_bytes))));
14721 part_bytes_reg = tmp;
14722
14723 while (last_bytes)
14724 {
14725 mem = adjust_automodify_address (dstbase, QImode,
14726 plus_constant (Pmode, dst,
14727 last_bytes - 1),
14728 dstoffset + last_bytes - 1);
14729 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14730
14731 if (--last_bytes)
14732 {
14733 tmp = gen_reg_rtx (SImode);
14734 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14735 part_bytes_reg = tmp;
14736 }
14737 }
14738
14739 }
14740 else
14741 {
14742 if (last_bytes > 1)
14743 {
14744 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14745 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14746 last_bytes -= 2;
14747 if (last_bytes)
14748 {
14749 rtx tmp = gen_reg_rtx (SImode);
14750 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14751 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14752 part_bytes_reg = tmp;
14753 dstoffset += 2;
14754 }
14755 }
14756
14757 if (last_bytes)
14758 {
14759 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14761 }
14762 }
14763
14764 return 1;
14765 }
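/* As a rough illustration (the exact instructions depend on the target):
   a 14-byte copy with word-aligned operands gives in_words_to_go == 4,
   out_words_to_go == 3 and last_bytes == 2, so the expansion is a single
   load-multiple of four words, a store-multiple of three words with
   write-back of the destination pointer, and finally a halfword store
   taken from the low end of the fourth loaded word (on a little-endian
   target).  */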
14766
14767 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14768 by mode size. */
14769 inline static rtx
14770 next_consecutive_mem (rtx mem)
14771 {
14772 machine_mode mode = GET_MODE (mem);
14773 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14774 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14775
14776 return adjust_automodify_address (mem, mode, addr, offset);
14777 }
14778
14779 /* Copy using LDRD/STRD instructions whenever possible.
14780 Returns true upon success. */
14781 bool
14782 gen_movmem_ldrd_strd (rtx *operands)
14783 {
14784 unsigned HOST_WIDE_INT len;
14785 HOST_WIDE_INT align;
14786 rtx src, dst, base;
14787 rtx reg0;
14788 bool src_aligned, dst_aligned;
14789 bool src_volatile, dst_volatile;
14790
14791 gcc_assert (CONST_INT_P (operands[2]));
14792 gcc_assert (CONST_INT_P (operands[3]));
14793
14794 len = UINTVAL (operands[2]);
14795 if (len > 64)
14796 return false;
14797
14798 /* Maximum alignment we can assume for both src and dst buffers. */
14799 align = INTVAL (operands[3]);
14800
14801 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14802 return false;
14803
14804 /* Place src and dst addresses in registers
14805 and update the corresponding mem rtx. */
14806 dst = operands[0];
14807 dst_volatile = MEM_VOLATILE_P (dst);
14808 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14809 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14810 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14811
14812 src = operands[1];
14813 src_volatile = MEM_VOLATILE_P (src);
14814 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14815 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14816 src = adjust_automodify_address (src, VOIDmode, base, 0);
14817
14818 if (!unaligned_access && !(src_aligned && dst_aligned))
14819 return false;
14820
14821 if (src_volatile || dst_volatile)
14822 return false;
14823
14824 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14825 if (!(dst_aligned || src_aligned))
14826 return arm_gen_movmemqi (operands);
14827
14828 src = adjust_address (src, DImode, 0);
14829 dst = adjust_address (dst, DImode, 0);
14830 while (len >= 8)
14831 {
14832 len -= 8;
14833 reg0 = gen_reg_rtx (DImode);
14834 if (src_aligned)
14835 emit_move_insn (reg0, src);
14836 else
14837 emit_insn (gen_unaligned_loaddi (reg0, src));
14838
14839 if (dst_aligned)
14840 emit_move_insn (dst, reg0);
14841 else
14842 emit_insn (gen_unaligned_storedi (dst, reg0));
14843
14844 src = next_consecutive_mem (src);
14845 dst = next_consecutive_mem (dst);
14846 }
14847
14848 gcc_assert (len < 8);
14849 if (len >= 4)
14850 {
14851 /* At least a word but less than a doubleword left to copy. Copy a word. */
14852 reg0 = gen_reg_rtx (SImode);
14853 src = adjust_address (src, SImode, 0);
14854 dst = adjust_address (dst, SImode, 0);
14855 if (src_aligned)
14856 emit_move_insn (reg0, src);
14857 else
14858 emit_insn (gen_unaligned_loadsi (reg0, src));
14859
14860 if (dst_aligned)
14861 emit_move_insn (dst, reg0);
14862 else
14863 emit_insn (gen_unaligned_storesi (dst, reg0));
14864
14865 src = next_consecutive_mem (src);
14866 dst = next_consecutive_mem (dst);
14867 len -= 4;
14868 }
14869
14870 if (len == 0)
14871 return true;
14872
14873 /* Copy the remaining bytes. */
14874 if (len >= 2)
14875 {
14876 dst = adjust_address (dst, HImode, 0);
14877 src = adjust_address (src, HImode, 0);
14878 reg0 = gen_reg_rtx (SImode);
14879 if (src_aligned)
14880 emit_insn (gen_zero_extendhisi2 (reg0, src));
14881 else
14882 emit_insn (gen_unaligned_loadhiu (reg0, src));
14883
14884 if (dst_aligned)
14885 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14886 else
14887 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14888
14889 src = next_consecutive_mem (src);
14890 dst = next_consecutive_mem (dst);
14891 if (len == 2)
14892 return true;
14893 }
14894
14895 dst = adjust_address (dst, QImode, 0);
14896 src = adjust_address (src, QImode, 0);
14897 reg0 = gen_reg_rtx (QImode);
14898 emit_move_insn (reg0, src);
14899 emit_move_insn (dst, reg0);
14900 return true;
14901 }
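/* For example, a 15-byte copy with both buffers word-aligned is expanded
   as one DImode move (typically an LDRD/STRD pair), one SImode move, one
   halfword move and one final byte move: 8 + 4 + 2 + 1 == 15.  */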
14902
14903 /* Select a dominance comparison mode if possible for a test of the general
14904 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14905 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14906 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14907 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14908 In all cases OP will be either EQ or NE, but we don't need to know which
14909 here. If we are unable to support a dominance comparison we return
14910 CC mode. This will then fail to match for the RTL expressions that
14911 generate this call. */
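/* For example (register numbers here are arbitrary), the test
      (eq (reg r0) (const_int 0)) || (le (reg r1) (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y selects CC_DLEmode, since a true EQ
   implies a true LE for the same operands.  */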
14912 machine_mode
14913 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14914 {
14915 enum rtx_code cond1, cond2;
14916 int swapped = 0;
14917
14918 /* Currently we will probably get the wrong result if the individual
14919 comparisons are not simple. This also ensures that it is safe to
14920 reverse a comparison if necessary. */
14921 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14922 != CCmode)
14923 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14924 != CCmode))
14925 return CCmode;
14926
14927 /* The if_then_else variant of this tests the second condition if the
14928 first passes, but is true if the first fails. Reverse the first
14929 condition to get a true "inclusive-or" expression. */
14930 if (cond_or == DOM_CC_NX_OR_Y)
14931 cond1 = reverse_condition (cond1);
14932
14933 /* If the comparisons are not equal, and one doesn't dominate the other,
14934 then we can't do this. */
14935 if (cond1 != cond2
14936 && !comparison_dominates_p (cond1, cond2)
14937 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14938 return CCmode;
14939
14940 if (swapped)
14941 std::swap (cond1, cond2);
14942
14943 switch (cond1)
14944 {
14945 case EQ:
14946 if (cond_or == DOM_CC_X_AND_Y)
14947 return CC_DEQmode;
14948
14949 switch (cond2)
14950 {
14951 case EQ: return CC_DEQmode;
14952 case LE: return CC_DLEmode;
14953 case LEU: return CC_DLEUmode;
14954 case GE: return CC_DGEmode;
14955 case GEU: return CC_DGEUmode;
14956 default: gcc_unreachable ();
14957 }
14958
14959 case LT:
14960 if (cond_or == DOM_CC_X_AND_Y)
14961 return CC_DLTmode;
14962
14963 switch (cond2)
14964 {
14965 case LT:
14966 return CC_DLTmode;
14967 case LE:
14968 return CC_DLEmode;
14969 case NE:
14970 return CC_DNEmode;
14971 default:
14972 gcc_unreachable ();
14973 }
14974
14975 case GT:
14976 if (cond_or == DOM_CC_X_AND_Y)
14977 return CC_DGTmode;
14978
14979 switch (cond2)
14980 {
14981 case GT:
14982 return CC_DGTmode;
14983 case GE:
14984 return CC_DGEmode;
14985 case NE:
14986 return CC_DNEmode;
14987 default:
14988 gcc_unreachable ();
14989 }
14990
14991 case LTU:
14992 if (cond_or == DOM_CC_X_AND_Y)
14993 return CC_DLTUmode;
14994
14995 switch (cond2)
14996 {
14997 case LTU:
14998 return CC_DLTUmode;
14999 case LEU:
15000 return CC_DLEUmode;
15001 case NE:
15002 return CC_DNEmode;
15003 default:
15004 gcc_unreachable ();
15005 }
15006
15007 case GTU:
15008 if (cond_or == DOM_CC_X_AND_Y)
15009 return CC_DGTUmode;
15010
15011 switch (cond2)
15012 {
15013 case GTU:
15014 return CC_DGTUmode;
15015 case GEU:
15016 return CC_DGEUmode;
15017 case NE:
15018 return CC_DNEmode;
15019 default:
15020 gcc_unreachable ();
15021 }
15022
15023 /* The remaining cases only occur when both comparisons are the
15024 same. */
15025 case NE:
15026 gcc_assert (cond1 == cond2);
15027 return CC_DNEmode;
15028
15029 case LE:
15030 gcc_assert (cond1 == cond2);
15031 return CC_DLEmode;
15032
15033 case GE:
15034 gcc_assert (cond1 == cond2);
15035 return CC_DGEmode;
15036
15037 case LEU:
15038 gcc_assert (cond1 == cond2);
15039 return CC_DLEUmode;
15040
15041 case GEU:
15042 gcc_assert (cond1 == cond2);
15043 return CC_DGEUmode;
15044
15045 default:
15046 gcc_unreachable ();
15047 }
15048 }
15049
15050 machine_mode
15051 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15052 {
15053 /* All floating point compares return CCFP if it is an equality
15054 comparison, and CCFPE otherwise. */
15055 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15056 {
15057 switch (op)
15058 {
15059 case EQ:
15060 case NE:
15061 case UNORDERED:
15062 case ORDERED:
15063 case UNLT:
15064 case UNLE:
15065 case UNGT:
15066 case UNGE:
15067 case UNEQ:
15068 case LTGT:
15069 return CCFPmode;
15070
15071 case LT:
15072 case LE:
15073 case GT:
15074 case GE:
15075 return CCFPEmode;
15076
15077 default:
15078 gcc_unreachable ();
15079 }
15080 }
15081
15082 /* A compare with a shifted operand. Because of canonicalization, the
15083 comparison will have to be swapped when we emit the assembler. */
15084 if (GET_MODE (y) == SImode
15085 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15086 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15087 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15088 || GET_CODE (x) == ROTATERT))
15089 return CC_SWPmode;
15090
15091 /* This operation is performed swapped, but since we only rely on the Z
15092 flag we don't need an additional mode. */
15093 if (GET_MODE (y) == SImode
15094 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15095 && GET_CODE (x) == NEG
15096 && (op == EQ || op == NE))
15097 return CC_Zmode;
15098
15099 /* This is a special case that is used by combine to allow a
15100 comparison of a shifted byte load to be split into a zero-extend
15101 followed by a comparison of the shifted integer (only valid for
15102 equalities and unsigned inequalities). */
15103 if (GET_MODE (x) == SImode
15104 && GET_CODE (x) == ASHIFT
15105 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15106 && GET_CODE (XEXP (x, 0)) == SUBREG
15107 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15108 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15109 && (op == EQ || op == NE
15110 || op == GEU || op == GTU || op == LTU || op == LEU)
15111 && CONST_INT_P (y))
15112 return CC_Zmode;
15113
15114 /* A construct for a conditional compare: if the false arm contains
15115 0, then both conditions must be true; otherwise either condition
15116 must be true. Not all conditions are possible, so CCmode is
15117 returned if it can't be done. */
15118 if (GET_CODE (x) == IF_THEN_ELSE
15119 && (XEXP (x, 2) == const0_rtx
15120 || XEXP (x, 2) == const1_rtx)
15121 && COMPARISON_P (XEXP (x, 0))
15122 && COMPARISON_P (XEXP (x, 1)))
15123 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15124 INTVAL (XEXP (x, 2)));
15125
15126 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15127 if (GET_CODE (x) == AND
15128 && (op == EQ || op == NE)
15129 && COMPARISON_P (XEXP (x, 0))
15130 && COMPARISON_P (XEXP (x, 1)))
15131 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15132 DOM_CC_X_AND_Y);
15133
15134 if (GET_CODE (x) == IOR
15135 && (op == EQ || op == NE)
15136 && COMPARISON_P (XEXP (x, 0))
15137 && COMPARISON_P (XEXP (x, 1)))
15138 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15139 DOM_CC_X_OR_Y);
15140
15141 /* An operation (on Thumb) where we want to test for a single bit.
15142 This is done by shifting that bit up into the top bit of a
15143 scratch register; we can then branch on the sign bit. */
15144 if (TARGET_THUMB1
15145 && GET_MODE (x) == SImode
15146 && (op == EQ || op == NE)
15147 && GET_CODE (x) == ZERO_EXTRACT
15148 && XEXP (x, 1) == const1_rtx)
15149 return CC_Nmode;
15150
15151 /* For an operation that sets the condition codes as a side-effect, the
15152 V flag is not set correctly, so we can only use comparisons where
15153 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15154 instead.) */
15155 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15156 if (GET_MODE (x) == SImode
15157 && y == const0_rtx
15158 && (op == EQ || op == NE || op == LT || op == GE)
15159 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15160 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15161 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15162 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15163 || GET_CODE (x) == LSHIFTRT
15164 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15165 || GET_CODE (x) == ROTATERT
15166 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15167 return CC_NOOVmode;
15168
15169 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15170 return CC_Zmode;
15171
15172 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15173 && GET_CODE (x) == PLUS
15174 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15175 return CC_Cmode;
15176
15177 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15178 {
15179 switch (op)
15180 {
15181 case EQ:
15182 case NE:
15183 /* A DImode comparison against zero can be implemented by
15184 or'ing the two halves together. */
15185 if (y == const0_rtx)
15186 return CC_Zmode;
15187
15188 /* We can do an equality test in three Thumb instructions. */
15189 if (!TARGET_32BIT)
15190 return CC_Zmode;
15191
15192 /* FALLTHROUGH */
15193
15194 case LTU:
15195 case LEU:
15196 case GTU:
15197 case GEU:
15198 /* DImode unsigned comparisons can be implemented by cmp +
15199 cmpeq without a scratch register. Not worth doing in
15200 Thumb-2. */
15201 if (TARGET_32BIT)
15202 return CC_CZmode;
15203
15204 /* FALLTHROUGH */
15205
15206 case LT:
15207 case LE:
15208 case GT:
15209 case GE:
15210 /* DImode signed and unsigned comparisons can be implemented
15211 by cmp + sbcs with a scratch register, but that does not
15212 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15213 gcc_assert (op != EQ && op != NE);
15214 return CC_NCVmode;
15215
15216 default:
15217 gcc_unreachable ();
15218 }
15219 }
15220
15221 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15222 return GET_MODE (x);
15223
15224 return CCmode;
15225 }
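/* For instance, the canonical carry test for unsigned addition overflow,
      (ltu (plus (reg r0) (reg r1)) (reg r1)),
   matches the PLUS case above and gets CC_Cmode, while an equality test
   against a negated register, (eq (neg (reg r1)) (reg r0)), only needs
   the Z flag and gets CC_Zmode.  (Register numbers are illustrative.)  */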
15226
15227 /* X and Y are two things to compare using CODE. Emit the compare insn and
15228 return the rtx for register 0 in the proper mode. FP means this is a
15229 floating point compare: I don't think that it is needed on the arm. */
15230 rtx
15231 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15232 {
15233 machine_mode mode;
15234 rtx cc_reg;
15235 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15236
15237 /* We might have X as a constant, Y as a register because of the predicates
15238 used for cmpdi. If so, force X to a register here. */
15239 if (dimode_comparison && !REG_P (x))
15240 x = force_reg (DImode, x);
15241
15242 mode = SELECT_CC_MODE (code, x, y);
15243 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15244
15245 if (dimode_comparison
15246 && mode != CC_CZmode)
15247 {
15248 rtx clobber, set;
15249
15250 /* To compare two non-zero values for equality, XOR them and
15251 then compare against zero. Not used for ARM mode; there
15252 CC_CZmode is cheaper. */
15253 if (mode == CC_Zmode && y != const0_rtx)
15254 {
15255 gcc_assert (!reload_completed);
15256 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15257 y = const0_rtx;
15258 }
15259
15260 /* A scratch register is required. */
15261 if (reload_completed)
15262 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15263 else
15264 scratch = gen_rtx_SCRATCH (SImode);
15265
15266 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15267 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15269 }
15270 else
15271 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15272
15273 return cc_reg;
15274 }
15275
15276 /* Generate a sequence of insns that will generate the correct return
15277 address mask depending on the physical architecture that the program
15278 is running on. */
15279 rtx
15280 arm_gen_return_addr_mask (void)
15281 {
15282 rtx reg = gen_reg_rtx (Pmode);
15283
15284 emit_insn (gen_return_addr_mask (reg));
15285 return reg;
15286 }
15287
15288 void
15289 arm_reload_in_hi (rtx *operands)
15290 {
15291 rtx ref = operands[1];
15292 rtx base, scratch;
15293 HOST_WIDE_INT offset = 0;
15294
15295 if (GET_CODE (ref) == SUBREG)
15296 {
15297 offset = SUBREG_BYTE (ref);
15298 ref = SUBREG_REG (ref);
15299 }
15300
15301 if (REG_P (ref))
15302 {
15303 /* We have a pseudo which has been spilt onto the stack; there
15304 are two cases here: the first where there is a simple
15305 stack-slot replacement and a second where the stack-slot is
15306 out of range, or is used as a subreg. */
15307 if (reg_equiv_mem (REGNO (ref)))
15308 {
15309 ref = reg_equiv_mem (REGNO (ref));
15310 base = find_replacement (&XEXP (ref, 0));
15311 }
15312 else
15313 /* The slot is out of range, or was dressed up in a SUBREG. */
15314 base = reg_equiv_address (REGNO (ref));
15315 }
15316 else
15317 base = find_replacement (&XEXP (ref, 0));
15318
15319 /* Handle the case where the address is too complex to be offset by 1. */
15320 if (GET_CODE (base) == MINUS
15321 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15322 {
15323 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15324
15325 emit_set_insn (base_plus, base);
15326 base = base_plus;
15327 }
15328 else if (GET_CODE (base) == PLUS)
15329 {
15330 /* The addend must be CONST_INT, or we would have dealt with it above. */
15331 HOST_WIDE_INT hi, lo;
15332
15333 offset += INTVAL (XEXP (base, 1));
15334 base = XEXP (base, 0);
15335
15336 /* Rework the address into a legal sequence of insns. */
15337 /* Valid range for lo is -4095 -> 4095 */
15338 lo = (offset >= 0
15339 ? (offset & 0xfff)
15340 : -((-offset) & 0xfff));
15341
15342 /* Corner case, if lo is the max offset then we would be out of range
15343 once we have added the additional 1 below, so bump the msb into the
15344 pre-loading insn(s). */
15345 if (lo == 4095)
15346 lo &= 0x7ff;
15347
15348 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15349 ^ (HOST_WIDE_INT) 0x80000000)
15350 - (HOST_WIDE_INT) 0x80000000);
15351
15352 gcc_assert (hi + lo == offset);
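      /* Worked example (values for illustration only): offset == 0x1234
	 splits into lo == 0x234 and hi == 0x1000.  For the corner case
	 offset == 4095, lo is reduced to 0x7ff and hi becomes 0x800, so
	 that both offset and offset + 1 stay within the +/-4095 range of
	 the byte loads below.  */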
15353
15354 if (hi != 0)
15355 {
15356 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15357
15358 /* Get the base address; addsi3 knows how to handle constants
15359 that require more than one insn. */
15360 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15361 base = base_plus;
15362 offset = lo;
15363 }
15364 }
15365
15366 /* Operands[2] may overlap operands[0] (though it won't overlap
15367 operands[1]), that's why we asked for a DImode reg -- so we can
15368 use the bit that does not overlap. */
15369 if (REGNO (operands[2]) == REGNO (operands[0]))
15370 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15371 else
15372 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15373
15374 emit_insn (gen_zero_extendqisi2 (scratch,
15375 gen_rtx_MEM (QImode,
15376 plus_constant (Pmode, base,
15377 offset))));
15378 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15379 gen_rtx_MEM (QImode,
15380 plus_constant (Pmode, base,
15381 offset + 1))));
15382 if (!BYTES_BIG_ENDIAN)
15383 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15384 gen_rtx_IOR (SImode,
15385 gen_rtx_ASHIFT
15386 (SImode,
15387 gen_rtx_SUBREG (SImode, operands[0], 0),
15388 GEN_INT (8)),
15389 scratch));
15390 else
15391 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15392 gen_rtx_IOR (SImode,
15393 gen_rtx_ASHIFT (SImode, scratch,
15394 GEN_INT (8)),
15395 gen_rtx_SUBREG (SImode, operands[0], 0)));
15396 }
15397
15398 /* Handle storing a half-word to memory during reload by synthesizing as two
15399 byte stores. Take care not to clobber the input values until after we
15400 have moved them somewhere safe. This code assumes that if the DImode
15401 scratch in operands[2] overlaps either the input value or output address
15402 in some way, then that value must die in this insn (we absolutely need
15403 two scratch registers for some corner cases). */
15404 void
15405 arm_reload_out_hi (rtx *operands)
15406 {
15407 rtx ref = operands[0];
15408 rtx outval = operands[1];
15409 rtx base, scratch;
15410 HOST_WIDE_INT offset = 0;
15411
15412 if (GET_CODE (ref) == SUBREG)
15413 {
15414 offset = SUBREG_BYTE (ref);
15415 ref = SUBREG_REG (ref);
15416 }
15417
15418 if (REG_P (ref))
15419 {
15420 /* We have a pseudo which has been spilt onto the stack; there
15421 are two cases here: the first where there is a simple
15422 stack-slot replacement and a second where the stack-slot is
15423 out of range, or is used as a subreg. */
15424 if (reg_equiv_mem (REGNO (ref)))
15425 {
15426 ref = reg_equiv_mem (REGNO (ref));
15427 base = find_replacement (&XEXP (ref, 0));
15428 }
15429 else
15430 /* The slot is out of range, or was dressed up in a SUBREG. */
15431 base = reg_equiv_address (REGNO (ref));
15432 }
15433 else
15434 base = find_replacement (&XEXP (ref, 0));
15435
15436 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15437
15438 /* Handle the case where the address is too complex to be offset by 1. */
15439 if (GET_CODE (base) == MINUS
15440 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15441 {
15442 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15443
15444 /* Be careful not to destroy OUTVAL. */
15445 if (reg_overlap_mentioned_p (base_plus, outval))
15446 {
15447 /* Updating base_plus might destroy outval, see if we can
15448 swap the scratch and base_plus. */
15449 if (!reg_overlap_mentioned_p (scratch, outval))
15450 std::swap (scratch, base_plus);
15451 else
15452 {
15453 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15454
15455 /* Be conservative and copy OUTVAL into the scratch now,
15456 this should only be necessary if outval is a subreg
15457 of something larger than a word. */
15458 /* XXX Might this clobber base? I can't see how it can,
15459 since scratch is known to overlap with OUTVAL, and
15460 must be wider than a word. */
15461 emit_insn (gen_movhi (scratch_hi, outval));
15462 outval = scratch_hi;
15463 }
15464 }
15465
15466 emit_set_insn (base_plus, base);
15467 base = base_plus;
15468 }
15469 else if (GET_CODE (base) == PLUS)
15470 {
15471 /* The addend must be CONST_INT, or we would have dealt with it above. */
15472 HOST_WIDE_INT hi, lo;
15473
15474 offset += INTVAL (XEXP (base, 1));
15475 base = XEXP (base, 0);
15476
15477 /* Rework the address into a legal sequence of insns. */
15478 /* Valid range for lo is -4095 -> 4095 */
15479 lo = (offset >= 0
15480 ? (offset & 0xfff)
15481 : -((-offset) & 0xfff));
15482
15483 /* Corner case, if lo is the max offset then we would be out of range
15484 once we have added the additional 1 below, so bump the msb into the
15485 pre-loading insn(s). */
15486 if (lo == 4095)
15487 lo &= 0x7ff;
15488
15489 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15490 ^ (HOST_WIDE_INT) 0x80000000)
15491 - (HOST_WIDE_INT) 0x80000000);
15492
15493 gcc_assert (hi + lo == offset);
15494
15495 if (hi != 0)
15496 {
15497 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15498
15499 /* Be careful not to destroy OUTVAL. */
15500 if (reg_overlap_mentioned_p (base_plus, outval))
15501 {
15502 /* Updating base_plus might destroy outval, see if we
15503 can swap the scratch and base_plus. */
15504 if (!reg_overlap_mentioned_p (scratch, outval))
15505 std::swap (scratch, base_plus);
15506 else
15507 {
15508 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15509
15510 /* Be conservative and copy outval into scratch now,
15511 this should only be necessary if outval is a
15512 subreg of something larger than a word. */
15513 /* XXX Might this clobber base? I can't see how it
15514 can, since scratch is known to overlap with
15515 outval. */
15516 emit_insn (gen_movhi (scratch_hi, outval));
15517 outval = scratch_hi;
15518 }
15519 }
15520
15521 /* Get the base address; addsi3 knows how to handle constants
15522 that require more than one insn. */
15523 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15524 base = base_plus;
15525 offset = lo;
15526 }
15527 }
15528
15529 if (BYTES_BIG_ENDIAN)
15530 {
15531 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15532 plus_constant (Pmode, base,
15533 offset + 1)),
15534 gen_lowpart (QImode, outval)));
15535 emit_insn (gen_lshrsi3 (scratch,
15536 gen_rtx_SUBREG (SImode, outval, 0),
15537 GEN_INT (8)));
15538 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15539 offset)),
15540 gen_lowpart (QImode, scratch)));
15541 }
15542 else
15543 {
15544 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15545 offset)),
15546 gen_lowpart (QImode, outval)));
15547 emit_insn (gen_lshrsi3 (scratch,
15548 gen_rtx_SUBREG (SImode, outval, 0),
15549 GEN_INT (8)));
15550 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15551 plus_constant (Pmode, base,
15552 offset + 1)),
15553 gen_lowpart (QImode, scratch)));
15554 }
15555 }
15556
15557 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15558 (padded to the size of a word) should be passed in a register. */
15559
15560 static bool
15561 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15562 {
15563 if (TARGET_AAPCS_BASED)
15564 return must_pass_in_stack_var_size (mode, type);
15565 else
15566 return must_pass_in_stack_var_size_or_pad (mode, type);
15567 }
15568
15569
15570 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15571 Return true if an argument passed on the stack should be padded upwards,
15572 i.e. if the least-significant byte has useful data.
15573 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15574 aggregate types are placed in the lowest memory address. */
15575
15576 bool
15577 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15578 {
15579 if (!TARGET_AAPCS_BASED)
15580 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15581
15582 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15583 return false;
15584
15585 return true;
15586 }
15587
15588
15589 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15590 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15591 register has useful data, and return the opposite if the most
15592 significant byte does. */
15593
15594 bool
15595 arm_pad_reg_upward (machine_mode mode,
15596 tree type, int first ATTRIBUTE_UNUSED)
15597 {
15598 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15599 {
15600 /* For AAPCS, small aggregates, small fixed-point types,
15601 and small complex types are always padded upwards. */
15602 if (type)
15603 {
15604 if ((AGGREGATE_TYPE_P (type)
15605 || TREE_CODE (type) == COMPLEX_TYPE
15606 || FIXED_POINT_TYPE_P (type))
15607 && int_size_in_bytes (type) <= 4)
15608 return true;
15609 }
15610 else
15611 {
15612 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15613 && GET_MODE_SIZE (mode) <= 4)
15614 return true;
15615 }
15616 }
15617
15618 /* Otherwise, use default padding. */
15619 return !BYTES_BIG_ENDIAN;
15620 }
15621
15622 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15623 assuming that the address in the base register is word aligned. */
15624 bool
15625 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15626 {
15627 HOST_WIDE_INT max_offset;
15628
15629 /* Offset must be a multiple of 4 in Thumb mode. */
15630 if (TARGET_THUMB2 && ((offset & 3) != 0))
15631 return false;
15632
15633 if (TARGET_THUMB2)
15634 max_offset = 1020;
15635 else if (TARGET_ARM)
15636 max_offset = 255;
15637 else
15638 return false;
15639
15640 return ((offset <= max_offset) && (offset >= -max_offset));
15641 }
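/* For example, an offset of 256 is accepted in Thumb-2 (a multiple of 4
   and within +/-1020) but rejected in ARM state (beyond +/-255), while an
   offset of 6 is accepted in ARM state but rejected in Thumb-2 because it
   is not a multiple of 4.  */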
15642
15643 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15644 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15645 Assumes that the address in the base register RN is word aligned. Pattern
15646 guarantees that both memory accesses use the same base register,
15647 the offsets are constants within the range, and the gap between the offsets is 4.
15648 If reload is complete then check that the registers are legal. WBACK indicates whether
15649 address is updated. LOAD indicates whether memory access is load or store. */
15650 bool
15651 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15652 bool wback, bool load)
15653 {
15654 unsigned int t, t2, n;
15655
15656 if (!reload_completed)
15657 return true;
15658
15659 if (!offset_ok_for_ldrd_strd (offset))
15660 return false;
15661
15662 t = REGNO (rt);
15663 t2 = REGNO (rt2);
15664 n = REGNO (rn);
15665
15666 if ((TARGET_THUMB2)
15667 && ((wback && (n == t || n == t2))
15668 || (t == SP_REGNUM)
15669 || (t == PC_REGNUM)
15670 || (t2 == SP_REGNUM)
15671 || (t2 == PC_REGNUM)
15672 || (!load && (n == PC_REGNUM))
15673 || (load && (t == t2))
15674 /* Triggers Cortex-M3 LDRD errata. */
15675 || (!wback && load && fix_cm3_ldrd && (n == t))))
15676 return false;
15677
15678 if ((TARGET_ARM)
15679 && ((wback && (n == t || n == t2))
15680 || (t2 == PC_REGNUM)
15681 || (t % 2 != 0) /* First destination register is not even. */
15682 || (t2 != t + 1)
15683 /* PC can be used as base register (for offset addressing only),
15684 but it is deprecated. */
15685 || (n == PC_REGNUM)))
15686 return false;
15687
15688 return true;
15689 }
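/* For example, once reload has completed, an ARM-state LDRD of r4, r5 from
   [r6, #8] passes these checks, whereas using r5, r6 as the register pair
   is rejected because the first register must be even and the second must
   be the next consecutive register.  */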
15690
15691 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15692 operand MEM's address contains an immediate offset from the base
15693 register and has no side effects, in which case it sets BASE and
15694 OFFSET accordingly. */
15695 static bool
15696 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15697 {
15698 rtx addr;
15699
15700 gcc_assert (base != NULL && offset != NULL);
15701
15702 /* TODO: Handle more general memory operand patterns, such as
15703 PRE_DEC and PRE_INC. */
15704
15705 if (side_effects_p (mem))
15706 return false;
15707
15708 /* Can't deal with subregs. */
15709 if (GET_CODE (mem) == SUBREG)
15710 return false;
15711
15712 gcc_assert (MEM_P (mem));
15713
15714 *offset = const0_rtx;
15715
15716 addr = XEXP (mem, 0);
15717
15718 /* If addr isn't valid for DImode, then we can't handle it. */
15719 if (!arm_legitimate_address_p (DImode, addr,
15720 reload_in_progress || reload_completed))
15721 return false;
15722
15723 if (REG_P (addr))
15724 {
15725 *base = addr;
15726 return true;
15727 }
15728 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15729 {
15730 *base = XEXP (addr, 0);
15731 *offset = XEXP (addr, 1);
15732 return (REG_P (*base) && CONST_INT_P (*offset));
15733 }
15734
15735 return false;
15736 }
15737
15738 /* Called from a peephole2 to replace two word-size accesses with a
15739 single LDRD/STRD instruction. Returns true iff we can generate a
15740 new instruction sequence. That is, both accesses use the same base
15741 register and the gap between constant offsets is 4. This function
15742 may reorder its operands to match ldrd/strd RTL templates.
15743 OPERANDS are the operands found by the peephole matcher;
15744 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15745 corresponding memory operands. LOAD indicates whether the access
15746 is a load or a store. CONST_STORE indicates a store of constant
15747 integer values held in OPERANDS[4,5], and assumes that the pattern
15748 is four insns long, for the purpose of checking dead registers.
15749 COMMUTE indicates that register operands may be reordered. */
15750 bool
15751 gen_operands_ldrd_strd (rtx *operands, bool load,
15752 bool const_store, bool commute)
15753 {
15754 int nops = 2;
15755 HOST_WIDE_INT offsets[2], offset;
15756 rtx base = NULL_RTX;
15757 rtx cur_base, cur_offset, tmp;
15758 int i, gap;
15759 HARD_REG_SET regset;
15760
15761 gcc_assert (!const_store || !load);
15762 /* Check that the memory references are immediate offsets from the
15763 same base register. Extract the base register, the destination
15764 registers, and the corresponding memory offsets. */
15765 for (i = 0; i < nops; i++)
15766 {
15767 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15768 return false;
15769
15770 if (i == 0)
15771 base = cur_base;
15772 else if (REGNO (base) != REGNO (cur_base))
15773 return false;
15774
15775 offsets[i] = INTVAL (cur_offset);
15776 if (GET_CODE (operands[i]) == SUBREG)
15777 {
15778 tmp = SUBREG_REG (operands[i]);
15779 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15780 operands[i] = tmp;
15781 }
15782 }
15783
15784 /* Make sure there is no dependency between the individual loads. */
15785 if (load && REGNO (operands[0]) == REGNO (base))
15786 return false; /* RAW */
15787
15788 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15789 return false; /* WAW */
15790
15791 /* If the same input register is used in both stores
15792 when storing different constants, try to find a free register.
15793 For example, the code
15794 mov r0, 0
15795 str r0, [r2]
15796 mov r0, 1
15797 str r0, [r2, #4]
15798 can be transformed into
15799 mov r1, 0
15800 strd r1, r0, [r2]
15801 in Thumb mode assuming that r1 is free. */
15802 if (const_store
15803 && REGNO (operands[0]) == REGNO (operands[1])
15804 && INTVAL (operands[4]) != INTVAL (operands[5]))
15805 {
15806 if (TARGET_THUMB2)
15807 {
15808 CLEAR_HARD_REG_SET (regset);
15809 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15810 if (tmp == NULL_RTX)
15811 return false;
15812
15813 /* Use the new register in the first load to ensure that
15814 if the original input register is not dead after peephole,
15815 then it will have the correct constant value. */
15816 operands[0] = tmp;
15817 }
15818 else if (TARGET_ARM)
15819 {
15820 return false;
15821 int regno = REGNO (operands[0]);
15822 if (!peep2_reg_dead_p (4, operands[0]))
15823 {
15824 /* When the input register is even and is not dead after the
15825 pattern, it has to hold the second constant but we cannot
15826 form a legal STRD in ARM mode with this register as the second
15827 register. */
15828 if (regno % 2 == 0)
15829 return false;
15830
15831 /* Is regno-1 free? */
15832 SET_HARD_REG_SET (regset);
15833 CLEAR_HARD_REG_BIT(regset, regno - 1);
15834 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15835 if (tmp == NULL_RTX)
15836 return false;
15837
15838 operands[0] = tmp;
15839 }
15840 else
15841 {
15842 /* Find a DImode register. */
15843 CLEAR_HARD_REG_SET (regset);
15844 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15845 if (tmp != NULL_RTX)
15846 {
15847 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15848 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15849 }
15850 else
15851 {
15852 /* Can we use the input register to form a DI register? */
15853 SET_HARD_REG_SET (regset);
15854 CLEAR_HARD_REG_BIT(regset,
15855 regno % 2 == 0 ? regno + 1 : regno - 1);
15856 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15857 if (tmp == NULL_RTX)
15858 return false;
15859 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15860 }
15861 }
15862
15863 gcc_assert (operands[0] != NULL_RTX);
15864 gcc_assert (operands[1] != NULL_RTX);
15865 gcc_assert (REGNO (operands[0]) % 2 == 0);
15866 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15867 }
15868 }
15869
15870 /* Make sure the instructions are ordered with lower memory access first. */
15871 if (offsets[0] > offsets[1])
15872 {
15873 gap = offsets[0] - offsets[1];
15874 offset = offsets[1];
15875
15876 /* Swap the instructions such that lower memory is accessed first. */
15877 std::swap (operands[0], operands[1]);
15878 std::swap (operands[2], operands[3]);
15879 if (const_store)
15880 std::swap (operands[4], operands[5]);
15881 }
15882 else
15883 {
15884 gap = offsets[1] - offsets[0];
15885 offset = offsets[0];
15886 }
15887
15888 /* Make sure accesses are to consecutive memory locations. */
15889 if (gap != 4)
15890 return false;
15891
15892 /* Make sure we generate legal instructions. */
15893 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15894 false, load))
15895 return true;
15896
15897 /* In Thumb state, where the registers are almost unconstrained, there
15898 is little hope of fixing it. */
15899 if (TARGET_THUMB2)
15900 return false;
15901
15902 if (load && commute)
15903 {
15904 /* Try reordering registers. */
15905 std::swap (operands[0], operands[1]);
15906 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15907 false, load))
15908 return true;
15909 }
15910
15911 if (const_store)
15912 {
15913 /* If input registers are dead after this pattern, they can be
15914 reordered or replaced by other registers that are free in the
15915 current pattern. */
15916 if (!peep2_reg_dead_p (4, operands[0])
15917 || !peep2_reg_dead_p (4, operands[1]))
15918 return false;
15919
15920 /* Try to reorder the input registers. */
15921 /* For example, the code
15922 mov r0, 0
15923 mov r1, 1
15924 str r1, [r2]
15925 str r0, [r2, #4]
15926 can be transformed into
15927 mov r1, 0
15928 mov r0, 1
15929 strd r0, [r2]
15930 */
15931 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15932 false, false))
15933 {
15934 std::swap (operands[0], operands[1]);
15935 return true;
15936 }
15937
15938 /* Try to find a free DI register. */
15939 CLEAR_HARD_REG_SET (regset);
15940 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15941 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15942 while (true)
15943 {
15944 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15945 if (tmp == NULL_RTX)
15946 return false;
15947
15948 /* DREG must be an even-numbered register in DImode.
15949 Split it into SI registers. */
15950 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15951 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15952 gcc_assert (operands[0] != NULL_RTX);
15953 gcc_assert (operands[1] != NULL_RTX);
15954 gcc_assert (REGNO (operands[0]) % 2 == 0);
15955 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15956
15957 return (operands_ok_ldrd_strd (operands[0], operands[1],
15958 base, offset,
15959 false, load));
15960 }
15961 }
15962
15963 return false;
15964 }
15965
15966
15967
15968 \f
15969 /* Print a symbolic form of X to the debug file, F. */
15970 static void
15971 arm_print_value (FILE *f, rtx x)
15972 {
15973 switch (GET_CODE (x))
15974 {
15975 case CONST_INT:
15976 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15977 return;
15978
15979 case CONST_DOUBLE:
15980 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15981 return;
15982
15983 case CONST_VECTOR:
15984 {
15985 int i;
15986
15987 fprintf (f, "<");
15988 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15989 {
15990 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15991 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15992 fputc (',', f);
15993 }
15994 fprintf (f, ">");
15995 }
15996 return;
15997
15998 case CONST_STRING:
15999 fprintf (f, "\"%s\"", XSTR (x, 0));
16000 return;
16001
16002 case SYMBOL_REF:
16003 fprintf (f, "`%s'", XSTR (x, 0));
16004 return;
16005
16006 case LABEL_REF:
16007 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16008 return;
16009
16010 case CONST:
16011 arm_print_value (f, XEXP (x, 0));
16012 return;
16013
16014 case PLUS:
16015 arm_print_value (f, XEXP (x, 0));
16016 fprintf (f, "+");
16017 arm_print_value (f, XEXP (x, 1));
16018 return;
16019
16020 case PC:
16021 fprintf (f, "pc");
16022 return;
16023
16024 default:
16025 fprintf (f, "????");
16026 return;
16027 }
16028 }
16029 \f
16030 /* Routines for manipulation of the constant pool. */
16031
16032 /* Arm instructions cannot load a large constant directly into a
16033 register; they have to come from a pc relative load. The constant
16034 must therefore be placed in the addressable range of the pc
16035 relative load. Depending on the precise pc relative load
16036 instruction the range is somewhere between 256 bytes and 4k. This
16037 means that we often have to dump a constant inside a function, and
16038 generate code to branch around it.
16039
16040 It is important to minimize this, since the branches will slow
16041 things down and make the code larger.
16042
16043 Normally we can hide the table after an existing unconditional
16044 branch so that there is no interruption of the flow, but in the
16045 worst case the code looks like this:
16046
16047 ldr rn, L1
16048 ...
16049 b L2
16050 align
16051 L1: .long value
16052 L2:
16053 ...
16054
16055 ldr rn, L3
16056 ...
16057 b L4
16058 align
16059 L3: .long value
16060 L4:
16061 ...
16062
16063 We fix this by performing a scan after scheduling, which notices
16064 which instructions need to have their operands fetched from the
16065 constant table and builds the table.
16066
16067 The algorithm starts by building a table of all the constants that
16068 need fixing up and all the natural barriers in the function (places
16069 where a constant table can be dropped without breaking the flow).
16070 For each fixup we note how far the pc-relative replacement will be
16071 able to reach and the offset of the instruction into the function.
16072
16073 Having built the table we then group the fixes together to form
16074 tables that are as large as possible (subject to addressing
16075 constraints) and emit each table of constants after the last
16076 barrier that is within range of all the instructions in the group.
16077 If a group does not contain a barrier, then we forcibly create one
16078 by inserting a jump instruction into the flow. Once the table has
16079 been inserted, the insns are then modified to reference the
16080 relevant entry in the pool.
16081
16082 Possible enhancements to the algorithm (not implemented) are:
16083
16084 1) For some processors and object formats, there may be benefit in
16085 aligning the pools to the start of cache lines; this alignment
16086 would need to be taken into account when calculating addressability
16087 of a pool. */
16088
16089 /* These typedefs are located at the start of this file, so that
16090 they can be used in the prototypes there. This comment is to
16091 remind readers of that fact so that the following structures
16092 can be understood more easily.
16093
16094 typedef struct minipool_node Mnode;
16095 typedef struct minipool_fixup Mfix; */
16096
16097 struct minipool_node
16098 {
16099 /* Doubly linked chain of entries. */
16100 Mnode * next;
16101 Mnode * prev;
16102 /* The maximum offset into the code at which this entry can be placed. While
16103 pushing fixes for forward references, all entries are sorted in order
16104 of increasing max_address. */
16105 HOST_WIDE_INT max_address;
16106 /* Similarly for an entry inserted for a backwards ref. */
16107 HOST_WIDE_INT min_address;
16108 /* The number of fixes referencing this entry. This can become zero
16109 if we "unpush" an entry. In this case we ignore the entry when we
16110 come to emit the code. */
16111 int refcount;
16112 /* The offset from the start of the minipool. */
16113 HOST_WIDE_INT offset;
16114 /* The value in table. */
16115 rtx value;
16116 /* The mode of value. */
16117 machine_mode mode;
16118 /* The size of the value. With iWMMXt enabled
16119 sizes > 4 also imply an alignment of 8 bytes. */
16120 int fix_size;
16121 };
16122
16123 struct minipool_fixup
16124 {
16125 Mfix * next;
16126 rtx_insn * insn;
16127 HOST_WIDE_INT address;
16128 rtx * loc;
16129 machine_mode mode;
16130 int fix_size;
16131 rtx value;
16132 Mnode * minipool;
16133 HOST_WIDE_INT forwards;
16134 HOST_WIDE_INT backwards;
16135 };
16136
16137 /* Fixes less than a word need padding out to a word boundary. */
16138 #define MINIPOOL_FIX_SIZE(mode) \
16139 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
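/* So, for example, MINIPOOL_FIX_SIZE (HImode) is 4, while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */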
16140
16141 static Mnode * minipool_vector_head;
16142 static Mnode * minipool_vector_tail;
16143 static rtx_code_label *minipool_vector_label;
16144 static int minipool_pad;
16145
16146 /* The linked list of all minipool fixes required for this function. */
16147 Mfix * minipool_fix_head;
16148 Mfix * minipool_fix_tail;
16149 /* The fix entry for the current minipool, once it has been placed. */
16150 Mfix * minipool_barrier;
16151
16152 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16153 #define JUMP_TABLES_IN_TEXT_SECTION 0
16154 #endif
16155
16156 static HOST_WIDE_INT
16157 get_jump_table_size (rtx_jump_table_data *insn)
16158 {
16159 /* ADDR_VECs only take room if read-only data goes into the text
16160 section. */
16161 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16162 {
16163 rtx body = PATTERN (insn);
16164 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16165 HOST_WIDE_INT size;
16166 HOST_WIDE_INT modesize;
16167
16168 modesize = GET_MODE_SIZE (GET_MODE (body));
16169 size = modesize * XVECLEN (body, elt);
16170 switch (modesize)
16171 {
16172 case 1:
16173 /* Round up size of TBB table to a halfword boundary. */
16174 size = (size + 1) & ~(HOST_WIDE_INT)1;
16175 break;
16176 case 2:
16177 /* No padding necessary for TBH. */
16178 break;
16179 case 4:
16180 /* Add two bytes for alignment on Thumb. */
16181 if (TARGET_THUMB)
16182 size += 2;
16183 break;
16184 default:
16185 gcc_unreachable ();
16186 }
16187 return size;
16188 }
16189
16190 return 0;
16191 }
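/* For example, a five-entry ADDR_DIFF_VEC in QImode (a TBB table) counts
   as 6 bytes after rounding up to a halfword, and a three-entry SImode
   table counts as 14 bytes on Thumb (12 bytes of data plus 2 bytes of
   alignment).  */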
16192
16193 /* Return the maximum amount of padding that will be inserted before
16194 label LABEL. */
16195
16196 static HOST_WIDE_INT
16197 get_label_padding (rtx label)
16198 {
16199 HOST_WIDE_INT align, min_insn_size;
16200
16201 align = 1 << label_to_alignment (label);
16202 min_insn_size = TARGET_THUMB ? 2 : 4;
16203 return align > min_insn_size ? align - min_insn_size : 0;
16204 }
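/* For example, a label aligned to an 8-byte boundary can be preceded by up
   to 6 bytes of padding on Thumb (where the minimum instruction size is 2)
   and up to 4 bytes on ARM.  */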
16205
16206 /* Move a minipool fix MP from its current location to before MAX_MP.
16207 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16208 constraints may need updating. */
16209 static Mnode *
16210 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16211 HOST_WIDE_INT max_address)
16212 {
16213 /* The code below assumes these are different. */
16214 gcc_assert (mp != max_mp);
16215
16216 if (max_mp == NULL)
16217 {
16218 if (max_address < mp->max_address)
16219 mp->max_address = max_address;
16220 }
16221 else
16222 {
16223 if (max_address > max_mp->max_address - mp->fix_size)
16224 mp->max_address = max_mp->max_address - mp->fix_size;
16225 else
16226 mp->max_address = max_address;
16227
16228 /* Unlink MP from its current position. Since max_mp is non-null,
16229 mp->prev must be non-null. */
16230 mp->prev->next = mp->next;
16231 if (mp->next != NULL)
16232 mp->next->prev = mp->prev;
16233 else
16234 minipool_vector_tail = mp->prev;
16235
16236 /* Re-insert it before MAX_MP. */
16237 mp->next = max_mp;
16238 mp->prev = max_mp->prev;
16239 max_mp->prev = mp;
16240
16241 if (mp->prev != NULL)
16242 mp->prev->next = mp;
16243 else
16244 minipool_vector_head = mp;
16245 }
16246
16247 /* Save the new entry. */
16248 max_mp = mp;
16249
16250 /* Scan over the preceding entries and adjust their addresses as
16251 required. */
16252 while (mp->prev != NULL
16253 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16254 {
16255 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16256 mp = mp->prev;
16257 }
16258
16259 return max_mp;
16260 }
16261
16262 /* Add a constant to the minipool for a forward reference. Returns the
16263 node added or NULL if the constant will not fit in this pool. */
16264 static Mnode *
16265 add_minipool_forward_ref (Mfix *fix)
16266 {
16267 /* If set, max_mp is the first pool_entry that has a lower
16268 constraint than the one we are trying to add. */
16269 Mnode * max_mp = NULL;
16270 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16271 Mnode * mp;
16272
16273 /* If the minipool starts before the end of FIX->INSN then this FIX
16274 can not be placed into the current pool. Furthermore, adding the
16275 new constant pool entry may cause the pool to start FIX_SIZE bytes
16276 earlier. */
16277 if (minipool_vector_head &&
16278 (fix->address + get_attr_length (fix->insn)
16279 >= minipool_vector_head->max_address - fix->fix_size))
16280 return NULL;
16281
16282 /* Scan the pool to see if a constant with the same value has
16283 already been added. While we are doing this, also note the
16284 location where we must insert the constant if it doesn't already
16285 exist. */
16286 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16287 {
16288 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16289 && fix->mode == mp->mode
16290 && (!LABEL_P (fix->value)
16291 || (CODE_LABEL_NUMBER (fix->value)
16292 == CODE_LABEL_NUMBER (mp->value)))
16293 && rtx_equal_p (fix->value, mp->value))
16294 {
16295 /* More than one fix references this entry. */
16296 mp->refcount++;
16297 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16298 }
16299
16300 /* Note the insertion point if necessary. */
16301 if (max_mp == NULL
16302 && mp->max_address > max_address)
16303 max_mp = mp;
16304
16305 /* If we are inserting an 8-byte aligned quantity and
16306 we have not already found an insertion point, then
16307 make sure that all such 8-byte aligned quantities are
16308 placed at the start of the pool. */
16309 if (ARM_DOUBLEWORD_ALIGN
16310 && max_mp == NULL
16311 && fix->fix_size >= 8
16312 && mp->fix_size < 8)
16313 {
16314 max_mp = mp;
16315 max_address = mp->max_address;
16316 }
16317 }
16318
16319 /* The value is not currently in the minipool, so we need to create
16320 a new entry for it. If MAX_MP is NULL, the entry will be put on
16321 the end of the list since the placement is less constrained than
16322 any existing entry. Otherwise, we insert the new fix before
16323 MAX_MP and, if necessary, adjust the constraints on the other
16324 entries. */
16325 mp = XNEW (Mnode);
16326 mp->fix_size = fix->fix_size;
16327 mp->mode = fix->mode;
16328 mp->value = fix->value;
16329 mp->refcount = 1;
16330 /* Not yet required for a backwards ref. */
16331 mp->min_address = -65536;
16332
16333 if (max_mp == NULL)
16334 {
16335 mp->max_address = max_address;
16336 mp->next = NULL;
16337 mp->prev = minipool_vector_tail;
16338
16339 if (mp->prev == NULL)
16340 {
16341 minipool_vector_head = mp;
16342 minipool_vector_label = gen_label_rtx ();
16343 }
16344 else
16345 mp->prev->next = mp;
16346
16347 minipool_vector_tail = mp;
16348 }
16349 else
16350 {
16351 if (max_address > max_mp->max_address - mp->fix_size)
16352 mp->max_address = max_mp->max_address - mp->fix_size;
16353 else
16354 mp->max_address = max_address;
16355
16356 mp->next = max_mp;
16357 mp->prev = max_mp->prev;
16358 max_mp->prev = mp;
16359 if (mp->prev != NULL)
16360 mp->prev->next = mp;
16361 else
16362 minipool_vector_head = mp;
16363 }
16364
16365 /* Save the new entry. */
16366 max_mp = mp;
16367
16368 /* Scan over the preceding entries and adjust their addresses as
16369 required. */
16370 while (mp->prev != NULL
16371 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16372 {
16373 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16374 mp = mp->prev;
16375 }
16376
16377 return max_mp;
16378 }
16379
16380 static Mnode *
16381 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16382 HOST_WIDE_INT min_address)
16383 {
16384 HOST_WIDE_INT offset;
16385
16386 /* The code below assumes these are different. */
16387 gcc_assert (mp != min_mp);
16388
16389 if (min_mp == NULL)
16390 {
16391 if (min_address > mp->min_address)
16392 mp->min_address = min_address;
16393 }
16394 else
16395 {
16396 /* We will adjust this below if it is too loose. */
16397 mp->min_address = min_address;
16398
16399 /* Unlink MP from its current position. Since min_mp is non-null,
16400 mp->next must be non-null. */
16401 mp->next->prev = mp->prev;
16402 if (mp->prev != NULL)
16403 mp->prev->next = mp->next;
16404 else
16405 minipool_vector_head = mp->next;
16406
16407 /* Reinsert it after MIN_MP. */
16408 mp->prev = min_mp;
16409 mp->next = min_mp->next;
16410 min_mp->next = mp;
16411 if (mp->next != NULL)
16412 mp->next->prev = mp;
16413 else
16414 minipool_vector_tail = mp;
16415 }
16416
16417 min_mp = mp;
16418
16419 offset = 0;
16420 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16421 {
16422 mp->offset = offset;
16423 if (mp->refcount > 0)
16424 offset += mp->fix_size;
16425
16426 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16427 mp->next->min_address = mp->min_address + mp->fix_size;
16428 }
16429
16430 return min_mp;
16431 }
16432
16433 /* Add a constant to the minipool for a backward reference. Returns the
16434 node added or NULL if the constant will not fit in this pool.
16435
16436 Note that the code for insertion for a backwards reference can be
16437 somewhat confusing because the calculated offsets for each fix do
16438 not take into account the size of the pool (which is still under
16439 construction). */
16440 static Mnode *
16441 add_minipool_backward_ref (Mfix *fix)
16442 {
16443 /* If set, min_mp is the last pool_entry that has a lower constraint
16444 than the one we are trying to add. */
16445 Mnode *min_mp = NULL;
16446 /* This can be negative, since it is only a constraint. */
16447 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16448 Mnode *mp;
16449
16450 /* If we can't reach the current pool from this insn, or if we can't
16451 insert this entry at the end of the pool without pushing other
16452 fixes out of range, then we don't try. This ensures that we
16453 can't fail later on. */
16454 if (min_address >= minipool_barrier->address
16455 || (minipool_vector_tail->min_address + fix->fix_size
16456 >= minipool_barrier->address))
16457 return NULL;
16458
16459 /* Scan the pool to see if a constant with the same value has
16460 already been added. While we are doing this, also note the
16461 location where we must insert the constant if it doesn't already
16462 exist. */
16463 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16464 {
16465 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16466 && fix->mode == mp->mode
16467 && (!LABEL_P (fix->value)
16468 || (CODE_LABEL_NUMBER (fix->value)
16469 == CODE_LABEL_NUMBER (mp->value)))
16470 && rtx_equal_p (fix->value, mp->value)
16471 /* Check that there is enough slack to move this entry to the
16472 end of the table (this is conservative). */
16473 && (mp->max_address
16474 > (minipool_barrier->address
16475 + minipool_vector_tail->offset
16476 + minipool_vector_tail->fix_size)))
16477 {
16478 mp->refcount++;
16479 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16480 }
16481
16482 if (min_mp != NULL)
16483 mp->min_address += fix->fix_size;
16484 else
16485 {
16486 /* Note the insertion point if necessary. */
16487 if (mp->min_address < min_address)
16488 {
16489 /* For now, we do not allow the insertion of nodes that require
16490 8-byte alignment anywhere but at the start of the pool. */
16491 if (ARM_DOUBLEWORD_ALIGN
16492 && fix->fix_size >= 8 && mp->fix_size < 8)
16493 return NULL;
16494 else
16495 min_mp = mp;
16496 }
16497 else if (mp->max_address
16498 < minipool_barrier->address + mp->offset + fix->fix_size)
16499 {
16500 /* Inserting before this entry would push the fix beyond
16501 its maximum address (which can happen if we have
16502 re-located a forwards fix); force the new fix to come
16503 after it. */
16504 if (ARM_DOUBLEWORD_ALIGN
16505 && fix->fix_size >= 8 && mp->fix_size < 8)
16506 return NULL;
16507 else
16508 {
16509 min_mp = mp;
16510 min_address = mp->min_address + fix->fix_size;
16511 }
16512 }
16513 /* Do not insert a non-8-byte aligned quantity before 8-byte
16514 aligned quantities. */
16515 else if (ARM_DOUBLEWORD_ALIGN
16516 && fix->fix_size < 8
16517 && mp->fix_size >= 8)
16518 {
16519 min_mp = mp;
16520 min_address = mp->min_address + fix->fix_size;
16521 }
16522 }
16523 }
16524
16525 /* We need to create a new entry. */
16526 mp = XNEW (Mnode);
16527 mp->fix_size = fix->fix_size;
16528 mp->mode = fix->mode;
16529 mp->value = fix->value;
16530 mp->refcount = 1;
16531 mp->max_address = minipool_barrier->address + 65536;
16532
16533 mp->min_address = min_address;
16534
16535 if (min_mp == NULL)
16536 {
16537 mp->prev = NULL;
16538 mp->next = minipool_vector_head;
16539
16540 if (mp->next == NULL)
16541 {
16542 minipool_vector_tail = mp;
16543 minipool_vector_label = gen_label_rtx ();
16544 }
16545 else
16546 mp->next->prev = mp;
16547
16548 minipool_vector_head = mp;
16549 }
16550 else
16551 {
16552 mp->next = min_mp->next;
16553 mp->prev = min_mp;
16554 min_mp->next = mp;
16555
16556 if (mp->next != NULL)
16557 mp->next->prev = mp;
16558 else
16559 minipool_vector_tail = mp;
16560 }
16561
16562 /* Save the new entry. */
16563 min_mp = mp;
16564
16565 if (mp->prev)
16566 mp = mp->prev;
16567 else
16568 mp->offset = 0;
16569
16570 /* Scan over the following entries and adjust their offsets. */
16571 while (mp->next != NULL)
16572 {
16573 if (mp->next->min_address < mp->min_address + mp->fix_size)
16574 mp->next->min_address = mp->min_address + mp->fix_size;
16575
16576 if (mp->refcount)
16577 mp->next->offset = mp->offset + mp->fix_size;
16578 else
16579 mp->next->offset = mp->offset;
16580
16581 mp = mp->next;
16582 }
16583
16584 return min_mp;
16585 }
16586
16587 static void
16588 assign_minipool_offsets (Mfix *barrier)
16589 {
16590 HOST_WIDE_INT offset = 0;
16591 Mnode *mp;
16592
16593 minipool_barrier = barrier;
16594
16595 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16596 {
16597 mp->offset = offset;
16598
16599 if (mp->refcount > 0)
16600 offset += mp->fix_size;
16601 }
16602 }
16603
16604 /* Output the literal table. */
16605 static void
16606 dump_minipool (rtx_insn *scan)
16607 {
16608 Mnode * mp;
16609 Mnode * nmp;
16610 int align64 = 0;
16611
16612 if (ARM_DOUBLEWORD_ALIGN)
16613 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16614 if (mp->refcount > 0 && mp->fix_size >= 8)
16615 {
16616 align64 = 1;
16617 break;
16618 }
16619
16620 if (dump_file)
16621 fprintf (dump_file,
16622 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16623 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16624
16625 scan = emit_label_after (gen_label_rtx (), scan);
16626 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16627 scan = emit_label_after (minipool_vector_label, scan);
16628
16629 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16630 {
16631 if (mp->refcount > 0)
16632 {
16633 if (dump_file)
16634 {
16635 fprintf (dump_file,
16636 ";; Offset %u, min %ld, max %ld ",
16637 (unsigned) mp->offset, (unsigned long) mp->min_address,
16638 (unsigned long) mp->max_address);
16639 arm_print_value (dump_file, mp->value);
16640 fputc ('\n', dump_file);
16641 }
16642
16643 switch (GET_MODE_SIZE (mp->mode))
16644 {
16645 #ifdef HAVE_consttable_1
16646 case 1:
16647 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16648 break;
16649
16650 #endif
16651 #ifdef HAVE_consttable_2
16652 case 2:
16653 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16654 break;
16655
16656 #endif
16657 #ifdef HAVE_consttable_4
16658 case 4:
16659 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16660 break;
16661
16662 #endif
16663 #ifdef HAVE_consttable_8
16664 case 8:
16665 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16666 break;
16667
16668 #endif
16669 #ifdef HAVE_consttable_16
16670 case 16:
16671 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16672 break;
16673
16674 #endif
16675 default:
16676 gcc_unreachable ();
16677 }
16678 }
16679
16680 nmp = mp->next;
16681 free (mp);
16682 }
16683
16684 minipool_vector_head = minipool_vector_tail = NULL;
16685 scan = emit_insn_after (gen_consttable_end (), scan);
16686 scan = emit_barrier_after (scan);
16687 }
16688
16689 /* Return the cost of forcibly inserting a barrier after INSN. */
16690 static int
16691 arm_barrier_cost (rtx insn)
16692 {
16693 /* Basing the location of the pool on the loop depth is preferable,
16694 but at the moment, the basic block information seems to be
16695 corrupt by this stage of the compilation. */
16696 int base_cost = 50;
16697 rtx next = next_nonnote_insn (insn);
16698
16699 if (next != NULL && LABEL_P (next))
16700 base_cost -= 20;
16701
16702 switch (GET_CODE (insn))
16703 {
16704 case CODE_LABEL:
16705 /* It will always be better to place the table before the label, rather
16706 than after it. */
16707 return 50;
16708
16709 case INSN:
16710 case CALL_INSN:
16711 return base_cost;
16712
16713 case JUMP_INSN:
16714 return base_cost - 10;
16715
16716 default:
16717 return base_cost + 10;
16718 }
16719 }
16720
16721 /* Find the best place in the insn stream in the range
16722 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16723 Create the barrier by inserting a jump and add a new fix entry for
16724 it. */
16725 static Mfix *
16726 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16727 {
16728 HOST_WIDE_INT count = 0;
16729 rtx_barrier *barrier;
16730 rtx_insn *from = fix->insn;
16731 /* The instruction after which we will insert the jump. */
16732 rtx_insn *selected = NULL;
16733 int selected_cost;
16734 /* The address at which the jump instruction will be placed. */
16735 HOST_WIDE_INT selected_address;
16736 Mfix * new_fix;
16737 HOST_WIDE_INT max_count = max_address - fix->address;
16738 rtx_code_label *label = gen_label_rtx ();
16739
16740 selected_cost = arm_barrier_cost (from);
16741 selected_address = fix->address;
16742
16743 while (from && count < max_count)
16744 {
16745 rtx_jump_table_data *tmp;
16746 int new_cost;
16747
16748 /* This code shouldn't have been called if there was a natural barrier
16749 within range. */
16750 gcc_assert (!BARRIER_P (from));
16751
16752 /* Count the length of this insn. This must stay in sync with the
16753 code that pushes minipool fixes. */
16754 if (LABEL_P (from))
16755 count += get_label_padding (from);
16756 else
16757 count += get_attr_length (from);
16758
16759 /* If there is a jump table, add its length. */
16760 if (tablejump_p (from, NULL, &tmp))
16761 {
16762 count += get_jump_table_size (tmp);
16763
16764 /* Jump tables aren't in a basic block, so base the cost on
16765 the dispatch insn. If we select this location, we will
16766 still put the pool after the table. */
16767 new_cost = arm_barrier_cost (from);
16768
16769 if (count < max_count
16770 && (!selected || new_cost <= selected_cost))
16771 {
16772 selected = tmp;
16773 selected_cost = new_cost;
16774 selected_address = fix->address + count;
16775 }
16776
16777 /* Continue after the dispatch table. */
16778 from = NEXT_INSN (tmp);
16779 continue;
16780 }
16781
16782 new_cost = arm_barrier_cost (from);
16783
16784 if (count < max_count
16785 && (!selected || new_cost <= selected_cost))
16786 {
16787 selected = from;
16788 selected_cost = new_cost;
16789 selected_address = fix->address + count;
16790 }
16791
16792 from = NEXT_INSN (from);
16793 }
16794
16795 /* Make sure that we found a place to insert the jump. */
16796 gcc_assert (selected);
16797
16798 /* Make sure we do not split a call and its corresponding
16799 CALL_ARG_LOCATION note. */
16800 if (CALL_P (selected))
16801 {
16802 rtx_insn *next = NEXT_INSN (selected);
16803 if (next && NOTE_P (next)
16804 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16805 selected = next;
16806 }
16807
16808 /* Create a new JUMP_INSN that branches around a barrier. */
16809 from = emit_jump_insn_after (gen_jump (label), selected);
16810 JUMP_LABEL (from) = label;
16811 barrier = emit_barrier_after (from);
16812 emit_label_after (label, barrier);
16813
16814 /* Create a minipool barrier entry for the new barrier. */
16815 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16816 new_fix->insn = barrier;
16817 new_fix->address = selected_address;
16818 new_fix->next = fix->next;
16819 fix->next = new_fix;
16820
16821 return new_fix;
16822 }
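
/* Editor's note: an illustrative, self-contained sketch (hypothetical
   standalone helper) of the placement scan above.  Among the positions whose
   cumulative length stays within the budget, the last one with the lowest
   cost wins; the "<=" test means later candidates beat earlier ones on ties,
   matching the loop above.  */
static int
example_pick_barrier_position (const long *insn_length, const int *insn_cost,
                               int n_insns, long budget)
{
  long count = 0;
  int best = -1;
  int best_cost = 0;
  int i;

  for (i = 0; i < n_insns && count < budget; i++)
    {
      count += insn_length[i];
      if (count < budget && (best < 0 || insn_cost[i] <= best_cost))
        {
          best = i;
          best_cost = insn_cost[i];
        }
    }
  return best;  /* index of the insn after which the jump and barrier go  */
}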
16823
16824 /* Record that there is a natural barrier in the insn stream at
16825 ADDRESS. */
16826 static void
16827 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16828 {
16829 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16830
16831 fix->insn = insn;
16832 fix->address = address;
16833
16834 fix->next = NULL;
16835 if (minipool_fix_head != NULL)
16836 minipool_fix_tail->next = fix;
16837 else
16838 minipool_fix_head = fix;
16839
16840 minipool_fix_tail = fix;
16841 }
16842
16843 /* Record INSN, which will need fixing up to load a value from the
16844 minipool. ADDRESS is the offset of the insn from the start of the
16845 function; LOC is a pointer to the part of the insn which requires
16846 fixing; VALUE is the constant that must be loaded, which is of type
16847 MODE. */
16848 static void
16849 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16850 machine_mode mode, rtx value)
16851 {
16852 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16853
16854 fix->insn = insn;
16855 fix->address = address;
16856 fix->loc = loc;
16857 fix->mode = mode;
16858 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16859 fix->value = value;
16860 fix->forwards = get_attr_pool_range (insn);
16861 fix->backwards = get_attr_neg_pool_range (insn);
16862 fix->minipool = NULL;
16863
16864 /* If an insn doesn't have a range defined for it, then it isn't
16865 expecting to be reworked by this code. Better to stop now than
16866 to generate duff assembly code. */
16867 gcc_assert (fix->forwards || fix->backwards);
16868
16869 /* If an entry requires 8-byte alignment then assume all constant pools
16870 require 4 bytes of padding. Trying to do this later on a per-pool
16871 basis is awkward because existing pool entries have to be modified. */
16872 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16873 minipool_pad = 4;
16874
16875 if (dump_file)
16876 {
16877 fprintf (dump_file,
16878 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16879 GET_MODE_NAME (mode),
16880 INSN_UID (insn), (unsigned long) address,
16881 -1 * (long)fix->backwards, (long)fix->forwards);
16882 arm_print_value (dump_file, fix->value);
16883 fprintf (dump_file, "\n");
16884 }
16885
16886 /* Add it to the chain of fixes. */
16887 fix->next = NULL;
16888
16889 if (minipool_fix_head != NULL)
16890 minipool_fix_tail->next = fix;
16891 else
16892 minipool_fix_head = fix;
16893
16894 minipool_fix_tail = fix;
16895 }
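
/* Editor's note: an illustrative sketch (hypothetical helper, not part of
   this file).  The pool_range/neg_pool_range attributes recorded above define
   the window of addresses in which a pool serving this fix may be dumped;
   add_minipool_forward_ref and add_minipool_backward_ref work against the
   two ends of this window.  */
static void
example_fix_window (long address, long forwards, long backwards,
                    long *min_pool_address, long *max_pool_address)
{
  /* Backward-reference limit, as used above.  */
  *min_pool_address = address - backwards;
  /* Forward-reference limit; the real code additionally reserves alignment
     slack (see minipool_pad above) within this range.  */
  *max_pool_address = address + forwards;
}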
16896
16897 /* Return the maximum allowed cost of synthesizing a 64-bit constant
16898 inline, in insns. Returning 99 means we always want the value
16899 synthesized rather than placed in a literal pool. */
16900 int
16901 arm_max_const_double_inline_cost ()
16902 {
16903 /* Let the value get synthesized to avoid the use of literal pools. */
16904 if (arm_disable_literal_pool)
16905 return 99;
16906
16907 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16908 }
16909
16910 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16911 Returns the number of insns needed, or 99 if we don't know how to
16912 do it. */
16913 int
16914 arm_const_double_inline_cost (rtx val)
16915 {
16916 rtx lowpart, highpart;
16917 machine_mode mode;
16918
16919 mode = GET_MODE (val);
16920
16921 if (mode == VOIDmode)
16922 mode = DImode;
16923
16924 gcc_assert (GET_MODE_SIZE (mode) == 8);
16925
16926 lowpart = gen_lowpart (SImode, val);
16927 highpart = gen_highpart_mode (SImode, mode, val);
16928
16929 gcc_assert (CONST_INT_P (lowpart));
16930 gcc_assert (CONST_INT_P (highpart));
16931
16932 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16933 NULL_RTX, NULL_RTX, 0, 0)
16934 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16935 NULL_RTX, NULL_RTX, 0, 0));
16936 }
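
/* Editor's note: an illustrative, standalone sketch (hypothetical helper) of
   the cost model above: the 64-bit value is split into its two 32-bit halves
   and the per-half synthesis costs are added.  cost32 stands in for
   arm_gen_constant.  */
static int
example_const64_inline_cost (unsigned long long val,
                             int (*cost32) (unsigned int))
{
  unsigned int low = (unsigned int) (val & 0xffffffffu);
  unsigned int high = (unsigned int) (val >> 32);

  return cost32 (low) + cost32 (high);
}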
16937
16938 /* Cost of loading a SImode constant. */
16939 static inline int
16940 arm_const_inline_cost (enum rtx_code code, rtx val)
16941 {
16942 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16943 NULL_RTX, NULL_RTX, 1, 0);
16944 }
16945
16946 /* Return true if it is worthwhile to split a 64-bit constant into two
16947 32-bit operations. This is the case if optimizing for size, or
16948 if we have load delay slots, or if one 32-bit part can be done with
16949 a single data operation. */
16950 bool
16951 arm_const_double_by_parts (rtx val)
16952 {
16953 machine_mode mode = GET_MODE (val);
16954 rtx part;
16955
16956 if (optimize_size || arm_ld_sched)
16957 return true;
16958
16959 if (mode == VOIDmode)
16960 mode = DImode;
16961
16962 part = gen_highpart_mode (SImode, mode, val);
16963
16964 gcc_assert (CONST_INT_P (part));
16965
16966 if (const_ok_for_arm (INTVAL (part))
16967 || const_ok_for_arm (~INTVAL (part)))
16968 return true;
16969
16970 part = gen_lowpart (SImode, val);
16971
16972 gcc_assert (CONST_INT_P (part));
16973
16974 if (const_ok_for_arm (INTVAL (part))
16975 || const_ok_for_arm (~INTVAL (part)))
16976 return true;
16977
16978 return false;
16979 }
16980
16981 /* Return true if it is possible to inline both the high and low parts
16982 of a 64-bit constant into 32-bit data processing instructions. */
16983 bool
16984 arm_const_double_by_immediates (rtx val)
16985 {
16986 machine_mode mode = GET_MODE (val);
16987 rtx part;
16988
16989 if (mode == VOIDmode)
16990 mode = DImode;
16991
16992 part = gen_highpart_mode (SImode, mode, val);
16993
16994 gcc_assert (CONST_INT_P (part));
16995
16996 if (!const_ok_for_arm (INTVAL (part)))
16997 return false;
16998
16999 part = gen_lowpart (SImode, val);
17000
17001 gcc_assert (CONST_INT_P (part));
17002
17003 if (!const_ok_for_arm (INTVAL (part)))
17004 return false;
17005
17006 return true;
17007 }
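
/* Editor's note: an illustrative, standalone sketch (hypothetical helper, not
   the compiler's const_ok_for_arm) of the property being tested above: a
   32-bit value fits an ARM data-processing immediate when some even rotation
   of it fits in 8 bits.  */
static int
example_arm_immediate_p (unsigned int val)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate VAL left by ROT bits (ROT == 0 handled separately to avoid
         an undefined 32-bit shift).  */
      unsigned int rotated = rot ? ((val << rot) | (val >> (32 - rot))) : val;
      if ((rotated & ~0xffu) == 0)
        return 1;
    }
  return 0;
}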
17008
17009 /* Scan INSN and note any of its operands that need fixing.
17010 If DO_PUSHES is false we do not actually push any of the fixups
17011 needed. */
17012 static void
17013 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17014 {
17015 int opno;
17016
17017 extract_constrain_insn (insn);
17018
17019 if (recog_data.n_alternatives == 0)
17020 return;
17021
17022 /* Fill in recog_op_alt with information about the constraints of
17023 this insn. */
17024 preprocess_constraints (insn);
17025
17026 const operand_alternative *op_alt = which_op_alt ();
17027 for (opno = 0; opno < recog_data.n_operands; opno++)
17028 {
17029 /* Things we need to fix can only occur in inputs. */
17030 if (recog_data.operand_type[opno] != OP_IN)
17031 continue;
17032
17033 /* If this alternative is a memory reference, then any mention
17034 of constants in this alternative is really to fool reload
17035 into allowing us to accept one there. We need to fix them up
17036 now so that we output the right code. */
17037 if (op_alt[opno].memory_ok)
17038 {
17039 rtx op = recog_data.operand[opno];
17040
17041 if (CONSTANT_P (op))
17042 {
17043 if (do_pushes)
17044 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17045 recog_data.operand_mode[opno], op);
17046 }
17047 else if (MEM_P (op)
17048 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17049 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17050 {
17051 if (do_pushes)
17052 {
17053 rtx cop = avoid_constant_pool_reference (op);
17054
17055 /* Casting the address of something to a mode narrower
17056 than a word can cause avoid_constant_pool_reference()
17057 to return the pool reference itself. That's no good to
17058 us here. Let's just hope that we can use the
17059 constant pool value directly. */
17060 if (op == cop)
17061 cop = get_pool_constant (XEXP (op, 0));
17062
17063 push_minipool_fix (insn, address,
17064 recog_data.operand_loc[opno],
17065 recog_data.operand_mode[opno], cop);
17066 }
17067
17068 }
17069 }
17070 }
17071
17072 return;
17073 }
17074
17075 /* Rewrite move insn into subtract of 0 if the condition codes will
17076 be useful in the next conditional jump insn. */
17077
17078 static void
17079 thumb1_reorg (void)
17080 {
17081 basic_block bb;
17082
17083 FOR_EACH_BB_FN (bb, cfun)
17084 {
17085 rtx dest, src;
17086 rtx pat, op0, set = NULL;
17087 rtx_insn *prev, *insn = BB_END (bb);
17088 bool insn_clobbered = false;
17089
17090 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17091 insn = PREV_INSN (insn);
17092
17093 /* Find the last cbranchsi4_insn in basic block BB. */
17094 if (insn == BB_HEAD (bb)
17095 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17096 continue;
17097
17098 /* Get the register with which we are comparing. */
17099 pat = PATTERN (insn);
17100 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17101
17102 /* Find the first flag setting insn before INSN in basic block BB. */
17103 gcc_assert (insn != BB_HEAD (bb));
17104 for (prev = PREV_INSN (insn);
17105 (!insn_clobbered
17106 && prev != BB_HEAD (bb)
17107 && (NOTE_P (prev)
17108 || DEBUG_INSN_P (prev)
17109 || ((set = single_set (prev)) != NULL
17110 && get_attr_conds (prev) == CONDS_NOCOND)));
17111 prev = PREV_INSN (prev))
17112 {
17113 if (reg_set_p (op0, prev))
17114 insn_clobbered = true;
17115 }
17116
17117 /* Skip if op0 is clobbered by an insn other than prev. */
17118 if (insn_clobbered)
17119 continue;
17120
17121 if (!set)
17122 continue;
17123
17124 dest = SET_DEST (set);
17125 src = SET_SRC (set);
17126 if (!low_register_operand (dest, SImode)
17127 || !low_register_operand (src, SImode))
17128 continue;
17129
17130 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17131 in INSN. Both src and dest of the move insn are checked. */
17132 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17133 {
17134 dest = copy_rtx (dest);
17135 src = copy_rtx (src);
17136 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17137 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17138 INSN_CODE (prev) = -1;
17139 /* Set test register in INSN to dest. */
17140 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17141 INSN_CODE (insn) = -1;
17142 }
17143 }
17144 }
17145
17146 /* Convert instructions to their cc-clobbering variant if possible, since
17147 that allows us to use smaller encodings. */
17148
17149 static void
17150 thumb2_reorg (void)
17151 {
17152 basic_block bb;
17153 regset_head live;
17154
17155 INIT_REG_SET (&live);
17156
17157 /* We are freeing block_for_insn in the toplev to keep compatibility
17158 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17159 compute_bb_for_insn ();
17160 df_analyze ();
17161
17162 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17163
17164 FOR_EACH_BB_FN (bb, cfun)
17165 {
17166 if (current_tune->disparage_flag_setting_t16_encodings
17167 && optimize_bb_for_speed_p (bb))
17168 continue;
17169
17170 rtx_insn *insn;
17171 Convert_Action action = SKIP;
17172 Convert_Action action_for_partial_flag_setting
17173 = (current_tune->disparage_partial_flag_setting_t16_encodings
17174 && optimize_bb_for_speed_p (bb))
17175 ? SKIP : CONV;
17176
17177 COPY_REG_SET (&live, DF_LR_OUT (bb));
17178 df_simulate_initialize_backwards (bb, &live);
17179 FOR_BB_INSNS_REVERSE (bb, insn)
17180 {
17181 if (NONJUMP_INSN_P (insn)
17182 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17183 && GET_CODE (PATTERN (insn)) == SET)
17184 {
17185 action = SKIP;
17186 rtx pat = PATTERN (insn);
17187 rtx dst = XEXP (pat, 0);
17188 rtx src = XEXP (pat, 1);
17189 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17190
17191 if (!OBJECT_P (src))
17192 op0 = XEXP (src, 0);
17193
17194 if (BINARY_P (src))
17195 op1 = XEXP (src, 1);
17196
17197 if (low_register_operand (dst, SImode))
17198 {
17199 switch (GET_CODE (src))
17200 {
17201 case PLUS:
17202 /* Adding two registers and storing the result
17203 in the first source is already a 16-bit
17204 operation. */
17205 if (rtx_equal_p (dst, op0)
17206 && register_operand (op1, SImode))
17207 break;
17208
17209 if (low_register_operand (op0, SImode))
17210 {
17211 /* ADDS <Rd>,<Rn>,<Rm> */
17212 if (low_register_operand (op1, SImode))
17213 action = CONV;
17214 /* ADDS <Rdn>,#<imm8> */
17215 /* SUBS <Rdn>,#<imm8> */
17216 else if (rtx_equal_p (dst, op0)
17217 && CONST_INT_P (op1)
17218 && IN_RANGE (INTVAL (op1), -255, 255))
17219 action = CONV;
17220 /* ADDS <Rd>,<Rn>,#<imm3> */
17221 /* SUBS <Rd>,<Rn>,#<imm3> */
17222 else if (CONST_INT_P (op1)
17223 && IN_RANGE (INTVAL (op1), -7, 7))
17224 action = CONV;
17225 }
17226 /* ADCS <Rd>, <Rn> */
17227 else if (GET_CODE (XEXP (src, 0)) == PLUS
17228 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17229 && low_register_operand (XEXP (XEXP (src, 0), 1),
17230 SImode)
17231 && COMPARISON_P (op1)
17232 && cc_register (XEXP (op1, 0), VOIDmode)
17233 && maybe_get_arm_condition_code (op1) == ARM_CS
17234 && XEXP (op1, 1) == const0_rtx)
17235 action = CONV;
17236 break;
17237
17238 case MINUS:
17239 /* RSBS <Rd>,<Rn>,#0
17240 Not handled here: see NEG below. */
17241 /* SUBS <Rd>,<Rn>,#<imm3>
17242 SUBS <Rdn>,#<imm8>
17243 Not handled here: see PLUS above. */
17244 /* SUBS <Rd>,<Rn>,<Rm> */
17245 if (low_register_operand (op0, SImode)
17246 && low_register_operand (op1, SImode))
17247 action = CONV;
17248 break;
17249
17250 case MULT:
17251 /* MULS <Rdm>,<Rn>,<Rdm>
17252 As an exception to the rule, this is only used
17253 when optimizing for size since MULS is slow on all
17254 known implementations. We do not even want to use
17255 MULS in cold code, if optimizing for speed, so we
17256 test the global flag here. */
17257 if (!optimize_size)
17258 break;
17259 /* else fall through. */
17260 case AND:
17261 case IOR:
17262 case XOR:
17263 /* ANDS <Rdn>,<Rm> */
17264 if (rtx_equal_p (dst, op0)
17265 && low_register_operand (op1, SImode))
17266 action = action_for_partial_flag_setting;
17267 else if (rtx_equal_p (dst, op1)
17268 && low_register_operand (op0, SImode))
17269 action = action_for_partial_flag_setting == SKIP
17270 ? SKIP : SWAP_CONV;
17271 break;
17272
17273 case ASHIFTRT:
17274 case ASHIFT:
17275 case LSHIFTRT:
17276 /* ASRS <Rdn>,<Rm> */
17277 /* LSRS <Rdn>,<Rm> */
17278 /* LSLS <Rdn>,<Rm> */
17279 if (rtx_equal_p (dst, op0)
17280 && low_register_operand (op1, SImode))
17281 action = action_for_partial_flag_setting;
17282 /* ASRS <Rd>,<Rm>,#<imm5> */
17283 /* LSRS <Rd>,<Rm>,#<imm5> */
17284 /* LSLS <Rd>,<Rm>,#<imm5> */
17285 else if (low_register_operand (op0, SImode)
17286 && CONST_INT_P (op1)
17287 && IN_RANGE (INTVAL (op1), 0, 31))
17288 action = action_for_partial_flag_setting;
17289 break;
17290
17291 case ROTATERT:
17292 /* RORS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst, op0)
17294 && low_register_operand (op1, SImode))
17295 action = action_for_partial_flag_setting;
17296 break;
17297
17298 case NOT:
17299 /* MVNS <Rd>,<Rm> */
17300 if (low_register_operand (op0, SImode))
17301 action = action_for_partial_flag_setting;
17302 break;
17303
17304 case NEG:
17305 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17306 if (low_register_operand (op0, SImode))
17307 action = CONV;
17308 break;
17309
17310 case CONST_INT:
17311 /* MOVS <Rd>,#<imm8> */
17312 if (CONST_INT_P (src)
17313 && IN_RANGE (INTVAL (src), 0, 255))
17314 action = action_for_partial_flag_setting;
17315 break;
17316
17317 case REG:
17318 /* MOVS and MOV<c> with registers have different
17319 encodings, so are not relevant here. */
17320 break;
17321
17322 default:
17323 break;
17324 }
17325 }
17326
17327 if (action != SKIP)
17328 {
17329 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17330 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17331 rtvec vec;
17332
17333 if (action == SWAP_CONV)
17334 {
17335 src = copy_rtx (src);
17336 XEXP (src, 0) = op1;
17337 XEXP (src, 1) = op0;
17338 pat = gen_rtx_SET (VOIDmode, dst, src);
17339 vec = gen_rtvec (2, pat, clobber);
17340 }
17341 else /* action == CONV */
17342 vec = gen_rtvec (2, pat, clobber);
17343
17344 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17345 INSN_CODE (insn) = -1;
17346 }
17347 }
17348
17349 if (NONDEBUG_INSN_P (insn))
17350 df_simulate_one_insn_backwards (bb, insn, &live);
17351 }
17352 }
17353
17354 CLEAR_REG_SET (&live);
17355 }
17356
17357 /* GCC puts the pool in the wrong place for ARM, since we can only
17358 load addresses a limited distance around the pc. We do some
17359 special munging to move the constant pool values to the correct
17360 point in the code. */
17361 static void
17362 arm_reorg (void)
17363 {
17364 rtx_insn *insn;
17365 HOST_WIDE_INT address = 0;
17366 Mfix * fix;
17367
17368 if (TARGET_THUMB1)
17369 thumb1_reorg ();
17370 else if (TARGET_THUMB2)
17371 thumb2_reorg ();
17372
17373 /* Ensure all insns that must be split have been split at this point.
17374 Otherwise, the pool placement code below may compute incorrect
17375 insn lengths. Note that when optimizing, all insns have already
17376 been split at this point. */
17377 if (!optimize)
17378 split_all_insns_noflow ();
17379
17380 minipool_fix_head = minipool_fix_tail = NULL;
17381
17382 /* The first insn must always be a note, or the code below won't
17383 scan it properly. */
17384 insn = get_insns ();
17385 gcc_assert (NOTE_P (insn));
17386 minipool_pad = 0;
17387
17388 /* Scan all the insns and record the operands that will need fixing. */
17389 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17390 {
17391 if (BARRIER_P (insn))
17392 push_minipool_barrier (insn, address);
17393 else if (INSN_P (insn))
17394 {
17395 rtx_jump_table_data *table;
17396
17397 note_invalid_constants (insn, address, true);
17398 address += get_attr_length (insn);
17399
17400 /* If the insn is a vector jump, add the size of the table
17401 and skip the table. */
17402 if (tablejump_p (insn, NULL, &table))
17403 {
17404 address += get_jump_table_size (table);
17405 insn = table;
17406 }
17407 }
17408 else if (LABEL_P (insn))
17409 /* Add the worst-case padding due to alignment. We don't add
17410 the _current_ padding because the minipool insertions
17411 themselves might change it. */
17412 address += get_label_padding (insn);
17413 }
17414
17415 fix = minipool_fix_head;
17416
17417 /* Now scan the fixups and perform the required changes. */
17418 while (fix)
17419 {
17420 Mfix * ftmp;
17421 Mfix * fdel;
17422 Mfix * last_added_fix;
17423 Mfix * last_barrier = NULL;
17424 Mfix * this_fix;
17425
17426 /* Skip any further barriers before the next fix. */
17427 while (fix && BARRIER_P (fix->insn))
17428 fix = fix->next;
17429
17430 /* No more fixes. */
17431 if (fix == NULL)
17432 break;
17433
17434 last_added_fix = NULL;
17435
17436 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17437 {
17438 if (BARRIER_P (ftmp->insn))
17439 {
17440 if (ftmp->address >= minipool_vector_head->max_address)
17441 break;
17442
17443 last_barrier = ftmp;
17444 }
17445 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17446 break;
17447
17448 last_added_fix = ftmp; /* Keep track of the last fix added. */
17449 }
17450
17451 /* If we found a barrier, drop back to that; any fixes that we
17452 could have reached but come after the barrier will now go in
17453 the next mini-pool. */
17454 if (last_barrier != NULL)
17455 {
17456 /* Reduce the refcount for those fixes that won't go into this
17457 pool after all. */
17458 for (fdel = last_barrier->next;
17459 fdel && fdel != ftmp;
17460 fdel = fdel->next)
17461 {
17462 fdel->minipool->refcount--;
17463 fdel->minipool = NULL;
17464 }
17465
17466 ftmp = last_barrier;
17467 }
17468 else
17469 {
17470 /* ftmp is the first fix that we can't fit into this pool and
17471 there are no natural barriers that we could use. Insert a
17472 new barrier in the code somewhere between the previous
17473 fix and this one, and arrange to jump around it. */
17474 HOST_WIDE_INT max_address;
17475
17476 /* The last item on the list of fixes must be a barrier, so
17477 we can never run off the end of the list of fixes without
17478 last_barrier being set. */
17479 gcc_assert (ftmp);
17480
17481 max_address = minipool_vector_head->max_address;
17482 /* Check that there isn't another fix that is in range that
17483 we couldn't fit into this pool because the pool was
17484 already too large: we need to put the pool before such an
17485 instruction. The pool itself may come just after the
17486 fix because create_fix_barrier also allows space for a
17487 jump instruction. */
17488 if (ftmp->address < max_address)
17489 max_address = ftmp->address + 1;
17490
17491 last_barrier = create_fix_barrier (last_added_fix, max_address);
17492 }
17493
17494 assign_minipool_offsets (last_barrier);
17495
17496 while (ftmp)
17497 {
17498 if (!BARRIER_P (ftmp->insn)
17499 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17500 == NULL))
17501 break;
17502
17503 ftmp = ftmp->next;
17504 }
17505
17506 /* Scan over the fixes we have identified for this pool, fixing them
17507 up and adding the constants to the pool itself. */
17508 for (this_fix = fix; this_fix && ftmp != this_fix;
17509 this_fix = this_fix->next)
17510 if (!BARRIER_P (this_fix->insn))
17511 {
17512 rtx addr
17513 = plus_constant (Pmode,
17514 gen_rtx_LABEL_REF (VOIDmode,
17515 minipool_vector_label),
17516 this_fix->minipool->offset);
17517 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17518 }
17519
17520 dump_minipool (last_barrier->insn);
17521 fix = ftmp;
17522 }
17523
17524 /* From now on we must synthesize any constants that we can't handle
17525 directly. This can happen if the RTL gets split during final
17526 instruction generation. */
17527 cfun->machine->after_arm_reorg = 1;
17528
17529 /* Free the minipool memory. */
17530 obstack_free (&minipool_obstack, minipool_startobj);
17531 }
17532 \f
17533 /* Routines to output assembly language. */
17534
17535 /* Return the string representation of the real value passed in. */
17536 static const char *
17537 fp_const_from_val (REAL_VALUE_TYPE *r)
17538 {
17539 if (!fp_consts_inited)
17540 init_fp_table ();
17541
17542 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17543 return "0";
17544 }
17545
17546 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17547 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17548 insn is in the list, and UPDATE is true iff the list contains an
17549 explicit update of the base register. */
17550 void
17551 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17552 bool update)
17553 {
17554 int i;
17555 char pattern[100];
17556 int offset;
17557 const char *conditional;
17558 int num_saves = XVECLEN (operands[0], 0);
17559 unsigned int regno;
17560 unsigned int regno_base = REGNO (operands[1]);
17561
17562 offset = 0;
17563 offset += update ? 1 : 0;
17564 offset += return_pc ? 1 : 0;
17565
17566 /* Is the base register in the list? */
17567 for (i = offset; i < num_saves; i++)
17568 {
17569 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17570 /* If SP is in the list, then the base register must be SP. */
17571 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17572 /* If base register is in the list, there must be no explicit update. */
17573 if (regno == regno_base)
17574 gcc_assert (!update);
17575 }
17576
17577 conditional = reverse ? "%?%D0" : "%?%d0";
17578 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17579 {
17580 /* Output pop (not ldmfd) because it has a shorter encoding. */
17581 gcc_assert (update);
17582 sprintf (pattern, "pop%s\t{", conditional);
17583 }
17584 else
17585 {
17586 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17587 It's just a convention; their semantics are identical. */
17588 if (regno_base == SP_REGNUM)
17589 sprintf (pattern, "ldm%sfd\t", conditional);
17590 else if (TARGET_UNIFIED_ASM)
17591 sprintf (pattern, "ldmia%s\t", conditional);
17592 else
17593 sprintf (pattern, "ldm%sia\t", conditional);
17594
17595 strcat (pattern, reg_names[regno_base]);
17596 if (update)
17597 strcat (pattern, "!, {");
17598 else
17599 strcat (pattern, ", {");
17600 }
17601
17602 /* Output the first destination register. */
17603 strcat (pattern,
17604 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17605
17606 /* Output the rest of the destination registers. */
17607 for (i = offset + 1; i < num_saves; i++)
17608 {
17609 strcat (pattern, ", ");
17610 strcat (pattern,
17611 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17612 }
17613
17614 strcat (pattern, "}");
17615
17616 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17617 strcat (pattern, "^");
17618
17619 output_asm_insn (pattern, &cond);
17620 }
17621
17622
17623 /* Output the assembly for a store multiple. */
17624
17625 const char *
17626 vfp_output_vstmd (rtx * operands)
17627 {
17628 char pattern[100];
17629 int p;
17630 int base;
17631 int i;
17632 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17633 ? XEXP (operands[0], 0)
17634 : XEXP (XEXP (operands[0], 0), 0);
17635 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17636
17637 if (push_p)
17638 strcpy (pattern, "vpush%?.64\t{%P1");
17639 else
17640 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17641
17642 p = strlen (pattern);
17643
17644 gcc_assert (REG_P (operands[1]));
17645
17646 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17647 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17648 {
17649 p += sprintf (&pattern[p], ", d%d", base + i);
17650 }
17651 strcpy (&pattern[p], "}");
17652
17653 output_asm_insn (pattern, operands);
17654 return "";
17655 }
17656
17657
17658 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17659 number of bytes pushed. */
17660
17661 static int
17662 vfp_emit_fstmd (int base_reg, int count)
17663 {
17664 rtx par;
17665 rtx dwarf;
17666 rtx tmp, reg;
17667 int i;
17668
17669 /* Work around an ARM10 VFPr1 bug: data corruption can occur when exactly
17670 two register pairs are stored by a store-multiple insn. We avoid this
17671 by pushing an extra pair. */
17672 if (count == 2 && !arm_arch6)
17673 {
17674 if (base_reg == LAST_VFP_REGNUM - 3)
17675 base_reg -= 2;
17676 count++;
17677 }
17678
17679 /* FSTMD may not store more than 16 doubleword registers at once. Split
17680 larger stores into multiple parts (up to a maximum of two, in
17681 practice). */
17682 if (count > 16)
17683 {
17684 int saved;
17685 /* NOTE: base_reg is an internal register number, so each D register
17686 counts as 2. */
17687 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17688 saved += vfp_emit_fstmd (base_reg, 16);
17689 return saved;
17690 }
17691
17692 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17693 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17694
17695 reg = gen_rtx_REG (DFmode, base_reg);
17696 base_reg += 2;
17697
17698 XVECEXP (par, 0, 0)
17699 = gen_rtx_SET (VOIDmode,
17700 gen_frame_mem
17701 (BLKmode,
17702 gen_rtx_PRE_MODIFY (Pmode,
17703 stack_pointer_rtx,
17704 plus_constant
17705 (Pmode, stack_pointer_rtx,
17706 - (count * 8)))
17707 ),
17708 gen_rtx_UNSPEC (BLKmode,
17709 gen_rtvec (1, reg),
17710 UNSPEC_PUSH_MULT));
17711
17712 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17713 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17714 RTX_FRAME_RELATED_P (tmp) = 1;
17715 XVECEXP (dwarf, 0, 0) = tmp;
17716
17717 tmp = gen_rtx_SET (VOIDmode,
17718 gen_frame_mem (DFmode, stack_pointer_rtx),
17719 reg);
17720 RTX_FRAME_RELATED_P (tmp) = 1;
17721 XVECEXP (dwarf, 0, 1) = tmp;
17722
17723 for (i = 1; i < count; i++)
17724 {
17725 reg = gen_rtx_REG (DFmode, base_reg);
17726 base_reg += 2;
17727 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17728
17729 tmp = gen_rtx_SET (VOIDmode,
17730 gen_frame_mem (DFmode,
17731 plus_constant (Pmode,
17732 stack_pointer_rtx,
17733 i * 8)),
17734 reg);
17735 RTX_FRAME_RELATED_P (tmp) = 1;
17736 XVECEXP (dwarf, 0, i + 1) = tmp;
17737 }
17738
17739 par = emit_insn (par);
17740 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17741 RTX_FRAME_RELATED_P (par) = 1;
17742
17743 return count * 8;
17744 }
17745
17746 /* Emit a call instruction with pattern PAT. ADDR is the address of
17747 the call target. */
17748
17749 void
17750 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17751 {
17752 rtx insn;
17753
17754 insn = emit_call_insn (pat);
17755
17756 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17757 If the call might use such an entry, add a use of the PIC register
17758 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17759 if (TARGET_VXWORKS_RTP
17760 && flag_pic
17761 && !sibcall
17762 && GET_CODE (addr) == SYMBOL_REF
17763 && (SYMBOL_REF_DECL (addr)
17764 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17765 : !SYMBOL_REF_LOCAL_P (addr)))
17766 {
17767 require_pic_register ();
17768 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17769 }
17770
17771 if (TARGET_AAPCS_BASED)
17772 {
17773 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17774 linker. We need to add an IP clobber to allow setting
17775 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17776 is not needed since it's a fixed register. */
17777 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17778 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17779 }
17780 }
17781
17782 /* Output a 'call' insn. */
17783 const char *
17784 output_call (rtx *operands)
17785 {
17786 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17787
17788 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17789 if (REGNO (operands[0]) == LR_REGNUM)
17790 {
17791 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17792 output_asm_insn ("mov%?\t%0, %|lr", operands);
17793 }
17794
17795 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17796
17797 if (TARGET_INTERWORK || arm_arch4t)
17798 output_asm_insn ("bx%?\t%0", operands);
17799 else
17800 output_asm_insn ("mov%?\t%|pc, %0", operands);
17801
17802 return "";
17803 }
17804
17805 /* Output a 'call' insn whose target is a reference in memory. This is
17806 disabled for ARMv5, where we prefer a blx instead, because otherwise
17807 there's a significant performance overhead. */
17808 const char *
17809 output_call_mem (rtx *operands)
17810 {
17811 gcc_assert (!arm_arch5);
17812 if (TARGET_INTERWORK)
17813 {
17814 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17815 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17816 output_asm_insn ("bx%?\t%|ip", operands);
17817 }
17818 else if (regno_use_in (LR_REGNUM, operands[0]))
17819 {
17820 /* LR is used in the memory address. We load the address in the
17821 first instruction. It's safe to use IP as the target of the
17822 load since the call will kill it anyway. */
17823 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17824 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17825 if (arm_arch4t)
17826 output_asm_insn ("bx%?\t%|ip", operands);
17827 else
17828 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17829 }
17830 else
17831 {
17832 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17833 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17834 }
17835
17836 return "";
17837 }
17838
17839
17840 /* Output a move from arm registers to arm registers of a long double.
17841 OPERANDS[0] is the destination.
17842 OPERANDS[1] is the source. */
17843 const char *
17844 output_mov_long_double_arm_from_arm (rtx *operands)
17845 {
17846 /* We have to be careful here because the two might overlap. */
17847 int dest_start = REGNO (operands[0]);
17848 int src_start = REGNO (operands[1]);
17849 rtx ops[2];
17850 int i;
17851
17852 if (dest_start < src_start)
17853 {
17854 for (i = 0; i < 3; i++)
17855 {
17856 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17857 ops[1] = gen_rtx_REG (SImode, src_start + i);
17858 output_asm_insn ("mov%?\t%0, %1", ops);
17859 }
17860 }
17861 else
17862 {
17863 for (i = 2; i >= 0; i--)
17864 {
17865 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17866 ops[1] = gen_rtx_REG (SImode, src_start + i);
17867 output_asm_insn ("mov%?\t%0, %1", ops);
17868 }
17869 }
17870
17871 return "";
17872 }
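
/* Editor's note: an illustrative, standalone sketch (hypothetical helper) of
   the ordering rule used above.  When the source and destination register
   ranges may overlap, copy upwards if the destination starts below the source
   and downwards otherwise, so no word is overwritten before it is read.  */
static void
example_copy_possibly_overlapping (int *dst, const int *src, int n)
{
  int i;

  if (dst < src)
    for (i = 0; i < n; i++)
      dst[i] = src[i];
  else
    for (i = n - 1; i >= 0; i--)
      dst[i] = src[i];
}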
17873
17874 void
17875 arm_emit_movpair (rtx dest, rtx src)
17876 {
17877 /* If the src is an immediate, simplify it. */
17878 if (CONST_INT_P (src))
17879 {
17880 HOST_WIDE_INT val = INTVAL (src);
17881 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17882 if ((val >> 16) & 0x0000ffff)
17883 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17884 GEN_INT (16)),
17885 GEN_INT ((val >> 16) & 0x0000ffff));
17886 return;
17887 }
17888 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17889 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17890 }
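
/* Editor's note: an illustrative, standalone sketch (hypothetical helper) of
   the constant split performed above for a movw/movt pair: the low 16 bits
   are written first, and the high 16 bits only when they are non-zero.  */
static void
example_split_for_movw_movt (unsigned int val,
                             unsigned int *low16, unsigned int *high16)
{
  *low16 = val & 0xffffu;            /* movw operand  */
  *high16 = (val >> 16) & 0xffffu;   /* movt operand; skipped when zero  */
}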
17891
17892 /* Output a move between double words. It must be REG<-MEM
17893 or MEM<-REG. */
17894 const char *
17895 output_move_double (rtx *operands, bool emit, int *count)
17896 {
17897 enum rtx_code code0 = GET_CODE (operands[0]);
17898 enum rtx_code code1 = GET_CODE (operands[1]);
17899 rtx otherops[3];
17900 if (count)
17901 *count = 1;
17902
17903 /* The only case when this might happen is when
17904 you are looking at the length of a DImode instruction
17905 that has an invalid constant in it. */
17906 if (code0 == REG && code1 != MEM)
17907 {
17908 gcc_assert (!emit);
17909 *count = 2;
17910 return "";
17911 }
17912
17913 if (code0 == REG)
17914 {
17915 unsigned int reg0 = REGNO (operands[0]);
17916
17917 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17918
17919 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17920
17921 switch (GET_CODE (XEXP (operands[1], 0)))
17922 {
17923 case REG:
17924
17925 if (emit)
17926 {
17927 if (TARGET_LDRD
17928 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17929 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17930 else
17931 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17932 }
17933 break;
17934
17935 case PRE_INC:
17936 gcc_assert (TARGET_LDRD);
17937 if (emit)
17938 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17939 break;
17940
17941 case PRE_DEC:
17942 if (emit)
17943 {
17944 if (TARGET_LDRD)
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17946 else
17947 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17948 }
17949 break;
17950
17951 case POST_INC:
17952 if (emit)
17953 {
17954 if (TARGET_LDRD)
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17956 else
17957 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17958 }
17959 break;
17960
17961 case POST_DEC:
17962 gcc_assert (TARGET_LDRD);
17963 if (emit)
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17965 break;
17966
17967 case PRE_MODIFY:
17968 case POST_MODIFY:
17969 /* Autoincrement addressing modes should never have overlapping
17970 base and destination registers, and overlapping index registers
17971 are already prohibited, so this doesn't need to worry about
17972 fix_cm3_ldrd. */
17973 otherops[0] = operands[0];
17974 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17975 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17976
17977 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17978 {
17979 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17980 {
17981 /* Registers overlap so split out the increment. */
17982 if (emit)
17983 {
17984 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17985 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17986 }
17987 if (count)
17988 *count = 2;
17989 }
17990 else
17991 {
17992 /* Use a single insn if we can.
17993 FIXME: IWMMXT allows offsets larger than ldrd can
17994 handle, fix these up with a pair of ldr. */
17995 if (TARGET_THUMB2
17996 || !CONST_INT_P (otherops[2])
17997 || (INTVAL (otherops[2]) > -256
17998 && INTVAL (otherops[2]) < 256))
17999 {
18000 if (emit)
18001 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18002 }
18003 else
18004 {
18005 if (emit)
18006 {
18007 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18008 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18009 }
18010 if (count)
18011 *count = 2;
18012
18013 }
18014 }
18015 }
18016 else
18017 {
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18020 fix these up with a pair of ldr. */
18021 if (TARGET_THUMB2
18022 || !CONST_INT_P (otherops[2])
18023 || (INTVAL (otherops[2]) > -256
18024 && INTVAL (otherops[2]) < 256))
18025 {
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18028 }
18029 else
18030 {
18031 if (emit)
18032 {
18033 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18034 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18035 }
18036 if (count)
18037 *count = 2;
18038 }
18039 }
18040 break;
18041
18042 case LABEL_REF:
18043 case CONST:
18044 /* We might be able to use ldrd %0, %1 here. However the range is
18045 different to ldr/adr, and it is broken on some ARMv7-M
18046 implementations. */
18047 /* Use the second register of the pair to avoid problematic
18048 overlap. */
18049 otherops[1] = operands[1];
18050 if (emit)
18051 output_asm_insn ("adr%?\t%0, %1", otherops);
18052 operands[1] = otherops[0];
18053 if (emit)
18054 {
18055 if (TARGET_LDRD)
18056 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18057 else
18058 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18059 }
18060
18061 if (count)
18062 *count = 2;
18063 break;
18064
18065 /* ??? This needs checking for thumb2. */
18066 default:
18067 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18068 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18069 {
18070 otherops[0] = operands[0];
18071 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18072 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18073
18074 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18075 {
18076 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18077 {
18078 switch ((int) INTVAL (otherops[2]))
18079 {
18080 case -8:
18081 if (emit)
18082 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18083 return "";
18084 case -4:
18085 if (TARGET_THUMB2)
18086 break;
18087 if (emit)
18088 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18089 return "";
18090 case 4:
18091 if (TARGET_THUMB2)
18092 break;
18093 if (emit)
18094 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18095 return "";
18096 }
18097 }
18098 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18099 operands[1] = otherops[0];
18100 if (TARGET_LDRD
18101 && (REG_P (otherops[2])
18102 || TARGET_THUMB2
18103 || (CONST_INT_P (otherops[2])
18104 && INTVAL (otherops[2]) > -256
18105 && INTVAL (otherops[2]) < 256)))
18106 {
18107 if (reg_overlap_mentioned_p (operands[0],
18108 otherops[2]))
18109 {
18110 /* Swap base and index registers over to
18111 avoid a conflict. */
18112 std::swap (otherops[1], otherops[2]);
18113 }
18114 /* If both registers conflict, it will usually
18115 have been fixed by a splitter. */
18116 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18117 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18118 {
18119 if (emit)
18120 {
18121 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18122 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18123 }
18124 if (count)
18125 *count = 2;
18126 }
18127 else
18128 {
18129 otherops[0] = operands[0];
18130 if (emit)
18131 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18132 }
18133 return "";
18134 }
18135
18136 if (CONST_INT_P (otherops[2]))
18137 {
18138 if (emit)
18139 {
18140 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18141 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18142 else
18143 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 }
18145 }
18146 else
18147 {
18148 if (emit)
18149 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18150 }
18151 }
18152 else
18153 {
18154 if (emit)
18155 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18156 }
18157
18158 if (count)
18159 *count = 2;
18160
18161 if (TARGET_LDRD)
18162 return "ldr%(d%)\t%0, [%1]";
18163
18164 return "ldm%(ia%)\t%1, %M0";
18165 }
18166 else
18167 {
18168 otherops[1] = adjust_address (operands[1], SImode, 4);
18169 /* Take care of overlapping base/data reg. */
18170 if (reg_mentioned_p (operands[0], operands[1]))
18171 {
18172 if (emit)
18173 {
18174 output_asm_insn ("ldr%?\t%0, %1", otherops);
18175 output_asm_insn ("ldr%?\t%0, %1", operands);
18176 }
18177 if (count)
18178 *count = 2;
18179
18180 }
18181 else
18182 {
18183 if (emit)
18184 {
18185 output_asm_insn ("ldr%?\t%0, %1", operands);
18186 output_asm_insn ("ldr%?\t%0, %1", otherops);
18187 }
18188 if (count)
18189 *count = 2;
18190 }
18191 }
18192 }
18193 }
18194 else
18195 {
18196 /* Constraints should ensure this. */
18197 gcc_assert (code0 == MEM && code1 == REG);
18198 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18199 || (TARGET_ARM && TARGET_LDRD));
18200
18201 switch (GET_CODE (XEXP (operands[0], 0)))
18202 {
18203 case REG:
18204 if (emit)
18205 {
18206 if (TARGET_LDRD)
18207 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18208 else
18209 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18210 }
18211 break;
18212
18213 case PRE_INC:
18214 gcc_assert (TARGET_LDRD);
18215 if (emit)
18216 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18217 break;
18218
18219 case PRE_DEC:
18220 if (emit)
18221 {
18222 if (TARGET_LDRD)
18223 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18224 else
18225 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18226 }
18227 break;
18228
18229 case POST_INC:
18230 if (emit)
18231 {
18232 if (TARGET_LDRD)
18233 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18234 else
18235 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18236 }
18237 break;
18238
18239 case POST_DEC:
18240 gcc_assert (TARGET_LDRD);
18241 if (emit)
18242 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18243 break;
18244
18245 case PRE_MODIFY:
18246 case POST_MODIFY:
18247 otherops[0] = operands[1];
18248 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18249 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18250
18251 /* IWMMXT allows offsets larger than strd can handle;
18252 fix these up with a pair of str. */
18253 if (!TARGET_THUMB2
18254 && CONST_INT_P (otherops[2])
18255 && (INTVAL(otherops[2]) <= -256
18256 || INTVAL(otherops[2]) >= 256))
18257 {
18258 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18259 {
18260 if (emit)
18261 {
18262 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18263 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18264 }
18265 if (count)
18266 *count = 2;
18267 }
18268 else
18269 {
18270 if (emit)
18271 {
18272 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18273 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18274 }
18275 if (count)
18276 *count = 2;
18277 }
18278 }
18279 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18280 {
18281 if (emit)
18282 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18283 }
18284 else
18285 {
18286 if (emit)
18287 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18288 }
18289 break;
18290
18291 case PLUS:
18292 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18293 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18294 {
18295 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18296 {
18297 case -8:
18298 if (emit)
18299 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18300 return "";
18301
18302 case -4:
18303 if (TARGET_THUMB2)
18304 break;
18305 if (emit)
18306 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18307 return "";
18308
18309 case 4:
18310 if (TARGET_THUMB2)
18311 break;
18312 if (emit)
18313 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18314 return "";
18315 }
18316 }
18317 if (TARGET_LDRD
18318 && (REG_P (otherops[2])
18319 || TARGET_THUMB2
18320 || (CONST_INT_P (otherops[2])
18321 && INTVAL (otherops[2]) > -256
18322 && INTVAL (otherops[2]) < 256)))
18323 {
18324 otherops[0] = operands[1];
18325 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18326 if (emit)
18327 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18328 return "";
18329 }
18330 /* Fall through */
18331
18332 default:
18333 otherops[0] = adjust_address (operands[0], SImode, 4);
18334 otherops[1] = operands[1];
18335 if (emit)
18336 {
18337 output_asm_insn ("str%?\t%1, %0", operands);
18338 output_asm_insn ("str%?\t%H1, %0", otherops);
18339 }
18340 if (count)
18341 *count = 2;
18342 }
18343 }
18344
18345 return "";
18346 }
18347
18348 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18349 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18350
18351 const char *
18352 output_move_quad (rtx *operands)
18353 {
18354 if (REG_P (operands[0]))
18355 {
18356 /* Load, or reg->reg move. */
18357
18358 if (MEM_P (operands[1]))
18359 {
18360 switch (GET_CODE (XEXP (operands[1], 0)))
18361 {
18362 case REG:
18363 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18364 break;
18365
18366 case LABEL_REF:
18367 case CONST:
18368 output_asm_insn ("adr%?\t%0, %1", operands);
18369 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18370 break;
18371
18372 default:
18373 gcc_unreachable ();
18374 }
18375 }
18376 else
18377 {
18378 rtx ops[2];
18379 int dest, src, i;
18380
18381 gcc_assert (REG_P (operands[1]));
18382
18383 dest = REGNO (operands[0]);
18384 src = REGNO (operands[1]);
18385
18386 /* This seems pretty dumb, but hopefully GCC won't try to do it
18387 very often. */
18388 if (dest < src)
18389 for (i = 0; i < 4; i++)
18390 {
18391 ops[0] = gen_rtx_REG (SImode, dest + i);
18392 ops[1] = gen_rtx_REG (SImode, src + i);
18393 output_asm_insn ("mov%?\t%0, %1", ops);
18394 }
18395 else
18396 for (i = 3; i >= 0; i--)
18397 {
18398 ops[0] = gen_rtx_REG (SImode, dest + i);
18399 ops[1] = gen_rtx_REG (SImode, src + i);
18400 output_asm_insn ("mov%?\t%0, %1", ops);
18401 }
18402 }
18403 }
18404 else
18405 {
18406 gcc_assert (MEM_P (operands[0]));
18407 gcc_assert (REG_P (operands[1]));
18408 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18409
18410 switch (GET_CODE (XEXP (operands[0], 0)))
18411 {
18412 case REG:
18413 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18414 break;
18415
18416 default:
18417 gcc_unreachable ();
18418 }
18419 }
18420
18421 return "";
18422 }
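
/* A brief illustration of the register-to-register case handled above
   (register numbers invented for the example): moving a quad-word value
   from r4-r7 into r2-r5 copies r4->r2, r5->r3, ... in ascending order,
   because the destination starts below the source; moving it the other
   way copies in descending order, so that no source register is
   overwritten before it has been read.  */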
18423
18424 /* Output a VFP load or store instruction. */
18425
18426 const char *
18427 output_move_vfp (rtx *operands)
18428 {
18429 rtx reg, mem, addr, ops[2];
18430 int load = REG_P (operands[0]);
18431 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18432 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18433 const char *templ;
18434 char buff[50];
18435 machine_mode mode;
18436
18437 reg = operands[!load];
18438 mem = operands[load];
18439
18440 mode = GET_MODE (reg);
18441
18442 gcc_assert (REG_P (reg));
18443 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18444 gcc_assert (mode == SFmode
18445 || mode == DFmode
18446 || mode == SImode
18447 || mode == DImode
18448 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18449 gcc_assert (MEM_P (mem));
18450
18451 addr = XEXP (mem, 0);
18452
18453 switch (GET_CODE (addr))
18454 {
18455 case PRE_DEC:
18456 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18457 ops[0] = XEXP (addr, 0);
18458 ops[1] = reg;
18459 break;
18460
18461 case POST_INC:
18462 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18463 ops[0] = XEXP (addr, 0);
18464 ops[1] = reg;
18465 break;
18466
18467 default:
18468 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18469 ops[0] = reg;
18470 ops[1] = mem;
18471 break;
18472 }
18473
18474 sprintf (buff, templ,
18475 load ? "ld" : "st",
18476 dp ? "64" : "32",
18477 dp ? "P" : "",
18478 integer_p ? "\t%@ int" : "");
18479 output_asm_insn (buff, ops);
18480
18481 return "";
18482 }
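
/* Rough examples of the templates assembled above (illustrative only;
   the register allocation shown is invented):

     DFmode load, plain address:	vldr%?.64 %P0, %1     ->  vldr.64  d7, [r3]
     SFmode store, post-increment:	vstmia%?.32 %0!, {%1} ->  vstmia.32 r2!, {s15}

   The trailing "%@ int" is appended as an assembler comment when the
   value being moved is an integer mode held in a VFP register.  */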
18483
18484 /* Output a Neon double-word or quad-word load or store, or a load
18485 or store for larger structure modes.
18486
18487 WARNING: The ordering of elements is weird in big-endian mode,
18488 because the EABI requires that vectors stored in memory appear
18489    as though they were stored by a VSTM instruction.
18490 GCC RTL defines element ordering based on in-memory order.
18491 This can be different from the architectural ordering of elements
18492 within a NEON register. The intrinsics defined in arm_neon.h use the
18493 NEON register element ordering, not the GCC RTL element ordering.
18494
18495    For example, the in-memory ordering of a big-endian quadword
18496 vector with 16-bit elements when stored from register pair {d0,d1}
18497 will be (lowest address first, d0[N] is NEON register element N):
18498
18499 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18500
18501 When necessary, quadword registers (dN, dN+1) are moved to ARM
18502 registers from rN in the order:
18503
18504 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18505
18506 So that STM/LDM can be used on vectors in ARM registers, and the
18507 same memory layout will result as if VSTM/VLDM were used.
18508
18509 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18510 possible, which allows use of appropriate alignment tags.
18511 Note that the choice of "64" is independent of the actual vector
18512 element size; this size simply ensures that the behavior is
18513 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18514
18515 Due to limitations of those instructions, use of VST1.64/VLD1.64
18516 is not possible if:
18517 - the address contains PRE_DEC, or
18518 - the mode refers to more than 4 double-word registers
18519
18520 In those cases, it would be possible to replace VSTM/VLDM by a
18521 sequence of instructions; this is not currently implemented since
18522 this is not certain to actually improve performance. */
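
/* As a concrete (illustrative) example of the preference described above,
   a quad-word store to the address held in rN may be emitted as

     vst1.64	{d0, d1}, [rN]

   rather than

     vstmia	rN, {d0, d1}

   Both forms leave the same memory image in either endianness, but the
   vst1.64 form can carry an alignment hint on its address operand.  */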
18523
18524 const char *
18525 output_move_neon (rtx *operands)
18526 {
18527 rtx reg, mem, addr, ops[2];
18528 int regno, nregs, load = REG_P (operands[0]);
18529 const char *templ;
18530 char buff[50];
18531 machine_mode mode;
18532
18533 reg = operands[!load];
18534 mem = operands[load];
18535
18536 mode = GET_MODE (reg);
18537
18538 gcc_assert (REG_P (reg));
18539 regno = REGNO (reg);
18540 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18541 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18542 || NEON_REGNO_OK_FOR_QUAD (regno));
18543 gcc_assert (VALID_NEON_DREG_MODE (mode)
18544 || VALID_NEON_QREG_MODE (mode)
18545 || VALID_NEON_STRUCT_MODE (mode));
18546 gcc_assert (MEM_P (mem));
18547
18548 addr = XEXP (mem, 0);
18549
18550 /* Strip off const from addresses like (const (plus (...))). */
18551 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18552 addr = XEXP (addr, 0);
18553
18554 switch (GET_CODE (addr))
18555 {
18556 case POST_INC:
18557 /* We have to use vldm / vstm for too-large modes. */
18558 if (nregs > 4)
18559 {
18560 templ = "v%smia%%?\t%%0!, %%h1";
18561 ops[0] = XEXP (addr, 0);
18562 }
18563 else
18564 {
18565 templ = "v%s1.64\t%%h1, %%A0";
18566 ops[0] = mem;
18567 }
18568 ops[1] = reg;
18569 break;
18570
18571 case PRE_DEC:
18572 /* We have to use vldm / vstm in this case, since there is no
18573 pre-decrement form of the vld1 / vst1 instructions. */
18574 templ = "v%smdb%%?\t%%0!, %%h1";
18575 ops[0] = XEXP (addr, 0);
18576 ops[1] = reg;
18577 break;
18578
18579 case POST_MODIFY:
18580 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18581 gcc_unreachable ();
18582
18583 case REG:
18584 /* We have to use vldm / vstm for too-large modes. */
18585 if (nregs > 1)
18586 {
18587 if (nregs > 4)
18588 templ = "v%smia%%?\t%%m0, %%h1";
18589 else
18590 templ = "v%s1.64\t%%h1, %%A0";
18591
18592 ops[0] = mem;
18593 ops[1] = reg;
18594 break;
18595 }
18596 /* Fall through. */
18597 case LABEL_REF:
18598 case PLUS:
18599 {
18600 int i;
18601 int overlap = -1;
18602 for (i = 0; i < nregs; i++)
18603 {
18604 /* We're only using DImode here because it's a convenient size. */
18605 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18606 ops[1] = adjust_address (mem, DImode, 8 * i);
18607 if (reg_overlap_mentioned_p (ops[0], mem))
18608 {
18609 gcc_assert (overlap == -1);
18610 overlap = i;
18611 }
18612 else
18613 {
18614 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18615 output_asm_insn (buff, ops);
18616 }
18617 }
18618 if (overlap != -1)
18619 {
18620 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18621 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18622 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18623 output_asm_insn (buff, ops);
18624 }
18625
18626 return "";
18627 }
18628
18629 default:
18630 gcc_unreachable ();
18631 }
18632
18633 sprintf (buff, templ, load ? "ld" : "st");
18634 output_asm_insn (buff, ops);
18635
18636 return "";
18637 }
18638
18639 /* Compute and return the length of neon_mov<mode>, where <mode> is
18640 one of VSTRUCT modes: EI, OI, CI or XI. */
18641 int
18642 arm_attr_length_move_neon (rtx_insn *insn)
18643 {
18644 rtx reg, mem, addr;
18645 int load;
18646 machine_mode mode;
18647
18648 extract_insn_cached (insn);
18649
18650 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18651 {
18652 mode = GET_MODE (recog_data.operand[0]);
18653 switch (mode)
18654 {
18655 case EImode:
18656 case OImode:
18657 return 8;
18658 case CImode:
18659 return 12;
18660 case XImode:
18661 return 16;
18662 default:
18663 gcc_unreachable ();
18664 }
18665 }
18666
18667 load = REG_P (recog_data.operand[0]);
18668 reg = recog_data.operand[!load];
18669 mem = recog_data.operand[load];
18670
18671 gcc_assert (MEM_P (mem));
18672
18673 mode = GET_MODE (reg);
18674 addr = XEXP (mem, 0);
18675
18676 /* Strip off const from addresses like (const (plus (...))). */
18677 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18678 addr = XEXP (addr, 0);
18679
18680 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18681 {
18682 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18683 return insns * 4;
18684 }
18685 else
18686 return 4;
18687 }
18688
18689 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18690 return zero. */
18691
18692 int
18693 arm_address_offset_is_imm (rtx_insn *insn)
18694 {
18695 rtx mem, addr;
18696
18697 extract_insn_cached (insn);
18698
18699 if (REG_P (recog_data.operand[0]))
18700 return 0;
18701
18702 mem = recog_data.operand[0];
18703
18704 gcc_assert (MEM_P (mem));
18705
18706 addr = XEXP (mem, 0);
18707
18708 if (REG_P (addr)
18709 || (GET_CODE (addr) == PLUS
18710 && REG_P (XEXP (addr, 0))
18711 && CONST_INT_P (XEXP (addr, 1))))
18712 return 1;
18713 else
18714 return 0;
18715 }
18716
18717 /* Output an ADD r, s, #n where n may be too big for one instruction.
18718    If N is zero and the source and destination registers are the same,
	 output nothing.  */
18719 const char *
18720 output_add_immediate (rtx *operands)
18721 {
18722 HOST_WIDE_INT n = INTVAL (operands[2]);
18723
18724 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18725 {
18726 if (n < 0)
18727 output_multi_immediate (operands,
18728 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18729 -n);
18730 else
18731 output_multi_immediate (operands,
18732 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18733 n);
18734 }
18735
18736 return "";
18737 }
18738
18739 /* Output a multiple immediate operation.
18740 OPERANDS is the vector of operands referred to in the output patterns.
18741 INSTR1 is the output pattern to use for the first constant.
18742 INSTR2 is the output pattern to use for subsequent constants.
18743 IMMED_OP is the index of the constant slot in OPERANDS.
18744 N is the constant value. */
18745 static const char *
18746 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18747 int immed_op, HOST_WIDE_INT n)
18748 {
18749 #if HOST_BITS_PER_WIDE_INT > 32
18750 n &= 0xffffffff;
18751 #endif
18752
18753 if (n == 0)
18754 {
18755 /* Quick and easy output. */
18756 operands[immed_op] = const0_rtx;
18757 output_asm_insn (instr1, operands);
18758 }
18759 else
18760 {
18761 int i;
18762 const char * instr = instr1;
18763
18764 /* Note that n is never zero here (which would give no output). */
18765 for (i = 0; i < 32; i += 2)
18766 {
18767 if (n & (3 << i))
18768 {
18769 operands[immed_op] = GEN_INT (n & (255 << i));
18770 output_asm_insn (instr, operands);
18771 instr = instr2;
18772 i += 6;
18773 }
18774 }
18775 }
18776
18777 return "";
18778 }
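
/* A worked example (illustrative) of the splitting loop above, as used by
   output_add_immediate: for N = 0x0001f001 the first pair of set bits is
   found at position 0, so "add rd, rn, #1" is emitted; the scan resumes
   eight bits further on and finds bits 12-16, emitting
   "add rd, rd, #0x1f000".  Each chunk is an 8-bit value at an even bit
   position, i.e. a valid ARM immediate.  */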
18779
18780 /* Return the name of a shifter operation. */
18781 static const char *
18782 arm_shift_nmem(enum rtx_code code)
18783 {
18784 switch (code)
18785 {
18786 case ASHIFT:
18787 return ARM_LSL_NAME;
18788
18789 case ASHIFTRT:
18790 return "asr";
18791
18792 case LSHIFTRT:
18793 return "lsr";
18794
18795 case ROTATERT:
18796 return "ror";
18797
18798 default:
18799 abort();
18800 }
18801 }
18802
18803 /* Return the appropriate ARM instruction for the operation code.
18804 The returned result should not be overwritten. OP is the rtx of the
18805 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18806 was shifted. */
18807 const char *
18808 arithmetic_instr (rtx op, int shift_first_arg)
18809 {
18810 switch (GET_CODE (op))
18811 {
18812 case PLUS:
18813 return "add";
18814
18815 case MINUS:
18816 return shift_first_arg ? "rsb" : "sub";
18817
18818 case IOR:
18819 return "orr";
18820
18821 case XOR:
18822 return "eor";
18823
18824 case AND:
18825 return "and";
18826
18827 case ASHIFT:
18828 case ASHIFTRT:
18829 case LSHIFTRT:
18830 case ROTATERT:
18831 return arm_shift_nmem(GET_CODE(op));
18832
18833 default:
18834 gcc_unreachable ();
18835 }
18836 }
18837
18838 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18839 for the operation code. The returned result should not be overwritten.
18840 OP is the rtx code of the shift.
18841    On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18842    holds the constant shift amount.  */
18843 static const char *
18844 shift_op (rtx op, HOST_WIDE_INT *amountp)
18845 {
18846 const char * mnem;
18847 enum rtx_code code = GET_CODE (op);
18848
18849 switch (code)
18850 {
18851 case ROTATE:
18852 if (!CONST_INT_P (XEXP (op, 1)))
18853 {
18854 output_operand_lossage ("invalid shift operand");
18855 return NULL;
18856 }
18857
18858 code = ROTATERT;
18859 *amountp = 32 - INTVAL (XEXP (op, 1));
18860 mnem = "ror";
18861 break;
18862
18863 case ASHIFT:
18864 case ASHIFTRT:
18865 case LSHIFTRT:
18866 case ROTATERT:
18867 mnem = arm_shift_nmem(code);
18868 if (CONST_INT_P (XEXP (op, 1)))
18869 {
18870 *amountp = INTVAL (XEXP (op, 1));
18871 }
18872 else if (REG_P (XEXP (op, 1)))
18873 {
18874 *amountp = -1;
18875 return mnem;
18876 }
18877 else
18878 {
18879 output_operand_lossage ("invalid shift operand");
18880 return NULL;
18881 }
18882 break;
18883
18884 case MULT:
18885 /* We never have to worry about the amount being other than a
18886 power of 2, since this case can never be reloaded from a reg. */
18887 if (!CONST_INT_P (XEXP (op, 1)))
18888 {
18889 output_operand_lossage ("invalid shift operand");
18890 return NULL;
18891 }
18892
18893 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18894
18895 /* Amount must be a power of two. */
18896 if (*amountp & (*amountp - 1))
18897 {
18898 output_operand_lossage ("invalid shift operand");
18899 return NULL;
18900 }
18901
18902 *amountp = int_log2 (*amountp);
18903 return ARM_LSL_NAME;
18904
18905 default:
18906 output_operand_lossage ("invalid shift operand");
18907 return NULL;
18908 }
18909
18910 /* This is not 100% correct, but follows from the desire to merge
18911 multiplication by a power of 2 with the recognizer for a
18912 shift. >=32 is not a valid shift for "lsl", so we must try and
18913 output a shift that produces the correct arithmetical result.
18914 Using lsr #32 is identical except for the fact that the carry bit
18915 is not set correctly if we set the flags; but we never use the
18916 carry bit from such an operation, so we can ignore that. */
18917 if (code == ROTATERT)
18918 /* Rotate is just modulo 32. */
18919 *amountp &= 31;
18920 else if (*amountp != (*amountp & 31))
18921 {
18922 if (code == ASHIFT)
18923 mnem = "lsr";
18924 *amountp = 32;
18925 }
18926
18927 /* Shifts of 0 are no-ops. */
18928 if (*amountp == 0)
18929 return NULL;
18930
18931 return mnem;
18932 }
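
/* For example (illustrative), the operand (mult:SI (reg) (const_int 8)) is
   printed with the mnemonic returned here (ARM_LSL_NAME) and *AMOUNTP set
   to int_log2 (8) == 3, i.e. a left shift by three.  */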
18933
18934 /* Return the shift count (base-2 logarithm) of the power of two POWER.  */
18935
18936 static HOST_WIDE_INT
18937 int_log2 (HOST_WIDE_INT power)
18938 {
18939 HOST_WIDE_INT shift = 0;
18940
18941 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18942 {
18943 gcc_assert (shift <= 31);
18944 shift++;
18945 }
18946
18947 return shift;
18948 }
18949
18950 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18951 because /bin/as is horribly restrictive. The judgement about
18952 whether or not each character is 'printable' (and can be output as
18953 is) or not (and must be printed with an octal escape) must be made
18954 with reference to the *host* character set -- the situation is
18955 similar to that discussed in the comments above pp_c_char in
18956 c-pretty-print.c. */
18957
18958 #define MAX_ASCII_LEN 51
18959
18960 void
18961 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18962 {
18963 int i;
18964 int len_so_far = 0;
18965
18966 fputs ("\t.ascii\t\"", stream);
18967
18968 for (i = 0; i < len; i++)
18969 {
18970 int c = p[i];
18971
18972 if (len_so_far >= MAX_ASCII_LEN)
18973 {
18974 fputs ("\"\n\t.ascii\t\"", stream);
18975 len_so_far = 0;
18976 }
18977
18978 if (ISPRINT (c))
18979 {
18980 if (c == '\\' || c == '\"')
18981 {
18982 putc ('\\', stream);
18983 len_so_far++;
18984 }
18985 putc (c, stream);
18986 len_so_far++;
18987 }
18988 else
18989 {
18990 fprintf (stream, "\\%03o", c);
18991 len_so_far += 4;
18992 }
18993 }
18994
18995 fputs ("\"\n", stream);
18996 }
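
/* Example (illustrative): for the four-byte input "a\"b\n" this routine
   emits

	.ascii	"a\"b\012"

   i.e. quote and backslash are backslash-escaped and the non-printable
   newline becomes a three-digit octal escape.  Output longer than
   MAX_ASCII_LEN characters is split across several .ascii directives.  */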
18997 \f
18998 /* Compute the register save mask for registers 0 through 12
18999 inclusive. This code is used by arm_compute_save_reg_mask. */
19000
19001 static unsigned long
19002 arm_compute_save_reg0_reg12_mask (void)
19003 {
19004 unsigned long func_type = arm_current_func_type ();
19005 unsigned long save_reg_mask = 0;
19006 unsigned int reg;
19007
19008 if (IS_INTERRUPT (func_type))
19009 {
19010 unsigned int max_reg;
19011 /* Interrupt functions must not corrupt any registers,
19012 even call clobbered ones. If this is a leaf function
19013 we can just examine the registers used by the RTL, but
19014 otherwise we have to assume that whatever function is
19015 called might clobber anything, and so we have to save
19016 all the call-clobbered registers as well. */
19017 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19018 /* FIQ handlers have registers r8 - r12 banked, so
19019 	 we only need to check r0 - r7.  Normal ISRs only
19020 	 bank r13 and r14, so we must check up to r12.
19021 r13 is the stack pointer which is always preserved,
19022 so we do not need to consider it here. */
19023 max_reg = 7;
19024 else
19025 max_reg = 12;
19026
19027 for (reg = 0; reg <= max_reg; reg++)
19028 if (df_regs_ever_live_p (reg)
19029 || (! crtl->is_leaf && call_used_regs[reg]))
19030 save_reg_mask |= (1 << reg);
19031
19032 /* Also save the pic base register if necessary. */
19033 if (flag_pic
19034 && !TARGET_SINGLE_PIC_BASE
19035 && arm_pic_register != INVALID_REGNUM
19036 && crtl->uses_pic_offset_table)
19037 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19038 }
19039 else if (IS_VOLATILE(func_type))
19040 {
19041 /* For noreturn functions we historically omitted register saves
19042 	 altogether.  However, this really messes up debugging.  As a
19043 	 compromise, save just the frame pointers.  Combined with the link
19044 register saved elsewhere this should be sufficient to get
19045 a backtrace. */
19046 if (frame_pointer_needed)
19047 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19050 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19051 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19052 }
19053 else
19054 {
19055 /* In the normal case we only need to save those registers
19056 which are call saved and which are used by this function. */
19057 for (reg = 0; reg <= 11; reg++)
19058 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19059 save_reg_mask |= (1 << reg);
19060
19061 /* Handle the frame pointer as a special case. */
19062 if (frame_pointer_needed)
19063 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19064
19065 /* If we aren't loading the PIC register,
19066 don't stack it even though it may be live. */
19067 if (flag_pic
19068 && !TARGET_SINGLE_PIC_BASE
19069 && arm_pic_register != INVALID_REGNUM
19070 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19071 || crtl->uses_pic_offset_table))
19072 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19073
19074 /* The prologue will copy SP into R0, so save it. */
19075 if (IS_STACKALIGN (func_type))
19076 save_reg_mask |= 1;
19077 }
19078
19079 /* Save registers so the exception handler can modify them. */
19080 if (crtl->calls_eh_return)
19081 {
19082 unsigned int i;
19083
19084 for (i = 0; ; i++)
19085 {
19086 reg = EH_RETURN_DATA_REGNO (i);
19087 if (reg == INVALID_REGNUM)
19088 break;
19089 save_reg_mask |= 1 << reg;
19090 }
19091 }
19092
19093 return save_reg_mask;
19094 }
19095
19096 /* Return true if r3 is live at the start of the function. */
19097
19098 static bool
19099 arm_r3_live_at_start_p (void)
19100 {
19101 /* Just look at cfg info, which is still close enough to correct at this
19102 point. This gives false positives for broken functions that might use
19103 uninitialized data that happens to be allocated in r3, but who cares? */
19104 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19105 }
19106
19107 /* Compute the number of bytes used to store the static chain register on the
19108 stack, above the stack frame. We need to know this accurately to get the
19109 alignment of the rest of the stack frame correct. */
19110
19111 static int
19112 arm_compute_static_chain_stack_bytes (void)
19113 {
19114 /* See the defining assertion in arm_expand_prologue. */
19115 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19116 && IS_NESTED (arm_current_func_type ())
19117 && arm_r3_live_at_start_p ()
19118 && crtl->args.pretend_args_size == 0)
19119 return 4;
19120
19121 return 0;
19122 }
19123
19124 /* Compute a bit mask of which registers need to be
19125 saved on the stack for the current function.
19126 This is used by arm_get_frame_offsets, which may add extra registers. */
19127
19128 static unsigned long
19129 arm_compute_save_reg_mask (void)
19130 {
19131 unsigned int save_reg_mask = 0;
19132 unsigned long func_type = arm_current_func_type ();
19133 unsigned int reg;
19134
19135 if (IS_NAKED (func_type))
19136 /* This should never really happen. */
19137 return 0;
19138
19139 /* If we are creating a stack frame, then we must save the frame pointer,
19140 IP (which will hold the old stack pointer), LR and the PC. */
19141 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19142 save_reg_mask |=
19143 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19144 | (1 << IP_REGNUM)
19145 | (1 << LR_REGNUM)
19146 | (1 << PC_REGNUM);
19147
19148 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19149
19150 /* Decide if we need to save the link register.
19151 Interrupt routines have their own banked link register,
19152 so they never need to save it.
19153 Otherwise if we do not use the link register we do not need to save
19154 it. If we are pushing other registers onto the stack however, we
19155 can save an instruction in the epilogue by pushing the link register
19156 now and then popping it back into the PC. This incurs extra memory
19157 accesses though, so we only do it when optimizing for size, and only
19158 if we know that we will not need a fancy return sequence. */
19159 if (df_regs_ever_live_p (LR_REGNUM)
19160 || (save_reg_mask
19161 && optimize_size
19162 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19163 && !crtl->calls_eh_return))
19164 save_reg_mask |= 1 << LR_REGNUM;
19165
19166 if (cfun->machine->lr_save_eliminated)
19167 save_reg_mask &= ~ (1 << LR_REGNUM);
19168
19169 if (TARGET_REALLY_IWMMXT
19170 && ((bit_count (save_reg_mask)
19171 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19172 arm_compute_static_chain_stack_bytes())
19173 ) % 2) != 0)
19174 {
19175 /* The total number of registers that are going to be pushed
19176 onto the stack is odd. We need to ensure that the stack
19177 is 64-bit aligned before we start to save iWMMXt registers,
19178 and also before we start to create locals. (A local variable
19179 might be a double or long long which we will load/store using
19180 an iWMMXt instruction). Therefore we need to push another
19181 ARM register, so that the stack will be 64-bit aligned. We
19182 	 try to avoid using the arg registers (r0 - r3) as they might be
19183 used to pass values in a tail call. */
19184 for (reg = 4; reg <= 12; reg++)
19185 if ((save_reg_mask & (1 << reg)) == 0)
19186 break;
19187
19188 if (reg <= 12)
19189 save_reg_mask |= (1 << reg);
19190 else
19191 {
19192 cfun->machine->sibcall_blocked = 1;
19193 save_reg_mask |= (1 << 3);
19194 }
19195 }
19196
19197 /* We may need to push an additional register for use initializing the
19198 PIC base register. */
19199 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19200 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19201 {
19202 reg = thumb_find_work_register (1 << 4);
19203 if (!call_used_regs[reg])
19204 save_reg_mask |= (1 << reg);
19205 }
19206
19207 return save_reg_mask;
19208 }
19209
19210
19211 /* Compute a bit mask of which registers need to be
19212 saved on the stack for the current function. */
19213 static unsigned long
19214 thumb1_compute_save_reg_mask (void)
19215 {
19216 unsigned long mask;
19217 unsigned reg;
19218
19219 mask = 0;
19220 for (reg = 0; reg < 12; reg ++)
19221 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19222 mask |= 1 << reg;
19223
19224 if (flag_pic
19225 && !TARGET_SINGLE_PIC_BASE
19226 && arm_pic_register != INVALID_REGNUM
19227 && crtl->uses_pic_offset_table)
19228 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19229
19230 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19231 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19232 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19233
19234 /* LR will also be pushed if any lo regs are pushed. */
19235 if (mask & 0xff || thumb_force_lr_save ())
19236 mask |= (1 << LR_REGNUM);
19237
19238 /* Make sure we have a low work register if we need one.
19239 We will need one if we are going to push a high register,
19240 but we are not currently intending to push a low register. */
19241 if ((mask & 0xff) == 0
19242 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19243 {
19244 /* Use thumb_find_work_register to choose which register
19245 we will use. If the register is live then we will
19246 have to push it. Use LAST_LO_REGNUM as our fallback
19247 choice for the register to select. */
19248 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19249 /* Make sure the register returned by thumb_find_work_register is
19250 not part of the return value. */
19251 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19252 reg = LAST_LO_REGNUM;
19253
19254 if (! call_used_regs[reg])
19255 mask |= 1 << reg;
19256 }
19257
19258 /* The 504 below is 8 bytes less than 512 because there are two possible
19259 alignment words. We can't tell here if they will be present or not so we
19260 have to play it safe and assume that they are. */
19261 if ((CALLER_INTERWORKING_SLOT_SIZE +
19262 ROUND_UP_WORD (get_frame_size ()) +
19263 crtl->outgoing_args_size) >= 504)
19264 {
19265 /* This is the same as the code in thumb1_expand_prologue() which
19266 determines which register to use for stack decrement. */
19267 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19268 if (mask & (1 << reg))
19269 break;
19270
19271 if (reg > LAST_LO_REGNUM)
19272 {
19273 /* Make sure we have a register available for stack decrement. */
19274 mask |= 1 << LAST_LO_REGNUM;
19275 }
19276 }
19277
19278 return mask;
19279 }
19280
19281
19282 /* Return the number of bytes required to save VFP registers. */
19283 static int
19284 arm_get_vfp_saved_size (void)
19285 {
19286 unsigned int regno;
19287 int count;
19288 int saved;
19289
19290 saved = 0;
19291 /* Space for saved VFP registers. */
19292 if (TARGET_HARD_FLOAT && TARGET_VFP)
19293 {
19294 count = 0;
19295 for (regno = FIRST_VFP_REGNUM;
19296 regno < LAST_VFP_REGNUM;
19297 regno += 2)
19298 {
19299 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19300 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19301 {
19302 if (count > 0)
19303 {
19304 /* Workaround ARM10 VFPr1 bug. */
19305 if (count == 2 && !arm_arch6)
19306 count++;
19307 saved += count * 8;
19308 }
19309 count = 0;
19310 }
19311 else
19312 count++;
19313 }
19314 if (count > 0)
19315 {
19316 if (count == 2 && !arm_arch6)
19317 count++;
19318 saved += count * 8;
19319 }
19320 }
19321 return saved;
19322 }
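
/* For example (illustrative): on a pre-ARMv6 core a contiguous block of
   exactly two live D registers is accounted as three (24 bytes rather
   than 16) because of the ARM10 VFPr1 workaround applied above.  */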
19323
19324
19325 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19326 everything bar the final return instruction. If simple_return is true,
19327    then do not output the epilogue, because it has already been emitted in RTL.  */
19328 const char *
19329 output_return_instruction (rtx operand, bool really_return, bool reverse,
19330 bool simple_return)
19331 {
19332 char conditional[10];
19333 char instr[100];
19334 unsigned reg;
19335 unsigned long live_regs_mask;
19336 unsigned long func_type;
19337 arm_stack_offsets *offsets;
19338
19339 func_type = arm_current_func_type ();
19340
19341 if (IS_NAKED (func_type))
19342 return "";
19343
19344 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19345 {
19346 /* If this function was declared non-returning, and we have
19347 found a tail call, then we have to trust that the called
19348 function won't return. */
19349 if (really_return)
19350 {
19351 rtx ops[2];
19352
19353 /* Otherwise, trap an attempted return by aborting. */
19354 ops[0] = operand;
19355 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19356 : "abort");
19357 assemble_external_libcall (ops[1]);
19358 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19359 }
19360
19361 return "";
19362 }
19363
19364 gcc_assert (!cfun->calls_alloca || really_return);
19365
19366 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19367
19368 cfun->machine->return_used_this_function = 1;
19369
19370 offsets = arm_get_frame_offsets ();
19371 live_regs_mask = offsets->saved_regs_mask;
19372
19373 if (!simple_return && live_regs_mask)
19374 {
19375 const char * return_reg;
19376
19377 /* If we do not have any special requirements for function exit
19378 (e.g. interworking) then we can load the return address
19379 directly into the PC. Otherwise we must load it into LR. */
19380 if (really_return
19381 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19382 return_reg = reg_names[PC_REGNUM];
19383 else
19384 return_reg = reg_names[LR_REGNUM];
19385
19386 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19387 {
19388 /* There are three possible reasons for the IP register
19389 	     being saved: 1) a stack frame was created, in which case
19390 IP contains the old stack pointer, or 2) an ISR routine
19391 corrupted it, or 3) it was saved to align the stack on
19392 iWMMXt. In case 1, restore IP into SP, otherwise just
19393 restore IP. */
19394 if (frame_pointer_needed)
19395 {
19396 live_regs_mask &= ~ (1 << IP_REGNUM);
19397 live_regs_mask |= (1 << SP_REGNUM);
19398 }
19399 else
19400 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19401 }
19402
19403 /* On some ARM architectures it is faster to use LDR rather than
19404 LDM to load a single register. On other architectures, the
19405 cost is the same. In 26 bit mode, or for exception handlers,
19406 we have to use LDM to load the PC so that the CPSR is also
19407 restored. */
19408 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19409 if (live_regs_mask == (1U << reg))
19410 break;
19411
19412 if (reg <= LAST_ARM_REGNUM
19413 && (reg != LR_REGNUM
19414 || ! really_return
19415 || ! IS_INTERRUPT (func_type)))
19416 {
19417 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19418 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19419 }
19420 else
19421 {
19422 char *p;
19423 int first = 1;
19424
19425 /* Generate the load multiple instruction to restore the
19426 registers. Note we can get here, even if
19427 frame_pointer_needed is true, but only if sp already
19428 points to the base of the saved core registers. */
19429 if (live_regs_mask & (1 << SP_REGNUM))
19430 {
19431 unsigned HOST_WIDE_INT stack_adjust;
19432
19433 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19434 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19435
19436 if (stack_adjust && arm_arch5 && TARGET_ARM)
19437 if (TARGET_UNIFIED_ASM)
19438 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19439 else
19440 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19441 else
19442 {
19443 /* If we can't use ldmib (SA110 bug),
19444 then try to pop r3 instead. */
19445 if (stack_adjust)
19446 live_regs_mask |= 1 << 3;
19447
19448 if (TARGET_UNIFIED_ASM)
19449 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19450 else
19451 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19452 }
19453 }
19454 else
19455 if (TARGET_UNIFIED_ASM)
19456 sprintf (instr, "pop%s\t{", conditional);
19457 else
19458 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19459
19460 p = instr + strlen (instr);
19461
19462 for (reg = 0; reg <= SP_REGNUM; reg++)
19463 if (live_regs_mask & (1 << reg))
19464 {
19465 int l = strlen (reg_names[reg]);
19466
19467 if (first)
19468 first = 0;
19469 else
19470 {
19471 memcpy (p, ", ", 2);
19472 p += 2;
19473 }
19474
19475 memcpy (p, "%|", 2);
19476 memcpy (p + 2, reg_names[reg], l);
19477 p += l + 2;
19478 }
19479
19480 if (live_regs_mask & (1 << LR_REGNUM))
19481 {
19482 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19483 /* If returning from an interrupt, restore the CPSR. */
19484 if (IS_INTERRUPT (func_type))
19485 strcat (p, "^");
19486 }
19487 else
19488 strcpy (p, "}");
19489 }
19490
19491 output_asm_insn (instr, & operand);
19492
19493 /* See if we need to generate an extra instruction to
19494 perform the actual function return. */
19495 if (really_return
19496 && func_type != ARM_FT_INTERWORKED
19497 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19498 {
19499 /* The return has already been handled
19500 by loading the LR into the PC. */
19501 return "";
19502 }
19503 }
19504
19505 if (really_return)
19506 {
19507 switch ((int) ARM_FUNC_TYPE (func_type))
19508 {
19509 case ARM_FT_ISR:
19510 case ARM_FT_FIQ:
19511 /* ??? This is wrong for unified assembly syntax. */
19512 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19513 break;
19514
19515 case ARM_FT_INTERWORKED:
19516 sprintf (instr, "bx%s\t%%|lr", conditional);
19517 break;
19518
19519 case ARM_FT_EXCEPTION:
19520 /* ??? This is wrong for unified assembly syntax. */
19521 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19522 break;
19523
19524 default:
19525 /* Use bx if it's available. */
19526 if (arm_arch5 || arm_arch4t)
19527 sprintf (instr, "bx%s\t%%|lr", conditional);
19528 else
19529 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19530 break;
19531 }
19532
19533 output_asm_insn (instr, & operand);
19534 }
19535
19536 return "";
19537 }
19538
19539 /* Write the function name into the code section, directly preceding
19540 the function prologue.
19541
19542 Code will be output similar to this:
19543 t0
19544 .ascii "arm_poke_function_name", 0
19545 .align
19546 t1
19547 .word 0xff000000 + (t1 - t0)
19548 arm_poke_function_name
19549 mov ip, sp
19550 stmfd sp!, {fp, ip, lr, pc}
19551 sub fp, ip, #4
19552
19553 When performing a stack backtrace, code can inspect the value
19554 of 'pc' stored at 'fp' + 0. If the trace function then looks
19555 at location pc - 12 and the top 8 bits are set, then we know
19556 that there is a function name embedded immediately preceding this
19557    location, whose length is given by ((pc[-3]) & ~0xff000000).
19558
19559 We assume that pc is declared as a pointer to an unsigned long.
19560
19561 It is of no benefit to output the function name if we are assembling
19562 a leaf function. These function types will not contain a stack
19563    backtrace structure, so it is not possible to determine the
19564 function name. */
19565 void
19566 arm_poke_function_name (FILE *stream, const char *name)
19567 {
19568 unsigned long alignlength;
19569 unsigned long length;
19570 rtx x;
19571
19572 length = strlen (name) + 1;
19573 alignlength = ROUND_UP_WORD (length);
19574
19575 ASM_OUTPUT_ASCII (stream, name, length);
19576 ASM_OUTPUT_ALIGN (stream, 2);
19577 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19578 assemble_aligned_integer (UNITS_PER_WORD, x);
19579 }
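
/* A debugger-side sketch (illustrative only, not part of GCC) of how the
   embedded name can be recovered, with PC being the saved pc value loaded
   from fp + 0 and treated as a pointer to unsigned long, as assumed in the
   comment above:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & ~0xff000000ul;
	 const char *name = (const char *) &pc[-3] - len;
       }

   Here LEN is the padded length (t1 - t0), so NAME points at the start of
   the NUL-terminated string emitted by the .ascii directive above.  */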
19580
19581 /* Place some comments into the assembler stream
19582 describing the current function. */
19583 static void
19584 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19585 {
19586 unsigned long func_type;
19587
19588 /* ??? Do we want to print some of the below anyway? */
19589 if (TARGET_THUMB1)
19590 return;
19591
19592 /* Sanity check. */
19593 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19594
19595 func_type = arm_current_func_type ();
19596
19597 switch ((int) ARM_FUNC_TYPE (func_type))
19598 {
19599 default:
19600 case ARM_FT_NORMAL:
19601 break;
19602 case ARM_FT_INTERWORKED:
19603 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19604 break;
19605 case ARM_FT_ISR:
19606 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19607 break;
19608 case ARM_FT_FIQ:
19609 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19610 break;
19611 case ARM_FT_EXCEPTION:
19612 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19613 break;
19614 }
19615
19616 if (IS_NAKED (func_type))
19617 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19618
19619 if (IS_VOLATILE (func_type))
19620 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19621
19622 if (IS_NESTED (func_type))
19623 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19624 if (IS_STACKALIGN (func_type))
19625 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19626
19627 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19628 crtl->args.size,
19629 crtl->args.pretend_args_size, frame_size);
19630
19631 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19632 frame_pointer_needed,
19633 cfun->machine->uses_anonymous_args);
19634
19635 if (cfun->machine->lr_save_eliminated)
19636 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19637
19638 if (crtl->calls_eh_return)
19639 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19640
19641 }
19642
19643 static void
19644 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19645 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19646 {
19647 arm_stack_offsets *offsets;
19648
19649 if (TARGET_THUMB1)
19650 {
19651 int regno;
19652
19653 /* Emit any call-via-reg trampolines that are needed for v4t support
19654 of call_reg and call_value_reg type insns. */
19655 for (regno = 0; regno < LR_REGNUM; regno++)
19656 {
19657 rtx label = cfun->machine->call_via[regno];
19658
19659 if (label != NULL)
19660 {
19661 switch_to_section (function_section (current_function_decl));
19662 targetm.asm_out.internal_label (asm_out_file, "L",
19663 CODE_LABEL_NUMBER (label));
19664 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19665 }
19666 }
19667
19668 /* ??? Probably not safe to set this here, since it assumes that a
19669 function will be emitted as assembly immediately after we generate
19670 RTL for it. This does not happen for inline functions. */
19671 cfun->machine->return_used_this_function = 0;
19672 }
19673 else /* TARGET_32BIT */
19674 {
19675 /* We need to take into account any stack-frame rounding. */
19676 offsets = arm_get_frame_offsets ();
19677
19678 gcc_assert (!use_return_insn (FALSE, NULL)
19679 || (cfun->machine->return_used_this_function != 0)
19680 || offsets->saved_regs == offsets->outgoing_args
19681 || frame_pointer_needed);
19682 }
19683 }
19684
19685 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19686    STR and STRD.  If an even number of registers are being pushed, an
19687    STRD pattern is created for each register pair.  If an
19688 odd number of registers are pushed, emit an initial STR followed by
19689 as many STRD instructions as are needed. This works best when the
19690 stack is initially 64-bit aligned (the normal case), since it
19691 ensures that each STRD is also 64-bit aligned. */
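
/* For illustration (a hand-worked sketch, register numbers invented): with
   SAVED_REGS_MASK covering {r4, r5, r6} the patterns built here correspond
   roughly to

     str	r4, [sp, #-12]!
     strd	r5, r6, [sp, #4]

   i.e. the single odd register claims all of the stack space with
   writeback, and the following pair is stored dword-aligned above it.  */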
19692 static void
19693 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19694 {
19695 int num_regs = 0;
19696 int i;
19697 int regno;
19698 rtx par = NULL_RTX;
19699 rtx dwarf = NULL_RTX;
19700 rtx tmp;
19701 bool first = true;
19702
19703 num_regs = bit_count (saved_regs_mask);
19704
19705 /* Must be at least one register to save, and can't save SP or PC. */
19706 gcc_assert (num_regs > 0 && num_regs <= 14);
19707 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19708 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19709
19710 /* Create sequence for DWARF info. All the frame-related data for
19711 debugging is held in this wrapper. */
19712 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19713
19714 /* Describe the stack adjustment. */
19715 tmp = gen_rtx_SET (VOIDmode,
19716 stack_pointer_rtx,
19717 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19718 RTX_FRAME_RELATED_P (tmp) = 1;
19719 XVECEXP (dwarf, 0, 0) = tmp;
19720
19721 /* Find the first register. */
19722 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19723 ;
19724
19725 i = 0;
19726
19727   /* If there's an odd number of registers to push, start off by
19728 pushing a single register. This ensures that subsequent strd
19729 operations are dword aligned (assuming that SP was originally
19730 64-bit aligned). */
19731 if ((num_regs & 1) != 0)
19732 {
19733 rtx reg, mem, insn;
19734
19735 reg = gen_rtx_REG (SImode, regno);
19736 if (num_regs == 1)
19737 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19738 stack_pointer_rtx));
19739 else
19740 mem = gen_frame_mem (Pmode,
19741 gen_rtx_PRE_MODIFY
19742 (Pmode, stack_pointer_rtx,
19743 plus_constant (Pmode, stack_pointer_rtx,
19744 -4 * num_regs)));
19745
19746 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 insn = emit_insn (tmp);
19749 RTX_FRAME_RELATED_P (insn) = 1;
19750 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19751 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19752 reg);
19753 RTX_FRAME_RELATED_P (tmp) = 1;
19754 i++;
19755 regno++;
19756 XVECEXP (dwarf, 0, i) = tmp;
19757 first = false;
19758 }
19759
19760 while (i < num_regs)
19761 if (saved_regs_mask & (1 << regno))
19762 {
19763 rtx reg1, reg2, mem1, mem2;
19764 rtx tmp0, tmp1, tmp2;
19765 int regno2;
19766
19767 /* Find the register to pair with this one. */
19768 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19769 regno2++)
19770 ;
19771
19772 reg1 = gen_rtx_REG (SImode, regno);
19773 reg2 = gen_rtx_REG (SImode, regno2);
19774
19775 if (first)
19776 {
19777 rtx insn;
19778
19779 first = false;
19780 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19781 stack_pointer_rtx,
19782 -4 * num_regs));
19783 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19784 stack_pointer_rtx,
19785 -4 * (num_regs - 1)));
19786 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19787 plus_constant (Pmode, stack_pointer_rtx,
19788 -4 * (num_regs)));
19789 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19790 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19791 RTX_FRAME_RELATED_P (tmp0) = 1;
19792 RTX_FRAME_RELATED_P (tmp1) = 1;
19793 RTX_FRAME_RELATED_P (tmp2) = 1;
19794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19795 XVECEXP (par, 0, 0) = tmp0;
19796 XVECEXP (par, 0, 1) = tmp1;
19797 XVECEXP (par, 0, 2) = tmp2;
19798 insn = emit_insn (par);
19799 RTX_FRAME_RELATED_P (insn) = 1;
19800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19801 }
19802 else
19803 {
19804 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19805 stack_pointer_rtx,
19806 4 * i));
19807 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19808 stack_pointer_rtx,
19809 4 * (i + 1)));
19810 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19811 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19812 RTX_FRAME_RELATED_P (tmp1) = 1;
19813 RTX_FRAME_RELATED_P (tmp2) = 1;
19814 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19815 XVECEXP (par, 0, 0) = tmp1;
19816 XVECEXP (par, 0, 1) = tmp2;
19817 emit_insn (par);
19818 }
19819
19820 /* Create unwind information. This is an approximation. */
19821 tmp1 = gen_rtx_SET (VOIDmode,
19822 gen_frame_mem (Pmode,
19823 plus_constant (Pmode,
19824 stack_pointer_rtx,
19825 4 * i)),
19826 reg1);
19827 tmp2 = gen_rtx_SET (VOIDmode,
19828 gen_frame_mem (Pmode,
19829 plus_constant (Pmode,
19830 stack_pointer_rtx,
19831 4 * (i + 1))),
19832 reg2);
19833
19834 RTX_FRAME_RELATED_P (tmp1) = 1;
19835 RTX_FRAME_RELATED_P (tmp2) = 1;
19836 XVECEXP (dwarf, 0, i + 1) = tmp1;
19837 XVECEXP (dwarf, 0, i + 2) = tmp2;
19838 i += 2;
19839 regno = regno2 + 1;
19840 }
19841 else
19842 regno++;
19843
19844 return;
19845 }
19846
19847 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19848 whenever possible, otherwise it emits single-word stores. The first store
19849 also allocates stack space for all saved registers, using writeback with
19850 post-addressing mode. All other stores use offset addressing. If no STRD
19851 can be emitted, this function emits a sequence of single-word stores,
19852    and not an STM as before, because single-word stores give the scheduler more
19853    freedom and can be turned into an STM by peephole optimizations.  */
19854 static void
19855 arm_emit_strd_push (unsigned long saved_regs_mask)
19856 {
19857 int num_regs = 0;
19858 int i, j, dwarf_index = 0;
19859 int offset = 0;
19860 rtx dwarf = NULL_RTX;
19861 rtx insn = NULL_RTX;
19862 rtx tmp, mem;
19863
19864   /* TODO: More efficient code can be emitted by changing the
19865 layout, e.g., first push all pairs that can use STRD to keep the
19866 stack aligned, and then push all other registers. */
19867 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19868 if (saved_regs_mask & (1 << i))
19869 num_regs++;
19870
19871 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19872 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19873 gcc_assert (num_regs > 0);
19874
19875 /* Create sequence for DWARF info. */
19876 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19877
19878 /* For dwarf info, we generate explicit stack update. */
19879 tmp = gen_rtx_SET (VOIDmode,
19880 stack_pointer_rtx,
19881 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19882 RTX_FRAME_RELATED_P (tmp) = 1;
19883 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19884
19885 /* Save registers. */
19886 offset = - 4 * num_regs;
19887 j = 0;
19888 while (j <= LAST_ARM_REGNUM)
19889 if (saved_regs_mask & (1 << j))
19890 {
19891 if ((j % 2 == 0)
19892 && (saved_regs_mask & (1 << (j + 1))))
19893 {
19894 	  /* The current register and the following register form a register pair for
19895 which STRD can be generated. */
19896 if (offset < 0)
19897 {
19898 /* Allocate stack space for all saved registers. */
19899 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19900 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19901 mem = gen_frame_mem (DImode, tmp);
19902 offset = 0;
19903 }
19904 else if (offset > 0)
19905 mem = gen_frame_mem (DImode,
19906 plus_constant (Pmode,
19907 stack_pointer_rtx,
19908 offset));
19909 else
19910 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19911
19912 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19913 RTX_FRAME_RELATED_P (tmp) = 1;
19914 tmp = emit_insn (tmp);
19915
19916 /* Record the first store insn. */
19917 if (dwarf_index == 1)
19918 insn = tmp;
19919
19920 /* Generate dwarf info. */
19921 mem = gen_frame_mem (SImode,
19922 plus_constant (Pmode,
19923 stack_pointer_rtx,
19924 offset));
19925 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19926 RTX_FRAME_RELATED_P (tmp) = 1;
19927 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19928
19929 mem = gen_frame_mem (SImode,
19930 plus_constant (Pmode,
19931 stack_pointer_rtx,
19932 offset + 4));
19933 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19934 RTX_FRAME_RELATED_P (tmp) = 1;
19935 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19936
19937 offset += 8;
19938 j += 2;
19939 }
19940 else
19941 {
19942 /* Emit a single word store. */
19943 if (offset < 0)
19944 {
19945 /* Allocate stack space for all saved registers. */
19946 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19947 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19948 mem = gen_frame_mem (SImode, tmp);
19949 offset = 0;
19950 }
19951 else if (offset > 0)
19952 mem = gen_frame_mem (SImode,
19953 plus_constant (Pmode,
19954 stack_pointer_rtx,
19955 offset));
19956 else
19957 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19958
19959 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19960 RTX_FRAME_RELATED_P (tmp) = 1;
19961 tmp = emit_insn (tmp);
19962
19963 /* Record the first store insn. */
19964 if (dwarf_index == 1)
19965 insn = tmp;
19966
19967 /* Generate dwarf info. */
19968 mem = gen_frame_mem (SImode,
19969 plus_constant(Pmode,
19970 stack_pointer_rtx,
19971 offset));
19972 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19973 RTX_FRAME_RELATED_P (tmp) = 1;
19974 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19975
19976 offset += 4;
19977 j += 1;
19978 }
19979 }
19980 else
19981 j++;
19982
19983 /* Attach dwarf info to the first insn we generate. */
19984 gcc_assert (insn != NULL_RTX);
19985 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19986 RTX_FRAME_RELATED_P (insn) = 1;
19987 }
19988
19989 /* Generate and emit an insn that we will recognize as a push_multi.
19990 Unfortunately, since this insn does not reflect very well the actual
19991 semantics of the operation, we need to annotate the insn for the benefit
19992 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19993 MASK for registers that should be annotated for DWARF2 frame unwind
19994 information. */
19995 static rtx
19996 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19997 {
19998 int num_regs = 0;
19999 int num_dwarf_regs = 0;
20000 int i, j;
20001 rtx par;
20002 rtx dwarf;
20003 int dwarf_par_index;
20004 rtx tmp, reg;
20005
20006 /* We don't record the PC in the dwarf frame information. */
20007 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20008
20009 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20010 {
20011 if (mask & (1 << i))
20012 num_regs++;
20013 if (dwarf_regs_mask & (1 << i))
20014 num_dwarf_regs++;
20015 }
20016
20017 gcc_assert (num_regs && num_regs <= 16);
20018 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20019
20020 /* For the body of the insn we are going to generate an UNSPEC in
20021 parallel with several USEs. This allows the insn to be recognized
20022 by the push_multi pattern in the arm.md file.
20023
20024 The body of the insn looks something like this:
20025
20026 (parallel [
20027 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20028 (const_int:SI <num>)))
20029 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20030 (use (reg:SI XX))
20031 (use (reg:SI YY))
20032 ...
20033 ])
20034
20035 For the frame note however, we try to be more explicit and actually
20036 show each register being stored into the stack frame, plus a (single)
20037 decrement of the stack pointer. We do it this way in order to be
20038 friendly to the stack unwinding code, which only wants to see a single
20039 stack decrement per instruction. The RTL we generate for the note looks
20040 something like this:
20041
20042 (sequence [
20043 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20044 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20045 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20046 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20047 ...
20048 ])
20049
20050      FIXME: In an ideal world the PRE_MODIFY would not exist and
20051 instead we'd have a parallel expression detailing all
20052 the stores to the various memory addresses so that debug
20053 information is more up-to-date. Remember however while writing
20054 this to take care of the constraints with the push instruction.
20055
20056 Note also that this has to be taken care of for the VFP registers.
20057
20058 For more see PR43399. */
20059
20060 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20061 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20062 dwarf_par_index = 1;
20063
20064 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20065 {
20066 if (mask & (1 << i))
20067 {
20068 reg = gen_rtx_REG (SImode, i);
20069
20070 XVECEXP (par, 0, 0)
20071 = gen_rtx_SET (VOIDmode,
20072 gen_frame_mem
20073 (BLKmode,
20074 gen_rtx_PRE_MODIFY (Pmode,
20075 stack_pointer_rtx,
20076 plus_constant
20077 (Pmode, stack_pointer_rtx,
20078 -4 * num_regs))
20079 ),
20080 gen_rtx_UNSPEC (BLKmode,
20081 gen_rtvec (1, reg),
20082 UNSPEC_PUSH_MULT));
20083
20084 if (dwarf_regs_mask & (1 << i))
20085 {
20086 tmp = gen_rtx_SET (VOIDmode,
20087 gen_frame_mem (SImode, stack_pointer_rtx),
20088 reg);
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20091 }
20092
20093 break;
20094 }
20095 }
20096
20097 for (j = 1, i++; j < num_regs; i++)
20098 {
20099 if (mask & (1 << i))
20100 {
20101 reg = gen_rtx_REG (SImode, i);
20102
20103 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20104
20105 if (dwarf_regs_mask & (1 << i))
20106 {
20107 tmp
20108 = gen_rtx_SET (VOIDmode,
20109 gen_frame_mem
20110 (SImode,
20111 plus_constant (Pmode, stack_pointer_rtx,
20112 4 * j)),
20113 reg);
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20115 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20116 }
20117
20118 j++;
20119 }
20120 }
20121
20122 par = emit_insn (par);
20123
20124 tmp = gen_rtx_SET (VOIDmode,
20125 stack_pointer_rtx,
20126 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20127 RTX_FRAME_RELATED_P (tmp) = 1;
20128 XVECEXP (dwarf, 0, 0) = tmp;
20129
20130 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20131
20132 return par;
20133 }
20134
20135 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20136 SIZE is the offset to be adjusted.
20137 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20138 static void
20139 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20140 {
20141 rtx dwarf;
20142
20143 RTX_FRAME_RELATED_P (insn) = 1;
20144 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20145 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20146 }
20147
20148 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20149 SAVED_REGS_MASK shows which registers need to be restored.
20150
20151 Unfortunately, since this insn does not reflect very well the actual
20152 semantics of the operation, we need to annotate the insn for the benefit
20153 of DWARF2 frame unwind information. */
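
/* A sketch of the PARALLEL built below for SAVED_REGS_MASK = {r4, r5, pc}
   (illustrative; real RTL carries register numbers and machine modes):

     (parallel [(return)
		(set (reg sp) (plus (reg sp) (const_int 12)))
		(set (reg r4) (mem (reg sp)))
		(set (reg r5) (mem (plus (reg sp) (const_int 4))))
		(set (reg pc) (mem (plus (reg sp) (const_int 8))))])

   together with REG_CFA_RESTORE notes for r4 and r5; the PC is deliberately
   left out of the DWARF information.  */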
20154 static void
20155 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20156 {
20157 int num_regs = 0;
20158 int i, j;
20159 rtx par;
20160 rtx dwarf = NULL_RTX;
20161 rtx tmp, reg;
20162 bool return_in_pc;
20163 int offset_adj;
20164 int emit_update;
20165
20166 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20167 offset_adj = return_in_pc ? 1 : 0;
20168 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20169 if (saved_regs_mask & (1 << i))
20170 num_regs++;
20171
20172 gcc_assert (num_regs && num_regs <= 16);
20173
20174   /* If SP is in the reglist, then we don't emit an SP update insn.  */
20175 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20176
20177 /* The parallel needs to hold num_regs SETs
20178 and one SET for the stack update. */
20179 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20180
20181 if (return_in_pc)
20182 {
20183 tmp = ret_rtx;
20184 XVECEXP (par, 0, 0) = tmp;
20185 }
20186
20187 if (emit_update)
20188 {
20189 /* Increment the stack pointer, based on there being
20190 num_regs 4-byte registers to restore. */
20191 tmp = gen_rtx_SET (VOIDmode,
20192 stack_pointer_rtx,
20193 plus_constant (Pmode,
20194 stack_pointer_rtx,
20195 4 * num_regs));
20196 RTX_FRAME_RELATED_P (tmp) = 1;
20197 XVECEXP (par, 0, offset_adj) = tmp;
20198 }
20199
20200 /* Now restore every reg, which may include PC. */
20201 for (j = 0, i = 0; j < num_regs; i++)
20202 if (saved_regs_mask & (1 << i))
20203 {
20204 reg = gen_rtx_REG (SImode, i);
20205 if ((num_regs == 1) && emit_update && !return_in_pc)
20206 {
20207 /* Emit single load with writeback. */
20208 tmp = gen_frame_mem (SImode,
20209 gen_rtx_POST_INC (Pmode,
20210 stack_pointer_rtx));
20211 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20212 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20213 return;
20214 }
20215
20216 tmp = gen_rtx_SET (VOIDmode,
20217 reg,
20218 gen_frame_mem
20219 (SImode,
20220 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20221 RTX_FRAME_RELATED_P (tmp) = 1;
20222 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20223
20224 /* We need to maintain a sequence for DWARF info too. As dwarf info
20225 should not have PC, skip PC. */
20226 if (i != PC_REGNUM)
20227 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20228
20229 j++;
20230 }
20231
20232 if (return_in_pc)
20233 par = emit_jump_insn (par);
20234 else
20235 par = emit_insn (par);
20236
20237 REG_NOTES (par) = dwarf;
20238 if (!return_in_pc)
20239 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20240 stack_pointer_rtx, stack_pointer_rtx);
20241 }
20242
20243 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20244 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20245
20246 Unfortunately, since this insn does not reflect the actual semantics of
20247 the operation very well, we need to annotate the insn for the benefit
20248 of DWARF2 frame unwind information. */
20249 static void
20250 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20251 {
20252 int i, j;
20253 rtx par;
20254 rtx dwarf = NULL_RTX;
20255 rtx tmp, reg;
20256
20257 gcc_assert (num_regs && num_regs <= 32);
20258
20259 /* Work around the ARM10 VFPr1 bug. */
20260 if (num_regs == 2 && !arm_arch6)
20261 {
20262 if (first_reg == 15)
20263 first_reg--;
20264
20265 num_regs++;
20266 }
20267
20268 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20269 there could be up to 32 D-registers to restore.
20270 If there are more than 16 D-registers, make two recursive calls,
20271 each of which emits one pop_multi instruction. */
20272 if (num_regs > 16)
20273 {
20274 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20275 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20276 return;
20277 }
20278
20279 /* The parallel needs to hold num_regs SETs
20280 and one SET for the stack update. */
20281 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20282
20283 /* Increment the stack pointer, based on there being
20284 num_regs 8-byte registers to restore. */
20285 tmp = gen_rtx_SET (VOIDmode,
20286 base_reg,
20287 plus_constant (Pmode, base_reg, 8 * num_regs));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20289 XVECEXP (par, 0, 0) = tmp;
20290
20291 /* Now show every reg that will be restored, using a SET for each. */
20292 for (j = 0, i=first_reg; j < num_regs; i += 2)
20293 {
20294 reg = gen_rtx_REG (DFmode, i);
20295
20296 tmp = gen_rtx_SET (VOIDmode,
20297 reg,
20298 gen_frame_mem
20299 (DFmode,
20300 plus_constant (Pmode, base_reg, 8 * j)));
20301 RTX_FRAME_RELATED_P (tmp) = 1;
20302 XVECEXP (par, 0, j + 1) = tmp;
20303
20304 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20305
20306 j++;
20307 }
20308
20309 par = emit_insn (par);
20310 REG_NOTES (par) = dwarf;
20311
20312 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20313 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20314 {
20315 RTX_FRAME_RELATED_P (par) = 1;
20316 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20317 }
20318 else
20319 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20320 base_reg, base_reg);
20321 }
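/* Editor's note: an illustrative sketch (not GCC code) of the recursion above.
   A single pop_multi can restore at most 16 D-registers, so a larger request
   is split into a first pop of 16 followed by a second pop of the remainder,
   the base register having been advanced by 8 * 16 bytes by the first pop's
   stack update.  Names below are placeholders.  */
static void
model_vfp_pop_split (int num_regs, int *count_first, int *count_second)
{
  if (num_regs > 16)
    {
      *count_first = 16;
      *count_second = num_regs - 16;
    }
  else
    {
      *count_first = num_regs;
      *count_second = 0;                /* a single pop_multi suffices */
    }
}
/* Example: restoring 24 consecutive D-registers becomes one pop of 16
   (adjusting the base by 128 bytes) followed by one pop of the remaining 8.  */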
20322
20323 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20324 even number of registers is being popped, multiple LDRD patterns are created
20325 for all register pairs. If an odd number of registers is popped, the last
20326 register is loaded using an LDR pattern. */
20327 static void
20328 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20329 {
20330 int num_regs = 0;
20331 int i, j;
20332 rtx par = NULL_RTX;
20333 rtx dwarf = NULL_RTX;
20334 rtx tmp, reg, tmp1;
20335 bool return_in_pc;
20336
20337 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20338 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20339 if (saved_regs_mask & (1 << i))
20340 num_regs++;
20341
20342 gcc_assert (num_regs && num_regs <= 16);
20343
20344 /* We cannot generate an LDRD for PC. Hence, reduce the count if PC is
20345 to be popped. So, if num_regs is even, it will now become odd,
20346 and we can generate a pop with PC. If num_regs is odd, it will now
20347 become even, and an LDR with return can be generated for PC. */
20348 if (return_in_pc)
20349 num_regs--;
20350
20351 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20352
20353 /* Var j iterates over all the registers to collect those that are set in
20354 saved_regs_mask. Var i gives the index of a saved register in the stack
20355 frame. A PARALLEL RTX of a register pair is created here, so that the
20356 pattern for LDRD can be matched. As PC is always the last register to be
20357 popped, and we have already decremented num_regs if PC is present, we
20358 don't have to worry about PC in this loop. */
20359 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20360 if (saved_regs_mask & (1 << j))
20361 {
20362 /* Create RTX for memory load. */
20363 reg = gen_rtx_REG (SImode, j);
20364 tmp = gen_rtx_SET (SImode,
20365 reg,
20366 gen_frame_mem (SImode,
20367 plus_constant (Pmode,
20368 stack_pointer_rtx, 4 * i)));
20369 RTX_FRAME_RELATED_P (tmp) = 1;
20370
20371 if (i % 2 == 0)
20372 {
20373 /* When saved-register index (i) is even, the RTX to be emitted is
20374 yet to be created. Hence create it first. The LDRD pattern we
20375 are generating is :
20376 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20377 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20378 where target registers need not be consecutive. */
20379 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20380 dwarf = NULL_RTX;
20381 }
20382
20383 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20384 added as the 0th element; if i is odd, reg_i is added as the 1st element
20385 of the LDRD pattern shown above. */
20386 XVECEXP (par, 0, (i % 2)) = tmp;
20387 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20388
20389 if ((i % 2) == 1)
20390 {
20391 /* When the saved-register index (i) is odd, RTXs for both registers of
20392 the LDRD pattern given above have been generated, and the pattern
20393 can be emitted now. */
20394 par = emit_insn (par);
20395 REG_NOTES (par) = dwarf;
20396 RTX_FRAME_RELATED_P (par) = 1;
20397 }
20398
20399 i++;
20400 }
20401
20402 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20403 the number of registers is even AND return_in_pc is true, the last register
20404 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20405 and then use LDR with post-increment. */
20406
20407 /* Increment the stack pointer, based on there being
20408 num_regs 4-byte registers to restore. */
20409 tmp = gen_rtx_SET (VOIDmode,
20410 stack_pointer_rtx,
20411 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20412 RTX_FRAME_RELATED_P (tmp) = 1;
20413 tmp = emit_insn (tmp);
20414 if (!return_in_pc)
20415 {
20416 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20417 stack_pointer_rtx, stack_pointer_rtx);
20418 }
20419
20420 dwarf = NULL_RTX;
20421
20422 if (((num_regs % 2) == 1 && !return_in_pc)
20423 || ((num_regs % 2) == 0 && return_in_pc))
20424 {
20425 /* Scan for the single register to be popped. Skip until the saved
20426 register is found. */
20427 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20428
20429 /* Gen LDR with post increment here. */
20430 tmp1 = gen_rtx_MEM (SImode,
20431 gen_rtx_POST_INC (SImode,
20432 stack_pointer_rtx));
20433 set_mem_alias_set (tmp1, get_frame_alias_set ());
20434
20435 reg = gen_rtx_REG (SImode, j);
20436 tmp = gen_rtx_SET (SImode, reg, tmp1);
20437 RTX_FRAME_RELATED_P (tmp) = 1;
20438 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20439
20440 if (return_in_pc)
20441 {
20442 /* If return_in_pc, j must be PC_REGNUM. */
20443 gcc_assert (j == PC_REGNUM);
20444 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20445 XVECEXP (par, 0, 0) = ret_rtx;
20446 XVECEXP (par, 0, 1) = tmp;
20447 par = emit_jump_insn (par);
20448 }
20449 else
20450 {
20451 par = emit_insn (tmp);
20452 REG_NOTES (par) = dwarf;
20453 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20454 stack_pointer_rtx, stack_pointer_rtx);
20455 }
20456
20457 }
20458 else if ((num_regs % 2) == 1 && return_in_pc)
20459 {
20460 /* There are 2 registers to be popped. So, generate the pattern
20461 pop_multiple_with_stack_update_and_return to pop into PC. */
20462 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20463 }
20464
20465 return;
20466 }
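/* Editor's note: a standalone sketch (not GCC code) of the pairing performed
   by thumb2_emit_ldrd_pop above.  Thumb-2 LDRD does not require consecutive
   register numbers, so saved registers are simply paired two-by-two in the
   order they appear in the mask; an odd leftover is later loaded with a
   single LDR.  The return-in-PC special case handled above is omitted here,
   and the helper name is illustrative only.  */
static int
model_thumb2_ldrd_pairs (unsigned long mask, int pairs[8][2], int *leftover)
{
  int reg, n = 0, npairs;
  int order[16];

  for (reg = 0; reg < 16; reg++)
    if (mask & (1UL << reg))
      order[n++] = reg;                 /* registers in pop order */

  for (npairs = 0; 2 * npairs + 1 < n; npairs++)
    {
      pairs[npairs][0] = order[2 * npairs];
      pairs[npairs][1] = order[2 * npairs + 1];
    }
  *leftover = (n & 1) ? order[n - 1] : -1;
  return npairs;
}
/* Example: a mask with r4, r6, r9 and lr set yields the LDRD pairs (r4, r6)
   and (r9, lr); a mask with r4, r5, r7 yields (r4, r5), leaving r7 for the
   single LDR with post-increment.  */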
20467
20468 /* LDRD in ARM mode needs consecutive registers as operands. This function
20469 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20470 offset addressing and then generates one separate stack update. This provides
20471 more scheduling freedom, compared to writeback on every load. However,
20472 if the function returns using a load into PC directly
20473 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20474 before the last load. TODO: Add a peephole optimization to recognize
20475 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add a
20476 peephole optimization to merge the load at stack-offset zero
20477 with the stack update instruction using load with writeback
20478 in post-index addressing mode. */
20479 static void
20480 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20481 {
20482 int j = 0;
20483 int offset = 0;
20484 rtx par = NULL_RTX;
20485 rtx dwarf = NULL_RTX;
20486 rtx tmp, mem;
20487
20488 /* Restore saved registers. */
20489 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20490 j = 0;
20491 while (j <= LAST_ARM_REGNUM)
20492 if (saved_regs_mask & (1 << j))
20493 {
20494 if ((j % 2) == 0
20495 && (saved_regs_mask & (1 << (j + 1)))
20496 && (j + 1) != PC_REGNUM)
20497 {
20498 /* The current register and the next register form a register pair for
20499 which LDRD can be generated. PC is always the last register popped,
20500 and we handle it separately. */
20501 if (offset > 0)
20502 mem = gen_frame_mem (DImode,
20503 plus_constant (Pmode,
20504 stack_pointer_rtx,
20505 offset));
20506 else
20507 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20508
20509 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20510 tmp = emit_insn (tmp);
20511 RTX_FRAME_RELATED_P (tmp) = 1;
20512
20513 /* Generate dwarf info. */
20514
20515 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20516 gen_rtx_REG (SImode, j),
20517 NULL_RTX);
20518 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20519 gen_rtx_REG (SImode, j + 1),
20520 dwarf);
20521
20522 REG_NOTES (tmp) = dwarf;
20523
20524 offset += 8;
20525 j += 2;
20526 }
20527 else if (j != PC_REGNUM)
20528 {
20529 /* Emit a single word load. */
20530 if (offset > 0)
20531 mem = gen_frame_mem (SImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx,
20534 offset));
20535 else
20536 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20537
20538 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20539 tmp = emit_insn (tmp);
20540 RTX_FRAME_RELATED_P (tmp) = 1;
20541
20542 /* Generate dwarf info. */
20543 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20544 gen_rtx_REG (SImode, j),
20545 NULL_RTX);
20546
20547 offset += 4;
20548 j += 1;
20549 }
20550 else /* j == PC_REGNUM */
20551 j++;
20552 }
20553 else
20554 j++;
20555
20556 /* Update the stack. */
20557 if (offset > 0)
20558 {
20559 tmp = gen_rtx_SET (Pmode,
20560 stack_pointer_rtx,
20561 plus_constant (Pmode,
20562 stack_pointer_rtx,
20563 offset));
20564 tmp = emit_insn (tmp);
20565 arm_add_cfa_adjust_cfa_note (tmp, offset,
20566 stack_pointer_rtx, stack_pointer_rtx);
20567 offset = 0;
20568 }
20569
20570 if (saved_regs_mask & (1 << PC_REGNUM))
20571 {
20572 /* Only PC is to be popped. */
20573 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20574 XVECEXP (par, 0, 0) = ret_rtx;
20575 tmp = gen_rtx_SET (SImode,
20576 gen_rtx_REG (SImode, PC_REGNUM),
20577 gen_frame_mem (SImode,
20578 gen_rtx_POST_INC (SImode,
20579 stack_pointer_rtx)));
20580 RTX_FRAME_RELATED_P (tmp) = 1;
20581 XVECEXP (par, 0, 1) = tmp;
20582 par = emit_jump_insn (par);
20583
20584 /* Generate dwarf info. */
20585 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20586 gen_rtx_REG (SImode, PC_REGNUM),
20587 NULL_RTX);
20588 REG_NOTES (par) = dwarf;
20589 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20590 stack_pointer_rtx, stack_pointer_rtx);
20591 }
20592 }
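/* Editor's note: a standalone sketch (not GCC code) of the pairing rule used
   by arm_emit_ldrd_pop above.  ARM-mode LDRD needs an even/odd consecutive
   register pair, so register J is fused with J+1 only when J is even, J+1 is
   also saved and J+1 is not PC; otherwise a single-word load is used.  The
   helper records each saved register's sp-relative slot (entries for other
   registers are left untouched) and returns the size of the final stack
   adjustment; PC handling is omitted.  */
static int
model_arm_ldrd_layout (unsigned long mask, int slot_offset[16], int uses_ldrd[16])
{
  int j = 0, offset = 0;

  while (j < 15)                        /* reg 15 (PC) is popped separately */
    if (mask & (1UL << j))
      {
        if ((j % 2) == 0 && (mask & (1UL << (j + 1))) && (j + 1) != 15)
          {
            slot_offset[j] = offset;         uses_ldrd[j] = 1;
            slot_offset[j + 1] = offset + 4; uses_ldrd[j + 1] = 1;
            offset += 8;
            j += 2;
          }
        else
          {
            slot_offset[j] = offset;         uses_ldrd[j] = 0;
            offset += 4;
            j += 1;
          }
      }
    else
      j++;
  return offset;
}
/* Example: for r4, r5, r6 and r8 this gives an LDRD of (r4, r5) at sp+0, a
   single load of r6 at sp+8, a single load of r8 at sp+12, and a final
   "add sp, sp, #16".  */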
20593
20594 /* Calculate the size of the return value that is passed in registers. */
20595 static unsigned
20596 arm_size_return_regs (void)
20597 {
20598 machine_mode mode;
20599
20600 if (crtl->return_rtx != 0)
20601 mode = GET_MODE (crtl->return_rtx);
20602 else
20603 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20604
20605 return GET_MODE_SIZE (mode);
20606 }
20607
20608 /* Return true if the current function needs to save/restore LR. */
20609 static bool
20610 thumb_force_lr_save (void)
20611 {
20612 return !cfun->machine->lr_save_eliminated
20613 && (!leaf_function_p ()
20614 || thumb_far_jump_used_p ()
20615 || df_regs_ever_live_p (LR_REGNUM));
20616 }
20617
20618 /* We do not know whether r3 will be available, because an
20619 indirect tail call is happening in this
20620 particular case. */
20621 static bool
20622 is_indirect_tailcall_p (rtx call)
20623 {
20624 rtx pat = PATTERN (call);
20625
20626 /* Indirect tail call. */
20627 pat = XVECEXP (pat, 0, 0);
20628 if (GET_CODE (pat) == SET)
20629 pat = SET_SRC (pat);
20630
20631 pat = XEXP (XEXP (pat, 0), 0);
20632 return REG_P (pat);
20633 }
20634
20635 /* Return true if r3 is used by any of the tail call insns in the
20636 current function. */
20637 static bool
20638 any_sibcall_could_use_r3 (void)
20639 {
20640 edge_iterator ei;
20641 edge e;
20642
20643 if (!crtl->tail_call_emit)
20644 return false;
20645 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20646 if (e->flags & EDGE_SIBCALL)
20647 {
20648 rtx call = BB_END (e->src);
20649 if (!CALL_P (call))
20650 call = prev_nonnote_nondebug_insn (call);
20651 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20652 if (find_regno_fusage (call, USE, 3)
20653 || is_indirect_tailcall_p (call))
20654 return true;
20655 }
20656 return false;
20657 }
20658
20659
20660 /* Compute the distance from register FROM to register TO.
20661 These can be the arg pointer (26), the soft frame pointer (25),
20662 the stack pointer (13) or the hard frame pointer (11).
20663 In thumb mode r7 is used as the soft frame pointer, if needed.
20664 Typical stack layout looks like this:
20665
20666 old stack pointer -> | |
20667 ----
20668 | | \
20669 | | saved arguments for
20670 | | vararg functions
20671 | | /
20672 --
20673 hard FP & arg pointer -> | | \
20674 | | stack
20675 | | frame
20676 | | /
20677 --
20678 | | \
20679 | | call saved
20680 | | registers
20681 soft frame pointer -> | | /
20682 --
20683 | | \
20684 | | local
20685 | | variables
20686 locals base pointer -> | | /
20687 --
20688 | | \
20689 | | outgoing
20690 | | arguments
20691 current stack pointer -> | | /
20692 --
20693
20694 For a given function some or all of these stack components
20695 may not be needed, giving rise to the possibility of
20696 eliminating some of the registers.
20697
20698 The values returned by this function must reflect the behavior
20699 of arm_expand_prologue() and arm_compute_save_reg_mask().
20700
20701 The sign of the number returned reflects the direction of stack
20702 growth, so the values are positive for all eliminations except
20703 from the soft frame pointer to the hard frame pointer.
20704
20705 SFP may point just inside the local variables block to ensure correct
20706 alignment. */
20707
20708
20709 /* Calculate stack offsets. These are used to calculate register elimination
20710 offsets and in prologue/epilogue code. Also calculates which registers
20711 should be saved. */
20712
20713 static arm_stack_offsets *
20714 arm_get_frame_offsets (void)
20715 {
20716 struct arm_stack_offsets *offsets;
20717 unsigned long func_type;
20718 int leaf;
20719 int saved;
20720 int core_saved;
20721 HOST_WIDE_INT frame_size;
20722 int i;
20723
20724 offsets = &cfun->machine->stack_offsets;
20725
20726 /* We need to know if we are a leaf function. Unfortunately, it
20727 is possible to be called after start_sequence has been called,
20728 which causes get_insns to return the insns for the sequence,
20729 not the function, which will cause leaf_function_p to return
20730 the incorrect result.
20731
20732 However, we only need to know about leaf functions once reload has completed, and the
20733 frame size cannot be changed after that time, so we can safely
20734 use the cached value. */
20735
20736 if (reload_completed)
20737 return offsets;
20738
20739 /* Initially this is the size of the local variables. It will be translated
20740 into an offset once we have determined the size of the preceding data. */
20741 frame_size = ROUND_UP_WORD (get_frame_size ());
20742
20743 leaf = leaf_function_p ();
20744
20745 /* Space for variadic functions. */
20746 offsets->saved_args = crtl->args.pretend_args_size;
20747
20748 /* In Thumb mode this is incorrect, but never used. */
20749 offsets->frame
20750 = (offsets->saved_args
20751 + arm_compute_static_chain_stack_bytes ()
20752 + (frame_pointer_needed ? 4 : 0));
20753
20754 if (TARGET_32BIT)
20755 {
20756 unsigned int regno;
20757
20758 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20759 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20760 saved = core_saved;
20761
20762 /* We know that SP will be doubleword aligned on entry, and we must
20763 preserve that condition at any subroutine call. We also require the
20764 soft frame pointer to be doubleword aligned. */
20765
20766 if (TARGET_REALLY_IWMMXT)
20767 {
20768 /* Check for the call-saved iWMMXt registers. */
20769 for (regno = FIRST_IWMMXT_REGNUM;
20770 regno <= LAST_IWMMXT_REGNUM;
20771 regno++)
20772 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20773 saved += 8;
20774 }
20775
20776 func_type = arm_current_func_type ();
20777 /* Space for saved VFP registers. */
20778 if (! IS_VOLATILE (func_type)
20779 && TARGET_HARD_FLOAT && TARGET_VFP)
20780 saved += arm_get_vfp_saved_size ();
20781 }
20782 else /* TARGET_THUMB1 */
20783 {
20784 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20785 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20786 saved = core_saved;
20787 if (TARGET_BACKTRACE)
20788 saved += 16;
20789 }
20790
20791 /* Saved registers include the stack frame. */
20792 offsets->saved_regs
20793 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20794 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20795
20796 /* A leaf function does not need any stack alignment if it has nothing
20797 on the stack. */
20798 if (leaf && frame_size == 0
20799 /* However if it calls alloca(), we have a dynamically allocated
20800 block of BIGGEST_ALIGNMENT on the stack, so stack alignment is still needed. */
20801 && ! cfun->calls_alloca)
20802 {
20803 offsets->outgoing_args = offsets->soft_frame;
20804 offsets->locals_base = offsets->soft_frame;
20805 return offsets;
20806 }
20807
20808 /* Ensure SFP has the correct alignment. */
20809 if (ARM_DOUBLEWORD_ALIGN
20810 && (offsets->soft_frame & 7))
20811 {
20812 offsets->soft_frame += 4;
20813 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20814 when there is a stack frame, as the alignment will be rolled into
20815 the normal stack adjustment. */
20816 if (frame_size + crtl->outgoing_args_size == 0)
20817 {
20818 int reg = -1;
20819
20820 /* Register r3 is caller-saved. Normally it does not need to be
20821 saved on entry by the prologue. However if we choose to save
20822 it for padding then we may confuse the compiler into thinking
20823 a prologue sequence is required when in fact it is not. This
20824 will occur when shrink-wrapping if r3 is used as a scratch
20825 register and there are no other callee-saved writes.
20826
20827 This situation can be avoided when other callee-saved registers
20828 are available and r3 is not mandatory if we choose a callee-saved
20829 register for padding. */
20830 bool prefer_callee_reg_p = false;
20831
20832 /* If it is safe to use r3, then do so. This sometimes
20833 generates better code on Thumb-2 by avoiding the need to
20834 use 32-bit push/pop instructions. */
20835 if (! any_sibcall_could_use_r3 ()
20836 && arm_size_return_regs () <= 12
20837 && (offsets->saved_regs_mask & (1 << 3)) == 0
20838 && (TARGET_THUMB2
20839 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20840 {
20841 reg = 3;
20842 if (!TARGET_THUMB2)
20843 prefer_callee_reg_p = true;
20844 }
20845 if (reg == -1
20846 || prefer_callee_reg_p)
20847 {
20848 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20849 {
20850 /* Avoid fixed registers; they may be changed at
20851 arbitrary times so it's unsafe to restore them
20852 during the epilogue. */
20853 if (!fixed_regs[i]
20854 && (offsets->saved_regs_mask & (1 << i)) == 0)
20855 {
20856 reg = i;
20857 break;
20858 }
20859 }
20860 }
20861
20862 if (reg != -1)
20863 {
20864 offsets->saved_regs += 4;
20865 offsets->saved_regs_mask |= (1 << reg);
20866 }
20867 }
20868 }
20869
20870 offsets->locals_base = offsets->soft_frame + frame_size;
20871 offsets->outgoing_args = (offsets->locals_base
20872 + crtl->outgoing_args_size);
20873
20874 if (ARM_DOUBLEWORD_ALIGN)
20875 {
20876 /* Ensure SP remains doubleword aligned. */
20877 if (offsets->outgoing_args & 7)
20878 offsets->outgoing_args += 4;
20879 gcc_assert (!(offsets->outgoing_args & 7));
20880 }
20881
20882 return offsets;
20883 }
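/* Editor's note: a toy model (not GCC code) of the layering computed by
   arm_get_frame_offsets above:
     saved_args -> saved_regs -> soft_frame -> locals_base -> outgoing_args,
   with the soft frame pointer and the final stack pointer kept doubleword
   aligned.  Interworking slots, iWMMXt saves and the r3-padding heuristic are
   deliberately left out; all names below are illustrative.  */
struct toy_offsets
{
  int saved_args, saved_regs, soft_frame, locals_base, outgoing_args;
};

static struct toy_offsets
model_frame_offsets (int pretend_args, int static_chain_bytes,
                     int core_saved_bytes, int vfp_saved_bytes,
                     int frame_size, int outgoing_size)
{
  struct toy_offsets o;

  o.saved_args = pretend_args;
  o.saved_regs = (o.saved_args + static_chain_bytes
                  + core_saved_bytes + vfp_saved_bytes);
  o.soft_frame = o.saved_regs;
  if (o.soft_frame & 7)                 /* keep SFP doubleword aligned */
    o.soft_frame += 4;
  o.locals_base = o.soft_frame + frame_size;
  o.outgoing_args = o.locals_base + outgoing_size;
  if (o.outgoing_args & 7)              /* keep SP doubleword aligned */
    o.outgoing_args += 4;
  return o;
}
/* Example: no pretend args, five core registers saved (20 bytes), 16 bytes of
   locals and 8 bytes of outgoing arguments gives soft_frame = 24,
   locals_base = 40 and outgoing_args = 48.  */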
20884
20885
20886 /* Calculate the relative offsets for the different stack pointers. Positive
20887 offsets are in the direction of stack growth. */
20888
20889 HOST_WIDE_INT
20890 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20891 {
20892 arm_stack_offsets *offsets;
20893
20894 offsets = arm_get_frame_offsets ();
20895
20896 /* OK, now we have enough information to compute the distances.
20897 There must be an entry in these switch tables for each pair
20898 of registers in ELIMINABLE_REGS, even if some of the entries
20899 seem to be redundant or useless. */
20900 switch (from)
20901 {
20902 case ARG_POINTER_REGNUM:
20903 switch (to)
20904 {
20905 case THUMB_HARD_FRAME_POINTER_REGNUM:
20906 return 0;
20907
20908 case FRAME_POINTER_REGNUM:
20909 /* This is the reverse of the soft frame pointer
20910 to hard frame pointer elimination below. */
20911 return offsets->soft_frame - offsets->saved_args;
20912
20913 case ARM_HARD_FRAME_POINTER_REGNUM:
20914 /* This is only non-zero in the case where the static chain register
20915 is stored above the frame. */
20916 return offsets->frame - offsets->saved_args - 4;
20917
20918 case STACK_POINTER_REGNUM:
20919 /* If nothing has been pushed on the stack at all
20920 then this will return -4. This *is* correct! */
20921 return offsets->outgoing_args - (offsets->saved_args + 4);
20922
20923 default:
20924 gcc_unreachable ();
20925 }
20926 gcc_unreachable ();
20927
20928 case FRAME_POINTER_REGNUM:
20929 switch (to)
20930 {
20931 case THUMB_HARD_FRAME_POINTER_REGNUM:
20932 return 0;
20933
20934 case ARM_HARD_FRAME_POINTER_REGNUM:
20935 /* The hard frame pointer points to the top entry in the
20936 stack frame. The soft frame pointer points to the bottom entry
20937 in the stack frame. If there is no stack frame at all,
20938 then they are identical. */
20939
20940 return offsets->frame - offsets->soft_frame;
20941
20942 case STACK_POINTER_REGNUM:
20943 return offsets->outgoing_args - offsets->soft_frame;
20944
20945 default:
20946 gcc_unreachable ();
20947 }
20948 gcc_unreachable ();
20949
20950 default:
20951 /* You cannot eliminate from the stack pointer.
20952 In theory you could eliminate from the hard frame
20953 pointer to the stack pointer, but this will never
20954 happen, since if a stack frame is not needed the
20955 hard frame pointer will never be used. */
20956 gcc_unreachable ();
20957 }
20958 }
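/* Editor's note: an illustrative sketch (not GCC code) of the two most common
   eliminations above, written as plain differences over the toy offsets from
   the previous sketch.  The "+ 4" mirrors the ARG_POINTER case above, whose
   comment notes that an entirely empty frame correctly yields -4.  */
static int
model_arg_to_sp_offset (int saved_args, int outgoing_args)
{
  return outgoing_args - (saved_args + 4);
}

static int
model_frame_to_sp_offset (int soft_frame, int outgoing_args)
{
  return outgoing_args - soft_frame;
}
/* Example: with saved_args = 0, soft_frame = 24 and outgoing_args = 48 these
   return 44 and 24; with an entirely empty frame the first returns -4.  */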
20959
20960 /* Given FROM and TO register numbers, say whether this elimination is
20961 allowed. Frame pointer elimination is automatically handled.
20962
20963 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20964 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20965 pointer, we must eliminate FRAME_POINTER_REGNUM into
20966 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20967 ARG_POINTER_REGNUM. */
20968
20969 bool
20970 arm_can_eliminate (const int from, const int to)
20971 {
20972 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20973 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20974 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20975 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20976 true);
20977 }
20978
20979 /* Emit RTL to save coprocessor registers on function entry. Returns the
20980 number of bytes pushed. */
20981
20982 static int
20983 arm_save_coproc_regs(void)
20984 {
20985 int saved_size = 0;
20986 unsigned reg;
20987 unsigned start_reg;
20988 rtx insn;
20989
20990 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20991 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20992 {
20993 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20994 insn = gen_rtx_MEM (V2SImode, insn);
20995 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20996 RTX_FRAME_RELATED_P (insn) = 1;
20997 saved_size += 8;
20998 }
20999
21000 if (TARGET_HARD_FLOAT && TARGET_VFP)
21001 {
21002 start_reg = FIRST_VFP_REGNUM;
21003
21004 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21005 {
21006 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21007 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21008 {
21009 if (start_reg != reg)
21010 saved_size += vfp_emit_fstmd (start_reg,
21011 (reg - start_reg) / 2);
21012 start_reg = reg + 2;
21013 }
21014 }
21015 if (start_reg != reg)
21016 saved_size += vfp_emit_fstmd (start_reg,
21017 (reg - start_reg) / 2);
21018 }
21019 return saved_size;
21020 }
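/* Editor's note: a standalone sketch (not GCC code) of the grouping performed
   by the VFP loop in arm_save_coproc_regs above.  One store-multiple is
   emitted per maximal run of live call-saved D-registers; liveness is
   abstracted here into a plain LIVE[] array of flags, and the helper name is
   illustrative only.  */
static int
model_vfp_save_runs (const int live[], int num_d_regs,
                     int run_start[], int run_len[])
{
  int d, start = 0, nruns = 0;

  for (d = 0; d < num_d_regs; d++)
    if (!live[d])
      {
        if (start != d)                 /* flush the pending run */
          {
            run_start[nruns] = start;
            run_len[nruns] = d - start;
            nruns++;
          }
        start = d + 1;
      }
  if (start != num_d_regs)              /* trailing run */
    {
      run_start[nruns] = start;
      run_len[nruns] = num_d_regs - start;
      nruns++;
    }
  return nruns;
}
/* Example: with only d8-d11 and d14-d15 live this yields the runs (8, 4) and
   (14, 2), i.e. two vfp_emit_fstmd calls in the prologue.  */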
21021
21022
21023 /* Set the Thumb frame pointer from the stack pointer. */
21024
21025 static void
21026 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21027 {
21028 HOST_WIDE_INT amount;
21029 rtx insn, dwarf;
21030
21031 amount = offsets->outgoing_args - offsets->locals_base;
21032 if (amount < 1024)
21033 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21034 stack_pointer_rtx, GEN_INT (amount)));
21035 else
21036 {
21037 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21038 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21039 expects the first two operands to be the same. */
21040 if (TARGET_THUMB2)
21041 {
21042 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21043 stack_pointer_rtx,
21044 hard_frame_pointer_rtx));
21045 }
21046 else
21047 {
21048 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21049 hard_frame_pointer_rtx,
21050 stack_pointer_rtx));
21051 }
21052 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21053 plus_constant (Pmode, stack_pointer_rtx, amount));
21054 RTX_FRAME_RELATED_P (dwarf) = 1;
21055 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21056 }
21057
21058 RTX_FRAME_RELATED_P (insn) = 1;
21059 }
21060
21061 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21062 function. */
21063 void
21064 arm_expand_prologue (void)
21065 {
21066 rtx amount;
21067 rtx insn;
21068 rtx ip_rtx;
21069 unsigned long live_regs_mask;
21070 unsigned long func_type;
21071 int fp_offset = 0;
21072 int saved_pretend_args = 0;
21073 int saved_regs = 0;
21074 unsigned HOST_WIDE_INT args_to_push;
21075 arm_stack_offsets *offsets;
21076
21077 func_type = arm_current_func_type ();
21078
21079 /* Naked functions don't have prologues. */
21080 if (IS_NAKED (func_type))
21081 return;
21082
21083 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21084 args_to_push = crtl->args.pretend_args_size;
21085
21086 /* Compute which register we will have to save onto the stack. */
21087 offsets = arm_get_frame_offsets ();
21088 live_regs_mask = offsets->saved_regs_mask;
21089
21090 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21091
21092 if (IS_STACKALIGN (func_type))
21093 {
21094 rtx r0, r1;
21095
21096 /* Handle a word-aligned stack pointer. We generate the following:
21097
21098 mov r0, sp
21099 bic r1, r0, #7
21100 mov sp, r1
21101 <save and restore r0 in normal prologue/epilogue>
21102 mov sp, r0
21103 bx lr
21104
21105 The unwinder doesn't need to know about the stack realignment.
21106 Just tell it we saved SP in r0. */
21107 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21108
21109 r0 = gen_rtx_REG (SImode, 0);
21110 r1 = gen_rtx_REG (SImode, 1);
21111
21112 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21113 RTX_FRAME_RELATED_P (insn) = 1;
21114 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21115
21116 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21117
21118 /* ??? The CFA changes here, which may cause GDB to conclude that it
21119 has entered a different function. That said, the unwind info is
21120 correct, individually, before and after this instruction because
21121 we've described the save of SP, which will override the default
21122 handling of SP as restoring from the CFA. */
21123 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21124 }
21125
21126 /* For APCS frames, if the IP register is clobbered
21127 when creating the frame, save that register in a special
21128 way. */
21129 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21130 {
21131 if (IS_INTERRUPT (func_type))
21132 {
21133 /* Interrupt functions must not corrupt any registers.
21134 Creating a frame pointer, however, corrupts the IP
21135 register, so we must push it first. */
21136 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21137
21138 /* Do not set RTX_FRAME_RELATED_P on this insn.
21139 The dwarf stack unwinding code only wants to see one
21140 stack decrement per function, and this is not it. If
21141 this instruction is labeled as being part of the frame
21142 creation sequence then dwarf2out_frame_debug_expr will
21143 die when it encounters the assignment of IP to FP
21144 later on, since the use of SP here establishes SP as
21145 the CFA register and not IP.
21146
21147 Anyway this instruction is not really part of the stack
21148 frame creation although it is part of the prologue. */
21149 }
21150 else if (IS_NESTED (func_type))
21151 {
21152 /* The static chain register is the same as the IP register
21153 used as a scratch register during stack frame creation.
21154 To get around this we need to find somewhere to store IP
21155 whilst the frame is being created. We try the following
21156 places in order:
21157
21158 1. The last argument register r3 if it is available.
21159 2. A slot on the stack above the frame if there are no
21160 arguments to push onto the stack.
21161 3. Register r3 again, after pushing the argument registers
21162 onto the stack, if this is a varargs function.
21163 4. The last slot on the stack created for the arguments to
21164 push, if this isn't a varargs function.
21165
21166 Note - we only need to tell the dwarf2 backend about the SP
21167 adjustment in the second variant; the static chain register
21168 doesn't need to be unwound, as it doesn't contain a value
21169 inherited from the caller. */
21170
21171 if (!arm_r3_live_at_start_p ())
21172 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21173 else if (args_to_push == 0)
21174 {
21175 rtx addr, dwarf;
21176
21177 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21178 saved_regs += 4;
21179
21180 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21181 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21182 fp_offset = 4;
21183
21184 /* Just tell the dwarf backend that we adjusted SP. */
21185 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21186 plus_constant (Pmode, stack_pointer_rtx,
21187 -fp_offset));
21188 RTX_FRAME_RELATED_P (insn) = 1;
21189 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21190 }
21191 else
21192 {
21193 /* Store the args on the stack. */
21194 if (cfun->machine->uses_anonymous_args)
21195 {
21196 insn
21197 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21198 (0xf0 >> (args_to_push / 4)) & 0xf);
21199 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21200 saved_pretend_args = 1;
21201 }
21202 else
21203 {
21204 rtx addr, dwarf;
21205
21206 if (args_to_push == 4)
21207 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21208 else
21209 addr
21210 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21211 plus_constant (Pmode,
21212 stack_pointer_rtx,
21213 -args_to_push));
21214
21215 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21216
21217 /* Just tell the dwarf backend that we adjusted SP. */
21218 dwarf
21219 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21220 plus_constant (Pmode, stack_pointer_rtx,
21221 -args_to_push));
21222 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21223 }
21224
21225 RTX_FRAME_RELATED_P (insn) = 1;
21226 fp_offset = args_to_push;
21227 args_to_push = 0;
21228 }
21229 }
21230
21231 insn = emit_set_insn (ip_rtx,
21232 plus_constant (Pmode, stack_pointer_rtx,
21233 fp_offset));
21234 RTX_FRAME_RELATED_P (insn) = 1;
21235 }
21236
21237 if (args_to_push)
21238 {
21239 /* Push the argument registers, or reserve space for them. */
21240 if (cfun->machine->uses_anonymous_args)
21241 insn = emit_multi_reg_push
21242 ((0xf0 >> (args_to_push / 4)) & 0xf,
21243 (0xf0 >> (args_to_push / 4)) & 0xf);
21244 else
21245 insn = emit_insn
21246 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21247 GEN_INT (- args_to_push)));
21248 RTX_FRAME_RELATED_P (insn) = 1;
21249 }
21250
21251 /* If this is an interrupt service routine, and the link register
21252 is going to be pushed, and we're not generating an extra
21253 push of IP (needed when a frame is needed and the frame layout is APCS),
21254 subtracting four from LR now will mean that the function return
21255 can be done with a single instruction. */
21256 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21257 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21258 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21259 && TARGET_ARM)
21260 {
21261 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21262
21263 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21264 }
21265
21266 if (live_regs_mask)
21267 {
21268 unsigned long dwarf_regs_mask = live_regs_mask;
21269
21270 saved_regs += bit_count (live_regs_mask) * 4;
21271 if (optimize_size && !frame_pointer_needed
21272 && saved_regs == offsets->saved_regs - offsets->saved_args)
21273 {
21274 /* If no coprocessor registers are being pushed and we don't have
21275 to worry about a frame pointer then push extra registers to
21276 create the stack frame. This is done in a way that does not
21277 alter the frame layout, so it is independent of the epilogue. */
21278 int n;
21279 int frame;
21280 n = 0;
21281 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21282 n++;
21283 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21284 if (frame && n * 4 >= frame)
21285 {
21286 n = frame / 4;
21287 live_regs_mask |= (1 << n) - 1;
21288 saved_regs += frame;
21289 }
21290 }
21291
21292 if (TARGET_LDRD
21293 && current_tune->prefer_ldrd_strd
21294 && !optimize_function_for_size_p (cfun))
21295 {
21296 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21297 if (TARGET_THUMB2)
21298 thumb2_emit_strd_push (live_regs_mask);
21299 else if (TARGET_ARM
21300 && !TARGET_APCS_FRAME
21301 && !IS_INTERRUPT (func_type))
21302 arm_emit_strd_push (live_regs_mask);
21303 else
21304 {
21305 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21306 RTX_FRAME_RELATED_P (insn) = 1;
21307 }
21308 }
21309 else
21310 {
21311 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21312 RTX_FRAME_RELATED_P (insn) = 1;
21313 }
21314 }
21315
21316 if (! IS_VOLATILE (func_type))
21317 saved_regs += arm_save_coproc_regs ();
21318
21319 if (frame_pointer_needed && TARGET_ARM)
21320 {
21321 /* Create the new frame pointer. */
21322 if (TARGET_APCS_FRAME)
21323 {
21324 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21325 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21326 RTX_FRAME_RELATED_P (insn) = 1;
21327
21328 if (IS_NESTED (func_type))
21329 {
21330 /* Recover the static chain register. */
21331 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21332 insn = gen_rtx_REG (SImode, 3);
21333 else
21334 {
21335 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21336 insn = gen_frame_mem (SImode, insn);
21337 }
21338 emit_set_insn (ip_rtx, insn);
21339 /* Add a USE to stop propagate_one_insn() from barfing. */
21340 emit_insn (gen_force_register_use (ip_rtx));
21341 }
21342 }
21343 else
21344 {
21345 insn = GEN_INT (saved_regs - 4);
21346 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21347 stack_pointer_rtx, insn));
21348 RTX_FRAME_RELATED_P (insn) = 1;
21349 }
21350 }
21351
21352 if (flag_stack_usage_info)
21353 current_function_static_stack_size
21354 = offsets->outgoing_args - offsets->saved_args;
21355
21356 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21357 {
21358 /* This add can produce multiple insns for a large constant, so we
21359 need to get tricky. */
21360 rtx_insn *last = get_last_insn ();
21361
21362 amount = GEN_INT (offsets->saved_args + saved_regs
21363 - offsets->outgoing_args);
21364
21365 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21366 amount));
21367 do
21368 {
21369 last = last ? NEXT_INSN (last) : get_insns ();
21370 RTX_FRAME_RELATED_P (last) = 1;
21371 }
21372 while (last != insn);
21373
21374 /* If the frame pointer is needed, emit a special barrier that
21375 will prevent the scheduler from moving stores to the frame
21376 before the stack adjustment. */
21377 if (frame_pointer_needed)
21378 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21379 hard_frame_pointer_rtx));
21380 }
21381
21382
21383 if (frame_pointer_needed && TARGET_THUMB2)
21384 thumb_set_frame_pointer (offsets);
21385
21386 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21387 {
21388 unsigned long mask;
21389
21390 mask = live_regs_mask;
21391 mask &= THUMB2_WORK_REGS;
21392 if (!IS_NESTED (func_type))
21393 mask |= (1 << IP_REGNUM);
21394 arm_load_pic_register (mask);
21395 }
21396
21397 /* If we are profiling, make sure no instructions are scheduled before
21398 the call to mcount. Similarly if the user has requested no
21399 scheduling in the prolog. Similarly if we want non-call exceptions
21400 using the EABI unwinder, to prevent faulting instructions from being
21401 swapped with a stack adjustment. */
21402 if (crtl->profile || !TARGET_SCHED_PROLOG
21403 || (arm_except_unwind_info (&global_options) == UI_TARGET
21404 && cfun->can_throw_non_call_exceptions))
21405 emit_insn (gen_blockage ());
21406
21407 /* If the link register is being kept alive, with the return address in it,
21408 then make sure that it does not get reused by the ce2 pass. */
21409 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21410 cfun->machine->lr_save_eliminated = 1;
21411 }
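/* Editor's note: a standalone illustration (not GCC code) of the register
   mask used above when pushing the tail of r0-r3 for variadic functions,
   (0xf0 >> (args_to_push / 4)) & 0xf.  */
static unsigned
model_anon_args_push_mask (unsigned args_to_push /* bytes, multiple of 4 */)
{
  return (0xf0u >> (args_to_push / 4)) & 0xfu;
}
/* args_to_push = 4  -> 0x8  (push {r3})
   args_to_push = 8  -> 0xc  (push {r2, r3})
   args_to_push = 12 -> 0xe  (push {r1, r2, r3})
   args_to_push = 16 -> 0xf  (push {r0, r1, r2, r3})  */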
21412 \f
21413 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21414 static void
21415 arm_print_condition (FILE *stream)
21416 {
21417 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21418 {
21419 /* Branch conversion is not implemented for Thumb-2. */
21420 if (TARGET_THUMB)
21421 {
21422 output_operand_lossage ("predicated Thumb instruction");
21423 return;
21424 }
21425 if (current_insn_predicate != NULL)
21426 {
21427 output_operand_lossage
21428 ("predicated instruction in conditional sequence");
21429 return;
21430 }
21431
21432 fputs (arm_condition_codes[arm_current_cc], stream);
21433 }
21434 else if (current_insn_predicate)
21435 {
21436 enum arm_cond_code code;
21437
21438 if (TARGET_THUMB1)
21439 {
21440 output_operand_lossage ("predicated Thumb instruction");
21441 return;
21442 }
21443
21444 code = get_arm_condition_code (current_insn_predicate);
21445 fputs (arm_condition_codes[code], stream);
21446 }
21447 }
21448
21449
21450 /* Globally reserved letters: acln
21451 Punctuation letters currently used: @_|?().!#
21452 Lower case letters currently used: bcdefhimpqtvwxyz
21453 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21454 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21455
21456 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21457
21458 If CODE is 'd', then X is a condition operand and the instruction
21459 should only be executed if the condition is true.
21460 If CODE is 'D', then X is a condition operand and the instruction
21461 should only be executed if the condition is false: however, if the mode
21462 of the comparison is CCFPEmode, then always execute the instruction -- we
21463 do this because in these circumstances !GE does not necessarily imply LT;
21464 in these cases the instruction pattern will take care to make sure that
21465 an instruction containing %d will follow, thereby undoing the effects of
21466 doing this instruction unconditionally.
21467 If CODE is 'N' then X is a floating point operand that must be negated
21468 before output.
21469 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21470 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21471 static void
21472 arm_print_operand (FILE *stream, rtx x, int code)
21473 {
21474 switch (code)
21475 {
21476 case '@':
21477 fputs (ASM_COMMENT_START, stream);
21478 return;
21479
21480 case '_':
21481 fputs (user_label_prefix, stream);
21482 return;
21483
21484 case '|':
21485 fputs (REGISTER_PREFIX, stream);
21486 return;
21487
21488 case '?':
21489 arm_print_condition (stream);
21490 return;
21491
21492 case '(':
21493 /* Nothing in unified syntax, otherwise the current condition code. */
21494 if (!TARGET_UNIFIED_ASM)
21495 arm_print_condition (stream);
21496 break;
21497
21498 case ')':
21499 /* The current condition code in unified syntax, otherwise nothing. */
21500 if (TARGET_UNIFIED_ASM)
21501 arm_print_condition (stream);
21502 break;
21503
21504 case '.':
21505 /* The current condition code for a condition code setting instruction.
21506 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21507 if (TARGET_UNIFIED_ASM)
21508 {
21509 fputc('s', stream);
21510 arm_print_condition (stream);
21511 }
21512 else
21513 {
21514 arm_print_condition (stream);
21515 fputc('s', stream);
21516 }
21517 return;
21518
21519 case '!':
21520 /* If the instruction is conditionally executed then print
21521 the current condition code, otherwise print 's'. */
21522 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21523 if (current_insn_predicate)
21524 arm_print_condition (stream);
21525 else
21526 fputc('s', stream);
21527 break;
21528
21529 /* %# is a "break" sequence. It doesn't output anything, but is used to
21530 separate e.g. operand numbers from following text, if that text consists
21531 of further digits which we don't want to be part of the operand
21532 number. */
21533 case '#':
21534 return;
21535
21536 case 'N':
21537 {
21538 REAL_VALUE_TYPE r;
21539 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21540 r = real_value_negate (&r);
21541 fprintf (stream, "%s", fp_const_from_val (&r));
21542 }
21543 return;
21544
21545 /* An integer or symbol address without a preceding # sign. */
21546 case 'c':
21547 switch (GET_CODE (x))
21548 {
21549 case CONST_INT:
21550 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21551 break;
21552
21553 case SYMBOL_REF:
21554 output_addr_const (stream, x);
21555 break;
21556
21557 case CONST:
21558 if (GET_CODE (XEXP (x, 0)) == PLUS
21559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21560 {
21561 output_addr_const (stream, x);
21562 break;
21563 }
21564 /* Fall through. */
21565
21566 default:
21567 output_operand_lossage ("Unsupported operand for code '%c'", code);
21568 }
21569 return;
21570
21571 /* An integer that we want to print in HEX. */
21572 case 'x':
21573 switch (GET_CODE (x))
21574 {
21575 case CONST_INT:
21576 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21577 break;
21578
21579 default:
21580 output_operand_lossage ("Unsupported operand for code '%c'", code);
21581 }
21582 return;
21583
21584 case 'B':
21585 if (CONST_INT_P (x))
21586 {
21587 HOST_WIDE_INT val;
21588 val = ARM_SIGN_EXTEND (~INTVAL (x));
21589 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21590 }
21591 else
21592 {
21593 putc ('~', stream);
21594 output_addr_const (stream, x);
21595 }
21596 return;
21597
21598 case 'b':
21599 /* Print the log2 of a CONST_INT. */
21600 {
21601 HOST_WIDE_INT val;
21602
21603 if (!CONST_INT_P (x)
21604 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21605 output_operand_lossage ("Unsupported operand for code '%c'", code);
21606 else
21607 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21608 }
21609 return;
21610
21611 case 'L':
21612 /* The low 16 bits of an immediate constant. */
21613 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21614 return;
21615
21616 case 'i':
21617 fprintf (stream, "%s", arithmetic_instr (x, 1));
21618 return;
21619
21620 case 'I':
21621 fprintf (stream, "%s", arithmetic_instr (x, 0));
21622 return;
21623
21624 case 'S':
21625 {
21626 HOST_WIDE_INT val;
21627 const char *shift;
21628
21629 shift = shift_op (x, &val);
21630
21631 if (shift)
21632 {
21633 fprintf (stream, ", %s ", shift);
21634 if (val == -1)
21635 arm_print_operand (stream, XEXP (x, 1), 0);
21636 else
21637 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21638 }
21639 }
21640 return;
21641
21642 /* An explanation of the 'Q', 'R' and 'H' register operands:
21643
21644 In a pair of registers containing a DI or DF value the 'Q'
21645 operand returns the register number of the register containing
21646 the least significant part of the value. The 'R' operand returns
21647 the register number of the register containing the most
21648 significant part of the value.
21649
21650 The 'H' operand returns the higher of the two register numbers.
21651 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21652 same as the 'Q' operand, since the most significant part of the
21653 value is held in the lower number register. The reverse is true
21654 on systems where WORDS_BIG_ENDIAN is false.
21655
21656 The purpose of these operands is to distinguish between cases
21657 where the endian-ness of the values is important (for example
21658 when they are added together), and cases where the endian-ness
21659 is irrelevant, but the order of register operations is important.
21660 For example when loading a value from memory into a register
21661 pair, the endian-ness does not matter. Provided that the value
21662 from the lower memory address is put into the lower numbered
21663 register, and the value from the higher address is put into the
21664 higher numbered register, the load will work regardless of whether
21665 the value being loaded is big-wordian or little-wordian. The
21666 order of the two register loads can matter however, if the address
21667 of the memory location is actually held in one of the registers
21668 being overwritten by the load.
21669
21670 The 'Q' and 'R' constraints are also available for 64-bit
21671 constants. */
21672 case 'Q':
21673 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21674 {
21675 rtx part = gen_lowpart (SImode, x);
21676 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21677 return;
21678 }
21679
21680 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21681 {
21682 output_operand_lossage ("invalid operand for code '%c'", code);
21683 return;
21684 }
21685
21686 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21687 return;
21688
21689 case 'R':
21690 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21691 {
21692 machine_mode mode = GET_MODE (x);
21693 rtx part;
21694
21695 if (mode == VOIDmode)
21696 mode = DImode;
21697 part = gen_highpart_mode (SImode, mode, x);
21698 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21699 return;
21700 }
21701
21702 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21703 {
21704 output_operand_lossage ("invalid operand for code '%c'", code);
21705 return;
21706 }
21707
21708 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21709 return;
21710
21711 case 'H':
21712 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21713 {
21714 output_operand_lossage ("invalid operand for code '%c'", code);
21715 return;
21716 }
21717
21718 asm_fprintf (stream, "%r", REGNO (x) + 1);
21719 return;
21720
21721 case 'J':
21722 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21723 {
21724 output_operand_lossage ("invalid operand for code '%c'", code);
21725 return;
21726 }
21727
21728 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21729 return;
21730
21731 case 'K':
21732 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21733 {
21734 output_operand_lossage ("invalid operand for code '%c'", code);
21735 return;
21736 }
21737
21738 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21739 return;
21740
21741 case 'm':
21742 asm_fprintf (stream, "%r",
21743 REG_P (XEXP (x, 0))
21744 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21745 return;
21746
21747 case 'M':
21748 asm_fprintf (stream, "{%r-%r}",
21749 REGNO (x),
21750 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21751 return;
21752
21753 /* Like 'M', but writing doubleword vector registers, for use by Neon
21754 insns. */
21755 case 'h':
21756 {
21757 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21758 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21759 if (numregs == 1)
21760 asm_fprintf (stream, "{d%d}", regno);
21761 else
21762 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21763 }
21764 return;
21765
21766 case 'd':
21767 /* CONST_TRUE_RTX means always -- that's the default. */
21768 if (x == const_true_rtx)
21769 return;
21770
21771 if (!COMPARISON_P (x))
21772 {
21773 output_operand_lossage ("invalid operand for code '%c'", code);
21774 return;
21775 }
21776
21777 fputs (arm_condition_codes[get_arm_condition_code (x)],
21778 stream);
21779 return;
21780
21781 case 'D':
21782 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21783 want to do that. */
21784 if (x == const_true_rtx)
21785 {
21786 output_operand_lossage ("instruction never executed");
21787 return;
21788 }
21789 if (!COMPARISON_P (x))
21790 {
21791 output_operand_lossage ("invalid operand for code '%c'", code);
21792 return;
21793 }
21794
21795 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21796 (get_arm_condition_code (x))],
21797 stream);
21798 return;
21799
21800 case 's':
21801 case 'V':
21802 case 'W':
21803 case 'X':
21804 case 'Y':
21805 case 'Z':
21806 /* Former Maverick support, removed after GCC-4.7. */
21807 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21808 return;
21809
21810 case 'U':
21811 if (!REG_P (x)
21812 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21813 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21814 /* Bad value for wCG register number. */
21815 {
21816 output_operand_lossage ("invalid operand for code '%c'", code);
21817 return;
21818 }
21819
21820 else
21821 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21822 return;
21823
21824 /* Print an iWMMXt control register name. */
21825 case 'w':
21826 if (!CONST_INT_P (x)
21827 || INTVAL (x) < 0
21828 || INTVAL (x) >= 16)
21829 /* Bad value for wC register number. */
21830 {
21831 output_operand_lossage ("invalid operand for code '%c'", code);
21832 return;
21833 }
21834
21835 else
21836 {
21837 static const char * wc_reg_names [16] =
21838 {
21839 "wCID", "wCon", "wCSSF", "wCASF",
21840 "wC4", "wC5", "wC6", "wC7",
21841 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21842 "wC12", "wC13", "wC14", "wC15"
21843 };
21844
21845 fputs (wc_reg_names [INTVAL (x)], stream);
21846 }
21847 return;
21848
21849 /* Print the high single-precision register of a VFP double-precision
21850 register. */
21851 case 'p':
21852 {
21853 machine_mode mode = GET_MODE (x);
21854 int regno;
21855
21856 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21857 {
21858 output_operand_lossage ("invalid operand for code '%c'", code);
21859 return;
21860 }
21861
21862 regno = REGNO (x);
21863 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21864 {
21865 output_operand_lossage ("invalid operand for code '%c'", code);
21866 return;
21867 }
21868
21869 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21870 }
21871 return;
21872
21873 /* Print a VFP/Neon double precision or quad precision register name. */
21874 case 'P':
21875 case 'q':
21876 {
21877 machine_mode mode = GET_MODE (x);
21878 int is_quad = (code == 'q');
21879 int regno;
21880
21881 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21882 {
21883 output_operand_lossage ("invalid operand for code '%c'", code);
21884 return;
21885 }
21886
21887 if (!REG_P (x)
21888 || !IS_VFP_REGNUM (REGNO (x)))
21889 {
21890 output_operand_lossage ("invalid operand for code '%c'", code);
21891 return;
21892 }
21893
21894 regno = REGNO (x);
21895 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21896 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21897 {
21898 output_operand_lossage ("invalid operand for code '%c'", code);
21899 return;
21900 }
21901
21902 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21903 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21904 }
21905 return;
21906
21907 /* These two codes print the low/high doubleword register of a Neon quad
21908 register, respectively. For pair-structure types, can also print
21909 low/high quadword registers. */
21910 case 'e':
21911 case 'f':
21912 {
21913 machine_mode mode = GET_MODE (x);
21914 int regno;
21915
21916 if ((GET_MODE_SIZE (mode) != 16
21917 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21918 {
21919 output_operand_lossage ("invalid operand for code '%c'", code);
21920 return;
21921 }
21922
21923 regno = REGNO (x);
21924 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21925 {
21926 output_operand_lossage ("invalid operand for code '%c'", code);
21927 return;
21928 }
21929
21930 if (GET_MODE_SIZE (mode) == 16)
21931 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21932 + (code == 'f' ? 1 : 0));
21933 else
21934 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21935 + (code == 'f' ? 1 : 0));
21936 }
21937 return;
21938
21939 /* Print a VFPv3 floating-point constant, represented as an integer
21940 index. */
21941 case 'G':
21942 {
21943 int index = vfp3_const_double_index (x);
21944 gcc_assert (index != -1);
21945 fprintf (stream, "%d", index);
21946 }
21947 return;
21948
21949 /* Print bits representing opcode features for Neon.
21950
21951 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21952 and polynomials as unsigned.
21953
21954 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21955
21956 Bit 2 is 1 for rounding functions, 0 otherwise. */
21957
21958 /* Identify the type as 's', 'u', 'p' or 'f'. */
21959 case 'T':
21960 {
21961 HOST_WIDE_INT bits = INTVAL (x);
21962 fputc ("uspf"[bits & 3], stream);
21963 }
21964 return;
21965
21966 /* Likewise, but signed and unsigned integers are both 'i'. */
21967 case 'F':
21968 {
21969 HOST_WIDE_INT bits = INTVAL (x);
21970 fputc ("iipf"[bits & 3], stream);
21971 }
21972 return;
21973
21974 /* As for 'T', but emit 'u' instead of 'p'. */
21975 case 't':
21976 {
21977 HOST_WIDE_INT bits = INTVAL (x);
21978 fputc ("usuf"[bits & 3], stream);
21979 }
21980 return;
21981
21982 /* Bit 2: rounding (vs none). */
21983 case 'O':
21984 {
21985 HOST_WIDE_INT bits = INTVAL (x);
21986 fputs ((bits & 4) != 0 ? "r" : "", stream);
21987 }
21988 return;
21989
21990 /* Memory operand for vld1/vst1 instruction. */
21991 case 'A':
21992 {
21993 rtx addr;
21994 bool postinc = FALSE;
21995 rtx postinc_reg = NULL;
21996 unsigned align, memsize, align_bits;
21997
21998 gcc_assert (MEM_P (x));
21999 addr = XEXP (x, 0);
22000 if (GET_CODE (addr) == POST_INC)
22001 {
22002 postinc = 1;
22003 addr = XEXP (addr, 0);
22004 }
22005 if (GET_CODE (addr) == POST_MODIFY)
22006 {
22007 postinc_reg = XEXP( XEXP (addr, 1), 1);
22008 addr = XEXP (addr, 0);
22009 }
22010 asm_fprintf (stream, "[%r", REGNO (addr));
22011
22012 /* We know the alignment of this access, so we can emit a hint in the
22013 instruction (for some alignments) as an aid to the memory subsystem
22014 of the target. */
22015 align = MEM_ALIGN (x) >> 3;
22016 memsize = MEM_SIZE (x);
22017
22018 /* Only certain alignment specifiers are supported by the hardware. */
22019 if (memsize == 32 && (align % 32) == 0)
22020 align_bits = 256;
22021 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22022 align_bits = 128;
22023 else if (memsize >= 8 && (align % 8) == 0)
22024 align_bits = 64;
22025 else
22026 align_bits = 0;
22027
22028 if (align_bits != 0)
22029 asm_fprintf (stream, ":%d", align_bits);
22030
22031 asm_fprintf (stream, "]");
22032
22033 if (postinc)
22034 fputs("!", stream);
22035 if (postinc_reg)
22036 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22037 }
22038 return;
22039
22040 case 'C':
22041 {
22042 rtx addr;
22043
22044 gcc_assert (MEM_P (x));
22045 addr = XEXP (x, 0);
22046 gcc_assert (REG_P (addr));
22047 asm_fprintf (stream, "[%r]", REGNO (addr));
22048 }
22049 return;
22050
22051 /* Translate an S register number into a D register number and element index. */
22052 case 'y':
22053 {
22054 machine_mode mode = GET_MODE (x);
22055 int regno;
22056
22057 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22058 {
22059 output_operand_lossage ("invalid operand for code '%c'", code);
22060 return;
22061 }
22062
22063 regno = REGNO (x);
22064 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22065 {
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22068 }
22069
22070 regno = regno - FIRST_VFP_REGNUM;
22071 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22072 }
22073 return;
22074
22075 case 'v':
22076 gcc_assert (CONST_DOUBLE_P (x));
22077 int result;
22078 result = vfp3_const_double_for_fract_bits (x);
22079 if (result == 0)
22080 result = vfp3_const_double_for_bits (x);
22081 fprintf (stream, "#%d", result);
22082 return;
22083
22084 /* Register specifier for vld1.16/vst1.16. Translate the S register
22085 number into a D register number and element index. */
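/* For example, a 16-bit value in s1 is printed as d0[2], i.e. lane 2 of
   the overlapping D register.  */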
22086 case 'z':
22087 {
22088 machine_mode mode = GET_MODE (x);
22089 int regno;
22090
22091 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22092 {
22093 output_operand_lossage ("invalid operand for code '%c'", code);
22094 return;
22095 }
22096
22097 regno = REGNO (x);
22098 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22099 {
22100 output_operand_lossage ("invalid operand for code '%c'", code);
22101 return;
22102 }
22103
22104 regno = regno - FIRST_VFP_REGNUM;
22105 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22106 }
22107 return;
22108
22109 default:
22110 if (x == 0)
22111 {
22112 output_operand_lossage ("missing operand");
22113 return;
22114 }
22115
22116 switch (GET_CODE (x))
22117 {
22118 case REG:
22119 asm_fprintf (stream, "%r", REGNO (x));
22120 break;
22121
22122 case MEM:
22123 output_memory_reference_mode = GET_MODE (x);
22124 output_address (XEXP (x, 0));
22125 break;
22126
22127 case CONST_DOUBLE:
22128 {
22129 char fpstr[20];
22130 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22131 sizeof (fpstr), 0, 1);
22132 fprintf (stream, "#%s", fpstr);
22133 }
22134 break;
22135
22136 default:
22137 gcc_assert (GET_CODE (x) != NEG);
22138 fputc ('#', stream);
22139 if (GET_CODE (x) == HIGH)
22140 {
22141 fputs (":lower16:", stream);
22142 x = XEXP (x, 0);
22143 }
22144
22145 output_addr_const (stream, x);
22146 break;
22147 }
22148 }
22149 }
22150 \f
22151 /* Target hook for printing a memory address. */
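/* A few representative cases, assuming a base register r3 and SImode
   accesses:
     (reg r3)                      -> "[r3]"
     (plus (reg r3) (const_int 8)) -> "[r3, #8]"
     (post_inc (reg r3))           -> "[r3], #4"  */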
22152 static void
22153 arm_print_operand_address (FILE *stream, rtx x)
22154 {
22155 if (TARGET_32BIT)
22156 {
22157 int is_minus = GET_CODE (x) == MINUS;
22158
22159 if (REG_P (x))
22160 asm_fprintf (stream, "[%r]", REGNO (x));
22161 else if (GET_CODE (x) == PLUS || is_minus)
22162 {
22163 rtx base = XEXP (x, 0);
22164 rtx index = XEXP (x, 1);
22165 HOST_WIDE_INT offset = 0;
22166 if (!REG_P (base)
22167 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22168 {
22169 /* Ensure that BASE is a register. */
22170 /* (one of them must be). */
22171 /* Also ensure the SP is not used as an index register. */
22172 std::swap (base, index);
22173 }
22174 switch (GET_CODE (index))
22175 {
22176 case CONST_INT:
22177 offset = INTVAL (index);
22178 if (is_minus)
22179 offset = -offset;
22180 asm_fprintf (stream, "[%r, #%wd]",
22181 REGNO (base), offset);
22182 break;
22183
22184 case REG:
22185 asm_fprintf (stream, "[%r, %s%r]",
22186 REGNO (base), is_minus ? "-" : "",
22187 REGNO (index));
22188 break;
22189
22190 case MULT:
22191 case ASHIFTRT:
22192 case LSHIFTRT:
22193 case ASHIFT:
22194 case ROTATERT:
22195 {
22196 asm_fprintf (stream, "[%r, %s%r",
22197 REGNO (base), is_minus ? "-" : "",
22198 REGNO (XEXP (index, 0)));
22199 arm_print_operand (stream, index, 'S');
22200 fputs ("]", stream);
22201 break;
22202 }
22203
22204 default:
22205 gcc_unreachable ();
22206 }
22207 }
22208 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22209 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22210 {
22211 extern machine_mode output_memory_reference_mode;
22212
22213 gcc_assert (REG_P (XEXP (x, 0)));
22214
22215 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22216 asm_fprintf (stream, "[%r, #%s%d]!",
22217 REGNO (XEXP (x, 0)),
22218 GET_CODE (x) == PRE_DEC ? "-" : "",
22219 GET_MODE_SIZE (output_memory_reference_mode));
22220 else
22221 asm_fprintf (stream, "[%r], #%s%d",
22222 REGNO (XEXP (x, 0)),
22223 GET_CODE (x) == POST_DEC ? "-" : "",
22224 GET_MODE_SIZE (output_memory_reference_mode));
22225 }
22226 else if (GET_CODE (x) == PRE_MODIFY)
22227 {
22228 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22229 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22230 asm_fprintf (stream, "#%wd]!",
22231 INTVAL (XEXP (XEXP (x, 1), 1)));
22232 else
22233 asm_fprintf (stream, "%r]!",
22234 REGNO (XEXP (XEXP (x, 1), 1)));
22235 }
22236 else if (GET_CODE (x) == POST_MODIFY)
22237 {
22238 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22239 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22240 asm_fprintf (stream, "#%wd",
22241 INTVAL (XEXP (XEXP (x, 1), 1)));
22242 else
22243 asm_fprintf (stream, "%r",
22244 REGNO (XEXP (XEXP (x, 1), 1)));
22245 }
22246 else output_addr_const (stream, x);
22247 }
22248 else
22249 {
22250 if (REG_P (x))
22251 asm_fprintf (stream, "[%r]", REGNO (x));
22252 else if (GET_CODE (x) == POST_INC)
22253 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22254 else if (GET_CODE (x) == PLUS)
22255 {
22256 gcc_assert (REG_P (XEXP (x, 0)));
22257 if (CONST_INT_P (XEXP (x, 1)))
22258 asm_fprintf (stream, "[%r, #%wd]",
22259 REGNO (XEXP (x, 0)),
22260 INTVAL (XEXP (x, 1)));
22261 else
22262 asm_fprintf (stream, "[%r, %r]",
22263 REGNO (XEXP (x, 0)),
22264 REGNO (XEXP (x, 1)));
22265 }
22266 else
22267 output_addr_const (stream, x);
22268 }
22269 }
22270 \f
22271 /* Target hook for indicating whether a punctuation character for
22272 TARGET_PRINT_OPERAND is valid. */
22273 static bool
22274 arm_print_operand_punct_valid_p (unsigned char code)
22275 {
22276 return (code == '@' || code == '|' || code == '.'
22277 || code == '(' || code == ')' || code == '#'
22278 || (TARGET_32BIT && (code == '?'))
22279 || (TARGET_THUMB2 && (code == '!'))
22280 || (TARGET_THUMB && (code == '_')));
22281 }
22282 \f
22283 /* Target hook for assembling integer objects. The ARM version needs to
22284 handle word-sized values specially. */
22285 static bool
22286 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22287 {
22288 machine_mode mode;
22289
22290 if (size == UNITS_PER_WORD && aligned_p)
22291 {
22292 fputs ("\t.word\t", asm_out_file);
22293 output_addr_const (asm_out_file, x);
22294
22295 /* Mark symbols as position independent. We only do this in the
22296 .text segment, not in the .data segment. */
22297 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22298 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22299 {
22300 /* See legitimize_pic_address for an explanation of the
22301 TARGET_VXWORKS_RTP check. */
22302 if (!arm_pic_data_is_text_relative
22303 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22304 fputs ("(GOT)", asm_out_file);
22305 else
22306 fputs ("(GOTOFF)", asm_out_file);
22307 }
22308 fputc ('\n', asm_out_file);
22309 return true;
22310 }
22311
22312 mode = GET_MODE (x);
22313
22314 if (arm_vector_mode_supported_p (mode))
22315 {
22316 int i, units;
22317
22318 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22319
22320 units = CONST_VECTOR_NUNITS (x);
22321 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22322
22323 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22324 for (i = 0; i < units; i++)
22325 {
22326 rtx elt = CONST_VECTOR_ELT (x, i);
22327 assemble_integer
22328 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22329 }
22330 else
22331 for (i = 0; i < units; i++)
22332 {
22333 rtx elt = CONST_VECTOR_ELT (x, i);
22334 REAL_VALUE_TYPE rval;
22335
22336 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22337
22338 assemble_real
22339 (rval, GET_MODE_INNER (mode),
22340 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22341 }
22342
22343 return true;
22344 }
22345
22346 return default_assemble_integer (x, size, aligned_p);
22347 }
22348
22349 static void
22350 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22351 {
22352 section *s;
22353
22354 if (!TARGET_AAPCS_BASED)
22355 {
22356 (is_ctor ?
22357 default_named_section_asm_out_constructor
22358 : default_named_section_asm_out_destructor) (symbol, priority);
22359 return;
22360 }
22361
22362 /* Put these in the .init_array section, using a special relocation. */
22363 if (priority != DEFAULT_INIT_PRIORITY)
22364 {
22365 char buf[18];
22366 sprintf (buf, "%s.%.5u",
22367 is_ctor ? ".init_array" : ".fini_array",
22368 priority);
22369 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22370 }
22371 else if (is_ctor)
22372 s = ctors_section;
22373 else
22374 s = dtors_section;
22375
22376 switch_to_section (s);
22377 assemble_align (POINTER_SIZE);
22378 fputs ("\t.word\t", asm_out_file);
22379 output_addr_const (asm_out_file, symbol);
22380 fputs ("(target1)\n", asm_out_file);
22381 }
22382
22383 /* Add a function to the list of static constructors. */
22384
22385 static void
22386 arm_elf_asm_constructor (rtx symbol, int priority)
22387 {
22388 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22389 }
22390
22391 /* Add a function to the list of static destructors. */
22392
22393 static void
22394 arm_elf_asm_destructor (rtx symbol, int priority)
22395 {
22396 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22397 }
22398 \f
22399 /* A finite state machine takes care of noticing whether or not instructions
22400 can be conditionally executed, and thus decrease execution time and code
22401 size by deleting branch instructions. The fsm is controlled by
22402 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22403
22404 /* The states of the fsm controlling condition codes are:
22405 0: normal, do nothing special
22406 1: make ASM_OUTPUT_OPCODE not output this instruction
22407 2: make ASM_OUTPUT_OPCODE not output this instruction
22408 3: make instructions conditional
22409 4: make instructions conditional
22410
22411 State transitions (state->state by whom under condition):
22412 0 -> 1 final_prescan_insn if the `target' is a label
22413 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22414 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22415 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22416 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22417 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22418 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22419 (the target insn is arm_target_insn).
22420
22421 If the jump clobbers the conditions then we use states 2 and 4.
22422
22423 A similar thing can be done with conditional return insns.
22424
22425 XXX In case the `target' is an unconditional branch, this conditionalising
22426 of the instructions always reduces code size, but not always execution
22427 time. But then, I want to reduce the code size to somewhere near what
22428 /bin/cc produces. */
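/* A hypothetical sketch of the basic transformation (register names and
   label chosen purely for illustration):

       cmp   r0, #0
       beq   .L1           @ final_prescan_insn: state 0 -> 1
       add   r1, r1, #1    @ ASM_OUTPUT_OPCODE: state 1 -> 3, emitted as "addne"
   .L1:                    @ internal_label: state 3 -> 0

   The branch is not output and the skipped instruction is emitted under
   the inverse condition instead.  */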
22429
22430 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22431 instructions. When a COND_EXEC instruction is seen the subsequent
22432 instructions are scanned so that multiple conditional instructions can be
22433 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22434 specify the length and true/false mask for the IT block. These will be
22435 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
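/* Worked example: two COND_EXEC insns, the first predicated on EQ and the
   second on NE, give arm_condexec_masklen == 2 and arm_condexec_mask == 1
   (binary 01), which thumb2_asm_output_opcode prints as "ite eq" in front
   of the pair.  */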
22436
22437 /* Returns the index of the ARM condition code string in
22438 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22439 COMPARISON should be an rtx like `(eq (...) (...))'. */
22440
22441 enum arm_cond_code
22442 maybe_get_arm_condition_code (rtx comparison)
22443 {
22444 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22445 enum arm_cond_code code;
22446 enum rtx_code comp_code = GET_CODE (comparison);
22447
22448 if (GET_MODE_CLASS (mode) != MODE_CC)
22449 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22450 XEXP (comparison, 1));
22451
22452 switch (mode)
22453 {
22454 case CC_DNEmode: code = ARM_NE; goto dominance;
22455 case CC_DEQmode: code = ARM_EQ; goto dominance;
22456 case CC_DGEmode: code = ARM_GE; goto dominance;
22457 case CC_DGTmode: code = ARM_GT; goto dominance;
22458 case CC_DLEmode: code = ARM_LE; goto dominance;
22459 case CC_DLTmode: code = ARM_LT; goto dominance;
22460 case CC_DGEUmode: code = ARM_CS; goto dominance;
22461 case CC_DGTUmode: code = ARM_HI; goto dominance;
22462 case CC_DLEUmode: code = ARM_LS; goto dominance;
22463 case CC_DLTUmode: code = ARM_CC;
22464
22465 dominance:
22466 if (comp_code == EQ)
22467 return ARM_INVERSE_CONDITION_CODE (code);
22468 if (comp_code == NE)
22469 return code;
22470 return ARM_NV;
22471
22472 case CC_NOOVmode:
22473 switch (comp_code)
22474 {
22475 case NE: return ARM_NE;
22476 case EQ: return ARM_EQ;
22477 case GE: return ARM_PL;
22478 case LT: return ARM_MI;
22479 default: return ARM_NV;
22480 }
22481
22482 case CC_Zmode:
22483 switch (comp_code)
22484 {
22485 case NE: return ARM_NE;
22486 case EQ: return ARM_EQ;
22487 default: return ARM_NV;
22488 }
22489
22490 case CC_Nmode:
22491 switch (comp_code)
22492 {
22493 case NE: return ARM_MI;
22494 case EQ: return ARM_PL;
22495 default: return ARM_NV;
22496 }
22497
22498 case CCFPEmode:
22499 case CCFPmode:
22500 /* We can handle all cases except UNEQ and LTGT. */
22501 switch (comp_code)
22502 {
22503 case GE: return ARM_GE;
22504 case GT: return ARM_GT;
22505 case LE: return ARM_LS;
22506 case LT: return ARM_MI;
22507 case NE: return ARM_NE;
22508 case EQ: return ARM_EQ;
22509 case ORDERED: return ARM_VC;
22510 case UNORDERED: return ARM_VS;
22511 case UNLT: return ARM_LT;
22512 case UNLE: return ARM_LE;
22513 case UNGT: return ARM_HI;
22514 case UNGE: return ARM_PL;
22515 /* UNEQ and LTGT do not have a representation. */
22516 case UNEQ: /* Fall through. */
22517 case LTGT: /* Fall through. */
22518 default: return ARM_NV;
22519 }
22520
22521 case CC_SWPmode:
22522 switch (comp_code)
22523 {
22524 case NE: return ARM_NE;
22525 case EQ: return ARM_EQ;
22526 case GE: return ARM_LE;
22527 case GT: return ARM_LT;
22528 case LE: return ARM_GE;
22529 case LT: return ARM_GT;
22530 case GEU: return ARM_LS;
22531 case GTU: return ARM_CC;
22532 case LEU: return ARM_CS;
22533 case LTU: return ARM_HI;
22534 default: return ARM_NV;
22535 }
22536
22537 case CC_Cmode:
22538 switch (comp_code)
22539 {
22540 case LTU: return ARM_CS;
22541 case GEU: return ARM_CC;
22542 default: return ARM_NV;
22543 }
22544
22545 case CC_CZmode:
22546 switch (comp_code)
22547 {
22548 case NE: return ARM_NE;
22549 case EQ: return ARM_EQ;
22550 case GEU: return ARM_CS;
22551 case GTU: return ARM_HI;
22552 case LEU: return ARM_LS;
22553 case LTU: return ARM_CC;
22554 default: return ARM_NV;
22555 }
22556
22557 case CC_NCVmode:
22558 switch (comp_code)
22559 {
22560 case GE: return ARM_GE;
22561 case LT: return ARM_LT;
22562 case GEU: return ARM_CS;
22563 case LTU: return ARM_CC;
22564 default: return ARM_NV;
22565 }
22566
22567 case CCmode:
22568 switch (comp_code)
22569 {
22570 case NE: return ARM_NE;
22571 case EQ: return ARM_EQ;
22572 case GE: return ARM_GE;
22573 case GT: return ARM_GT;
22574 case LE: return ARM_LE;
22575 case LT: return ARM_LT;
22576 case GEU: return ARM_CS;
22577 case GTU: return ARM_HI;
22578 case LEU: return ARM_LS;
22579 case LTU: return ARM_CC;
22580 default: return ARM_NV;
22581 }
22582
22583 default: gcc_unreachable ();
22584 }
22585 }
22586
22587 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22588 static enum arm_cond_code
22589 get_arm_condition_code (rtx comparison)
22590 {
22591 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22592 gcc_assert (code != ARM_NV);
22593 return code;
22594 }
22595
22596 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22597 instructions. */
22598 void
22599 thumb2_final_prescan_insn (rtx_insn *insn)
22600 {
22601 rtx_insn *first_insn = insn;
22602 rtx body = PATTERN (insn);
22603 rtx predicate;
22604 enum arm_cond_code code;
22605 int n;
22606 int mask;
22607 int max;
22608
22609 /* max_insns_skipped in the tune was already taken into account in the
22610 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22611 just emit the IT blocks as best we can. It does not make sense to split
22612 the IT blocks. */
22613 max = MAX_INSN_PER_IT_BLOCK;
22614
22615 /* Remove the previous insn from the count of insns to be output. */
22616 if (arm_condexec_count)
22617 arm_condexec_count--;
22618
22619 /* Nothing to do if we are already inside a conditional block. */
22620 if (arm_condexec_count)
22621 return;
22622
22623 if (GET_CODE (body) != COND_EXEC)
22624 return;
22625
22626 /* Conditional jumps are implemented directly. */
22627 if (JUMP_P (insn))
22628 return;
22629
22630 predicate = COND_EXEC_TEST (body);
22631 arm_current_cc = get_arm_condition_code (predicate);
22632
22633 n = get_attr_ce_count (insn);
22634 arm_condexec_count = 1;
22635 arm_condexec_mask = (1 << n) - 1;
22636 arm_condexec_masklen = n;
22637 /* See if subsequent instructions can be combined into the same block. */
22638 for (;;)
22639 {
22640 insn = next_nonnote_insn (insn);
22641
22642 /* Jumping into the middle of an IT block is illegal, so a label or
22643 barrier terminates the block. */
22644 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22645 break;
22646
22647 body = PATTERN (insn);
22648 /* USE and CLOBBER aren't really insns, so just skip them. */
22649 if (GET_CODE (body) == USE
22650 || GET_CODE (body) == CLOBBER)
22651 continue;
22652
22653 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22654 if (GET_CODE (body) != COND_EXEC)
22655 break;
22656 /* Maximum number of conditionally executed instructions in a block. */
22657 n = get_attr_ce_count (insn);
22658 if (arm_condexec_masklen + n > max)
22659 break;
22660
22661 predicate = COND_EXEC_TEST (body);
22662 code = get_arm_condition_code (predicate);
22663 mask = (1 << n) - 1;
22664 if (arm_current_cc == code)
22665 arm_condexec_mask |= (mask << arm_condexec_masklen);
22666 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22667 break;
22668
22669 arm_condexec_count++;
22670 arm_condexec_masklen += n;
22671
22672 /* A jump must be the last instruction in a conditional block. */
22673 if (JUMP_P (insn))
22674 break;
22675 }
22676 /* Restore recog_data (getting the attributes of other insns can
22677 destroy this array, but final.c assumes that it remains intact
22678 across this call). */
22679 extract_constrain_insn_cached (first_insn);
22680 }
22681
22682 void
22683 arm_final_prescan_insn (rtx_insn *insn)
22684 {
22685 /* BODY will hold the body of INSN. */
22686 rtx body = PATTERN (insn);
22687
22688 /* This will be 1 if trying to repeat the trick, and things need to be
22689 reversed if it appears to fail. */
22690 int reverse = 0;
22691
22692 /* If we start with a return insn, we only succeed if we find another one. */
22693 int seeking_return = 0;
22694 enum rtx_code return_code = UNKNOWN;
22695
22696 /* START_INSN will hold the insn from where we start looking. This is the
22697 first insn after the following code_label if REVERSE is true. */
22698 rtx_insn *start_insn = insn;
22699
22700 /* If in state 4, check if the target branch is reached, in order to
22701 change back to state 0. */
22702 if (arm_ccfsm_state == 4)
22703 {
22704 if (insn == arm_target_insn)
22705 {
22706 arm_target_insn = NULL;
22707 arm_ccfsm_state = 0;
22708 }
22709 return;
22710 }
22711
22712 /* If in state 3, it is possible to repeat the trick, if this insn is an
22713 unconditional branch to a label, and immediately following this branch
22714 is the previous target label which is only used once, and the label this
22715 branch jumps to is not too far off. */
22716 if (arm_ccfsm_state == 3)
22717 {
22718 if (simplejump_p (insn))
22719 {
22720 start_insn = next_nonnote_insn (start_insn);
22721 if (BARRIER_P (start_insn))
22722 {
22723 /* XXX Isn't this always a barrier? */
22724 start_insn = next_nonnote_insn (start_insn);
22725 }
22726 if (LABEL_P (start_insn)
22727 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22728 && LABEL_NUSES (start_insn) == 1)
22729 reverse = TRUE;
22730 else
22731 return;
22732 }
22733 else if (ANY_RETURN_P (body))
22734 {
22735 start_insn = next_nonnote_insn (start_insn);
22736 if (BARRIER_P (start_insn))
22737 start_insn = next_nonnote_insn (start_insn);
22738 if (LABEL_P (start_insn)
22739 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22740 && LABEL_NUSES (start_insn) == 1)
22741 {
22742 reverse = TRUE;
22743 seeking_return = 1;
22744 return_code = GET_CODE (body);
22745 }
22746 else
22747 return;
22748 }
22749 else
22750 return;
22751 }
22752
22753 gcc_assert (!arm_ccfsm_state || reverse);
22754 if (!JUMP_P (insn))
22755 return;
22756
22757 /* This jump might be paralleled with a clobber of the condition codes;
22758 the jump should always come first. */
22759 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22760 body = XVECEXP (body, 0, 0);
22761
22762 if (reverse
22763 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22764 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22765 {
22766 int insns_skipped;
22767 int fail = FALSE, succeed = FALSE;
22768 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22769 int then_not_else = TRUE;
22770 rtx_insn *this_insn = start_insn;
22771 rtx label = 0;
22772
22773 /* Register the insn jumped to. */
22774 if (reverse)
22775 {
22776 if (!seeking_return)
22777 label = XEXP (SET_SRC (body), 0);
22778 }
22779 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22780 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22781 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22782 {
22783 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22784 then_not_else = FALSE;
22785 }
22786 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22787 {
22788 seeking_return = 1;
22789 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22790 }
22791 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22792 {
22793 seeking_return = 1;
22794 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22795 then_not_else = FALSE;
22796 }
22797 else
22798 gcc_unreachable ();
22799
22800 /* See how many insns this branch skips, and what kind of insns. If all
22801 insns are okay, and the label or unconditional branch to the same
22802 label is not too far away, succeed. */
22803 for (insns_skipped = 0;
22804 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22805 {
22806 rtx scanbody;
22807
22808 this_insn = next_nonnote_insn (this_insn);
22809 if (!this_insn)
22810 break;
22811
22812 switch (GET_CODE (this_insn))
22813 {
22814 case CODE_LABEL:
22815 /* Succeed if it is the target label, otherwise fail since
22816 control falls in from somewhere else. */
22817 if (this_insn == label)
22818 {
22819 arm_ccfsm_state = 1;
22820 succeed = TRUE;
22821 }
22822 else
22823 fail = TRUE;
22824 break;
22825
22826 case BARRIER:
22827 /* Succeed if the following insn is the target label.
22828 Otherwise fail.
22829 If return insns are used then the last insn in a function
22830 will be a barrier. */
22831 this_insn = next_nonnote_insn (this_insn);
22832 if (this_insn && this_insn == label)
22833 {
22834 arm_ccfsm_state = 1;
22835 succeed = TRUE;
22836 }
22837 else
22838 fail = TRUE;
22839 break;
22840
22841 case CALL_INSN:
22842 /* The AAPCS says that conditional calls should not be
22843 used since they make interworking inefficient (the
22844 linker can't transform BL<cond> into BLX). That's
22845 only a problem if the machine has BLX. */
22846 if (arm_arch5)
22847 {
22848 fail = TRUE;
22849 break;
22850 }
22851
22852 /* Succeed if the following insn is the target label, or
22853 if the following two insns are a barrier and the
22854 target label. */
22855 this_insn = next_nonnote_insn (this_insn);
22856 if (this_insn && BARRIER_P (this_insn))
22857 this_insn = next_nonnote_insn (this_insn);
22858
22859 if (this_insn && this_insn == label
22860 && insns_skipped < max_insns_skipped)
22861 {
22862 arm_ccfsm_state = 1;
22863 succeed = TRUE;
22864 }
22865 else
22866 fail = TRUE;
22867 break;
22868
22869 case JUMP_INSN:
22870 /* If this is an unconditional branch to the same label, succeed.
22871 If it is to another label, do nothing. If it is conditional,
22872 fail. */
22873 /* XXX Probably, the tests for SET and the PC are
22874 unnecessary. */
22875
22876 scanbody = PATTERN (this_insn);
22877 if (GET_CODE (scanbody) == SET
22878 && GET_CODE (SET_DEST (scanbody)) == PC)
22879 {
22880 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22881 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22882 {
22883 arm_ccfsm_state = 2;
22884 succeed = TRUE;
22885 }
22886 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22887 fail = TRUE;
22888 }
22889 /* Fail if a conditional return is undesirable (e.g. on a
22890 StrongARM), but still allow this if optimizing for size. */
22891 else if (GET_CODE (scanbody) == return_code
22892 && !use_return_insn (TRUE, NULL)
22893 && !optimize_size)
22894 fail = TRUE;
22895 else if (GET_CODE (scanbody) == return_code)
22896 {
22897 arm_ccfsm_state = 2;
22898 succeed = TRUE;
22899 }
22900 else if (GET_CODE (scanbody) == PARALLEL)
22901 {
22902 switch (get_attr_conds (this_insn))
22903 {
22904 case CONDS_NOCOND:
22905 break;
22906 default:
22907 fail = TRUE;
22908 break;
22909 }
22910 }
22911 else
22912 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22913
22914 break;
22915
22916 case INSN:
22917 /* Instructions using or affecting the condition codes make it
22918 fail. */
22919 scanbody = PATTERN (this_insn);
22920 if (!(GET_CODE (scanbody) == SET
22921 || GET_CODE (scanbody) == PARALLEL)
22922 || get_attr_conds (this_insn) != CONDS_NOCOND)
22923 fail = TRUE;
22924 break;
22925
22926 default:
22927 break;
22928 }
22929 }
22930 if (succeed)
22931 {
22932 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22933 arm_target_label = CODE_LABEL_NUMBER (label);
22934 else
22935 {
22936 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22937
22938 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22939 {
22940 this_insn = next_nonnote_insn (this_insn);
22941 gcc_assert (!this_insn
22942 || (!BARRIER_P (this_insn)
22943 && !LABEL_P (this_insn)));
22944 }
22945 if (!this_insn)
22946 {
22947 /* Oh, dear! We ran off the end; give up. */
22948 extract_constrain_insn_cached (insn);
22949 arm_ccfsm_state = 0;
22950 arm_target_insn = NULL;
22951 return;
22952 }
22953 arm_target_insn = this_insn;
22954 }
22955
22956 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22957 what it was. */
22958 if (!reverse)
22959 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22960
22961 if (reverse || then_not_else)
22962 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22963 }
22964
22965 /* Restore recog_data (getting the attributes of other insns can
22966 destroy this array, but final.c assumes that it remains intact
22967 across this call). */
22968 extract_constrain_insn_cached (insn);
22969 }
22970 }
22971
22972 /* Output IT instructions. */
22973 void
22974 thumb2_asm_output_opcode (FILE * stream)
22975 {
22976 char buff[5];
22977 int n;
22978
22979 if (arm_condexec_mask)
22980 {
22981 for (n = 0; n < arm_condexec_masklen; n++)
22982 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22983 buff[n] = 0;
22984 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22985 arm_condition_codes[arm_current_cc]);
22986 arm_condexec_mask = 0;
22987 }
22988 }
22989
22990 /* Returns true if REGNO is a valid register
22991 for holding a quantity of type MODE. */
22992 int
22993 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22994 {
22995 if (GET_MODE_CLASS (mode) == MODE_CC)
22996 return (regno == CC_REGNUM
22997 || (TARGET_HARD_FLOAT && TARGET_VFP
22998 && regno == VFPCC_REGNUM));
22999
23000 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23001 return false;
23002
23003 if (TARGET_THUMB1)
23004 /* For the Thumb we only allow values bigger than SImode in
23005 registers 0 - 6, so that there is always a second low
23006 register available to hold the upper part of the value.
23007 We probably ought to ensure that the register is the
23008 start of an even numbered register pair. */
23009 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23010
23011 if (TARGET_HARD_FLOAT && TARGET_VFP
23012 && IS_VFP_REGNUM (regno))
23013 {
23014 if (mode == SFmode || mode == SImode)
23015 return VFP_REGNO_OK_FOR_SINGLE (regno);
23016
23017 if (mode == DFmode)
23018 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23019
23020 /* VFP registers can hold HFmode values, but there is no point in
23021 putting them there unless we have hardware conversion insns. */
23022 if (mode == HFmode)
23023 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23024
23025 if (TARGET_NEON)
23026 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23027 || (VALID_NEON_QREG_MODE (mode)
23028 && NEON_REGNO_OK_FOR_QUAD (regno))
23029 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23030 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23031 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23032 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23033 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23034
23035 return FALSE;
23036 }
23037
23038 if (TARGET_REALLY_IWMMXT)
23039 {
23040 if (IS_IWMMXT_GR_REGNUM (regno))
23041 return mode == SImode;
23042
23043 if (IS_IWMMXT_REGNUM (regno))
23044 return VALID_IWMMXT_REG_MODE (mode);
23045 }
23046
23047 /* We allow almost any value to be stored in the general registers.
23048 Restrict doubleword quantities to even register pairs in ARM state
23049 so that we can use ldrd. Do not allow very large Neon structure
23050 opaque modes in general registers; they would use too many. */
23051 if (regno <= LAST_ARM_REGNUM)
23052 {
23053 if (ARM_NUM_REGS (mode) > 4)
23054 return FALSE;
23055
23056 if (TARGET_THUMB2)
23057 return TRUE;
23058
23059 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23060 }
23061
23062 if (regno == FRAME_POINTER_REGNUM
23063 || regno == ARG_POINTER_REGNUM)
23064 /* We only allow integers in the fake hard registers. */
23065 return GET_MODE_CLASS (mode) == MODE_INT;
23066
23067 return FALSE;
23068 }
23069
23070 /* Implement MODES_TIEABLE_P. */
23071
23072 bool
23073 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23074 {
23075 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23076 return true;
23077
23078 /* We specifically want to allow elements of "structure" modes to
23079 be tieable to the structure. This more general condition allows
23080 other rarer situations too. */
23081 if (TARGET_NEON
23082 && (VALID_NEON_DREG_MODE (mode1)
23083 || VALID_NEON_QREG_MODE (mode1)
23084 || VALID_NEON_STRUCT_MODE (mode1))
23085 && (VALID_NEON_DREG_MODE (mode2)
23086 || VALID_NEON_QREG_MODE (mode2)
23087 || VALID_NEON_STRUCT_MODE (mode2)))
23088 return true;
23089
23090 return false;
23091 }
23092
23093 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23094 not used in arm mode. */
23095
23096 enum reg_class
23097 arm_regno_class (int regno)
23098 {
23099 if (regno == PC_REGNUM)
23100 return NO_REGS;
23101
23102 if (TARGET_THUMB1)
23103 {
23104 if (regno == STACK_POINTER_REGNUM)
23105 return STACK_REG;
23106 if (regno == CC_REGNUM)
23107 return CC_REG;
23108 if (regno < 8)
23109 return LO_REGS;
23110 return HI_REGS;
23111 }
23112
23113 if (TARGET_THUMB2 && regno < 8)
23114 return LO_REGS;
23115
23116 if ( regno <= LAST_ARM_REGNUM
23117 || regno == FRAME_POINTER_REGNUM
23118 || regno == ARG_POINTER_REGNUM)
23119 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23120
23121 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23122 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23123
23124 if (IS_VFP_REGNUM (regno))
23125 {
23126 if (regno <= D7_VFP_REGNUM)
23127 return VFP_D0_D7_REGS;
23128 else if (regno <= LAST_LO_VFP_REGNUM)
23129 return VFP_LO_REGS;
23130 else
23131 return VFP_HI_REGS;
23132 }
23133
23134 if (IS_IWMMXT_REGNUM (regno))
23135 return IWMMXT_REGS;
23136
23137 if (IS_IWMMXT_GR_REGNUM (regno))
23138 return IWMMXT_GR_REGS;
23139
23140 return NO_REGS;
23141 }
23142
23143 /* Handle a special case when computing the offset
23144 of an argument from the frame pointer. */
23145 int
23146 arm_debugger_arg_offset (int value, rtx addr)
23147 {
23148 rtx_insn *insn;
23149
23150 /* We are only interested if dbxout_parms() failed to compute the offset. */
23151 if (value != 0)
23152 return 0;
23153
23154 /* We can only cope with the case where the address is held in a register. */
23155 if (!REG_P (addr))
23156 return 0;
23157
23158 /* If we are using the frame pointer to point at the argument, then
23159 an offset of 0 is correct. */
23160 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23161 return 0;
23162
23163 /* If we are using the stack pointer to point at the
23164 argument, then an offset of 0 is correct. */
23165 /* ??? Check this is consistent with thumb2 frame layout. */
23166 if ((TARGET_THUMB || !frame_pointer_needed)
23167 && REGNO (addr) == SP_REGNUM)
23168 return 0;
23169
23170 /* Oh dear. The argument is pointed to by a register rather
23171 than being held in a register, or being stored at a known
23172 offset from the frame pointer. Since GDB only understands
23173 those two kinds of argument we must translate the address
23174 held in the register into an offset from the frame pointer.
23175 We do this by searching through the insns for the function
23176 looking to see where this register gets its value. If the
23177 register is initialized from the frame pointer plus an offset
23178 then we are in luck and we can continue, otherwise we give up.
23179
23180 This code is exercised by producing debugging information
23181 for a function with arguments like this:
23182
23183 double func (double a, double b, int c, double d) {return d;}
23184
23185 Without this code the stab for parameter 'd' will be set to
23186 an offset of 0 from the frame pointer, rather than 8. */
23187
23188 /* The if() statement says:
23189
23190 If the insn is a normal instruction
23191 and if the insn is setting the value in a register
23192 and if the register being set is the register holding the address of the argument
23193 and if the address is computed by an addition
23194 that involves adding to a register
23195 which is the frame pointer
23196 a constant integer
23197
23198 then... */
23199
23200 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23201 {
23202 if ( NONJUMP_INSN_P (insn)
23203 && GET_CODE (PATTERN (insn)) == SET
23204 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23205 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23206 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23207 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23208 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23209 )
23210 {
23211 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23212
23213 break;
23214 }
23215 }
23216
23217 if (value == 0)
23218 {
23219 debug_rtx (addr);
23220 warning (0, "unable to compute real location of stacked parameter");
23221 value = 8; /* XXX magic hack */
23222 }
23223
23224 return value;
23225 }
23226 \f
23227 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23228
23229 static const char *
23230 arm_invalid_parameter_type (const_tree t)
23231 {
23232 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23233 return N_("function parameters cannot have __fp16 type");
23234 return NULL;
23235 }
23236
23237 /* Implement TARGET_INVALID_RETURN_TYPE. */
23238
23239 static const char *
23240 arm_invalid_return_type (const_tree t)
23241 {
23242 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23243 return N_("functions cannot return __fp16 type");
23244 return NULL;
23245 }
23246
23247 /* Implement TARGET_PROMOTED_TYPE. */
23248
23249 static tree
23250 arm_promoted_type (const_tree t)
23251 {
23252 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23253 return float_type_node;
23254 return NULL_TREE;
23255 }
23256
23257 /* Implement TARGET_CONVERT_TO_TYPE.
23258 Specifically, this hook implements the peculiarity of the ARM
23259 half-precision floating-point C semantics that requires conversions between
23260 __fp16 and double to go through an intermediate conversion to float. */
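/* For instance (hypothetical user code):

     __fp16 h;
     double d = h;   performed as (double) (float) h
     h = d;          performed as (__fp16) (float) d

   i.e. each conversion between __fp16 and double is split into two steps
   through float.  */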
23261
23262 static tree
23263 arm_convert_to_type (tree type, tree expr)
23264 {
23265 tree fromtype = TREE_TYPE (expr);
23266 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23267 return NULL_TREE;
23268 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23269 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23270 return convert (type, convert (float_type_node, expr));
23271 return NULL_TREE;
23272 }
23273
23274 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23275 This simply adds HFmode as a supported mode; even though we don't
23276 implement arithmetic on this type directly, it's supported by
23277 optabs conversions, much the way the double-word arithmetic is
23278 special-cased in the default hook. */
23279
23280 static bool
23281 arm_scalar_mode_supported_p (machine_mode mode)
23282 {
23283 if (mode == HFmode)
23284 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23285 else if (ALL_FIXED_POINT_MODE_P (mode))
23286 return true;
23287 else
23288 return default_scalar_mode_supported_p (mode);
23289 }
23290
23291 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23292 void
23293 neon_reinterpret (rtx dest, rtx src)
23294 {
23295 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23296 }
23297
23298 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23299 not to early-clobber SRC registers in the process.
23300
23301 We assume that the operands described by SRC and DEST represent a
23302 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23303 number of components into which the copy has been decomposed. */
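/* Worked example (hypothetical registers): copying {d1, d2} into {d2, d3}
   overlaps and the destination starts at a higher register number, so the
   moves are emitted in reverse order, d3 <- d2 and then d2 <- d1, ensuring
   that no source register is clobbered before it has been read.  */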
23304 void
23305 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23306 {
23307 unsigned int i;
23308
23309 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23310 || REGNO (operands[0]) < REGNO (operands[1]))
23311 {
23312 for (i = 0; i < count; i++)
23313 {
23314 operands[2 * i] = dest[i];
23315 operands[2 * i + 1] = src[i];
23316 }
23317 }
23318 else
23319 {
23320 for (i = 0; i < count; i++)
23321 {
23322 operands[2 * i] = dest[count - i - 1];
23323 operands[2 * i + 1] = src[count - i - 1];
23324 }
23325 }
23326 }
23327
23328 /* Split operands into moves from op[1] + op[2] into op[0]. */
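/* For example, combining d1 and d0 (in that order) into q0 is the
   reversed-halves case handled below, so a single VSWP of d0 and d1 is
   emitted instead of two moves.  */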
23329
23330 void
23331 neon_split_vcombine (rtx operands[3])
23332 {
23333 unsigned int dest = REGNO (operands[0]);
23334 unsigned int src1 = REGNO (operands[1]);
23335 unsigned int src2 = REGNO (operands[2]);
23336 machine_mode halfmode = GET_MODE (operands[1]);
23337 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23338 rtx destlo, desthi;
23339
23340 if (src1 == dest && src2 == dest + halfregs)
23341 {
23342 /* No-op move. Can't split to nothing; emit something. */
23343 emit_note (NOTE_INSN_DELETED);
23344 return;
23345 }
23346
23347 /* Preserve register attributes for variable tracking. */
23348 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23349 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23350 GET_MODE_SIZE (halfmode));
23351
23352 /* Special case of reversed high/low parts. Use VSWP. */
23353 if (src2 == dest && src1 == dest + halfregs)
23354 {
23355 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23356 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23357 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23358 return;
23359 }
23360
23361 if (!reg_overlap_mentioned_p (operands[2], destlo))
23362 {
23363 /* Try to avoid unnecessary moves if part of the result
23364 is in the right place already. */
23365 if (src1 != dest)
23366 emit_move_insn (destlo, operands[1]);
23367 if (src2 != dest + halfregs)
23368 emit_move_insn (desthi, operands[2]);
23369 }
23370 else
23371 {
23372 if (src2 != dest + halfregs)
23373 emit_move_insn (desthi, operands[2]);
23374 if (src1 != dest)
23375 emit_move_insn (destlo, operands[1]);
23376 }
23377 }
23378 \f
23379 /* Return the number (counting from 0) of
23380 the least significant set bit in MASK. */
23381
23382 inline static int
23383 number_of_first_bit_set (unsigned mask)
23384 {
23385 return ctz_hwi (mask);
23386 }
23387
23388 /* Like emit_multi_reg_push, but allowing for a different set of
23389 registers to be described as saved. MASK is the set of registers
23390 to be saved; REAL_REGS is the set of registers to be described as
23391 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23392
23393 static rtx_insn *
23394 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23395 {
23396 unsigned long regno;
23397 rtx par[10], tmp, reg;
23398 rtx_insn *insn;
23399 int i, j;
23400
23401 /* Build the parallel of the registers actually being stored. */
23402 for (i = 0; mask; ++i, mask &= mask - 1)
23403 {
23404 regno = ctz_hwi (mask);
23405 reg = gen_rtx_REG (SImode, regno);
23406
23407 if (i == 0)
23408 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23409 else
23410 tmp = gen_rtx_USE (VOIDmode, reg);
23411
23412 par[i] = tmp;
23413 }
23414
23415 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23416 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23417 tmp = gen_frame_mem (BLKmode, tmp);
23418 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23419 par[0] = tmp;
23420
23421 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23422 insn = emit_insn (tmp);
23423
23424 /* Always build the stack adjustment note for unwind info. */
23425 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23426 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23427 par[0] = tmp;
23428
23429 /* Build the parallel of the registers recorded as saved for unwind. */
23430 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23431 {
23432 regno = ctz_hwi (real_regs);
23433 reg = gen_rtx_REG (SImode, regno);
23434
23435 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23436 tmp = gen_frame_mem (SImode, tmp);
23437 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23438 RTX_FRAME_RELATED_P (tmp) = 1;
23439 par[j + 1] = tmp;
23440 }
23441
23442 if (j == 0)
23443 tmp = par[0];
23444 else
23445 {
23446 RTX_FRAME_RELATED_P (par[0]) = 1;
23447 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23448 }
23449
23450 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23451
23452 return insn;
23453 }
23454
23455 /* Emit code to pop registers from the stack. F is the
23456 assembly file. MASK is the registers to pop. */
23457 static void
23458 thumb_pop (FILE *f, unsigned long mask)
23459 {
23460 int regno;
23461 int lo_mask = mask & 0xFF;
23462 int pushed_words = 0;
23463
23464 gcc_assert (mask);
23465
23466 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23467 {
23468 /* Special case. Do not generate a POP PC statement here, do it in
23469 thumb_exit() */
23470 thumb_exit (f, -1);
23471 return;
23472 }
23473
23474 fprintf (f, "\tpop\t{");
23475
23476 /* Look at the low registers first. */
23477 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23478 {
23479 if (lo_mask & 1)
23480 {
23481 asm_fprintf (f, "%r", regno);
23482
23483 if ((lo_mask & ~1) != 0)
23484 fprintf (f, ", ");
23485
23486 pushed_words++;
23487 }
23488 }
23489
23490 if (mask & (1 << PC_REGNUM))
23491 {
23492 /* Catch popping the PC. */
23493 if (TARGET_INTERWORK || TARGET_BACKTRACE
23494 || crtl->calls_eh_return)
23495 {
23496 /* The PC is never popped directly; instead
23497 it is popped into r3 and then BX is used. */
23498 fprintf (f, "}\n");
23499
23500 thumb_exit (f, -1);
23501
23502 return;
23503 }
23504 else
23505 {
23506 if (mask & 0xFF)
23507 fprintf (f, ", ");
23508
23509 asm_fprintf (f, "%r", PC_REGNUM);
23510 }
23511 }
23512
23513 fprintf (f, "}\n");
23514 }
23515
23516 /* Generate code to return from a thumb function.
23517 If 'reg_containing_return_addr' is -1, then the return address is
23518 actually on the stack, at the stack pointer. */
23519 static void
23520 thumb_exit (FILE *f, int reg_containing_return_addr)
23521 {
23522 unsigned regs_available_for_popping;
23523 unsigned regs_to_pop;
23524 int pops_needed;
23525 unsigned available;
23526 unsigned required;
23527 machine_mode mode;
23528 int size;
23529 int restore_a4 = FALSE;
23530
23531 /* Compute the registers we need to pop. */
23532 regs_to_pop = 0;
23533 pops_needed = 0;
23534
23535 if (reg_containing_return_addr == -1)
23536 {
23537 regs_to_pop |= 1 << LR_REGNUM;
23538 ++pops_needed;
23539 }
23540
23541 if (TARGET_BACKTRACE)
23542 {
23543 /* Restore the (ARM) frame pointer and stack pointer. */
23544 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23545 pops_needed += 2;
23546 }
23547
23548 /* If there is nothing to pop then just emit the BX instruction and
23549 return. */
23550 if (pops_needed == 0)
23551 {
23552 if (crtl->calls_eh_return)
23553 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23554
23555 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23556 return;
23557 }
23558 /* Otherwise if we are not supporting interworking and we have not created
23559 a backtrace structure and the function was not entered in ARM mode then
23560 just pop the return address straight into the PC. */
23561 else if (!TARGET_INTERWORK
23562 && !TARGET_BACKTRACE
23563 && !is_called_in_ARM_mode (current_function_decl)
23564 && !crtl->calls_eh_return)
23565 {
23566 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23567 return;
23568 }
23569
23570 /* Find out how many of the (return) argument registers we can corrupt. */
23571 regs_available_for_popping = 0;
23572
23573 /* If returning via __builtin_eh_return, the bottom three registers
23574 all contain information needed for the return. */
23575 if (crtl->calls_eh_return)
23576 size = 12;
23577 else
23578 {
23579 /* Deduce the registers used from the function's
23580 return value. This is more reliable than examining
23581 df_regs_ever_live_p () because that will be set if the register is
23582 ever used in the function, not just if the register is used
23583 to hold a return value. */
23584
23585 if (crtl->return_rtx != 0)
23586 mode = GET_MODE (crtl->return_rtx);
23587 else
23588 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23589
23590 size = GET_MODE_SIZE (mode);
23591
23592 if (size == 0)
23593 {
23594 /* In a void function we can use any argument register.
23595 In a function that returns a structure on the stack
23596 we can use the second and third argument registers. */
23597 if (mode == VOIDmode)
23598 regs_available_for_popping =
23599 (1 << ARG_REGISTER (1))
23600 | (1 << ARG_REGISTER (2))
23601 | (1 << ARG_REGISTER (3));
23602 else
23603 regs_available_for_popping =
23604 (1 << ARG_REGISTER (2))
23605 | (1 << ARG_REGISTER (3));
23606 }
23607 else if (size <= 4)
23608 regs_available_for_popping =
23609 (1 << ARG_REGISTER (2))
23610 | (1 << ARG_REGISTER (3));
23611 else if (size <= 8)
23612 regs_available_for_popping =
23613 (1 << ARG_REGISTER (3));
23614 }
23615
23616 /* Match registers to be popped with registers into which we pop them. */
23617 for (available = regs_available_for_popping,
23618 required = regs_to_pop;
23619 required != 0 && available != 0;
23620 available &= ~(available & - available),
23621 required &= ~(required & - required))
23622 -- pops_needed;
23623
23624 /* If we have any popping registers left over, remove them. */
23625 if (available > 0)
23626 regs_available_for_popping &= ~available;
23627
23628 /* Otherwise if we need another popping register we can use
23629 the fourth argument register. */
23630 else if (pops_needed)
23631 {
23632 /* If we have not found any free argument registers and
23633 reg a4 contains the return address, we must move it. */
23634 if (regs_available_for_popping == 0
23635 && reg_containing_return_addr == LAST_ARG_REGNUM)
23636 {
23637 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23638 reg_containing_return_addr = LR_REGNUM;
23639 }
23640 else if (size > 12)
23641 {
23642 /* Register a4 is being used to hold part of the return value,
23643 but we have dire need of a free, low register. */
23644 restore_a4 = TRUE;
23645
23646 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23647 }
23648
23649 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23650 {
23651 /* The fourth argument register is available. */
23652 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23653
23654 --pops_needed;
23655 }
23656 }
23657
23658 /* Pop as many registers as we can. */
23659 thumb_pop (f, regs_available_for_popping);
23660
23661 /* Process the registers we popped. */
23662 if (reg_containing_return_addr == -1)
23663 {
23664 /* The return address was popped into the lowest numbered register. */
23665 regs_to_pop &= ~(1 << LR_REGNUM);
23666
23667 reg_containing_return_addr =
23668 number_of_first_bit_set (regs_available_for_popping);
23669
23670 /* Remove this register from the mask of available registers, so that
23671 the return address will not be corrupted by further pops. */
23672 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23673 }
23674
23675 /* If we popped other registers then handle them here. */
23676 if (regs_available_for_popping)
23677 {
23678 int frame_pointer;
23679
23680 /* Work out which register currently contains the frame pointer. */
23681 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23682
23683 /* Move it into the correct place. */
23684 asm_fprintf (f, "\tmov\t%r, %r\n",
23685 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23686
23687 /* (Temporarily) remove it from the mask of popped registers. */
23688 regs_available_for_popping &= ~(1 << frame_pointer);
23689 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23690
23691 if (regs_available_for_popping)
23692 {
23693 int stack_pointer;
23694
23695 /* We popped the stack pointer as well,
23696 find the register that contains it. */
23697 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23698
23699 /* Move it into the stack register. */
23700 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23701
23702 /* At this point we have popped all necessary registers, so
23703 do not worry about restoring regs_available_for_popping
23704 to its correct value:
23705
23706 assert (pops_needed == 0)
23707 assert (regs_available_for_popping == (1 << frame_pointer))
23708 assert (regs_to_pop == (1 << STACK_POINTER)) */
23709 }
23710 else
23711 {
23712 /* Since we have just moved the popped value into the frame
23713 pointer, the popping register is available for reuse, and
23714 we know that we still have the stack pointer left to pop. */
23715 regs_available_for_popping |= (1 << frame_pointer);
23716 }
23717 }
23718
23719 /* If we still have registers left on the stack, but we no longer have
23720 any registers into which we can pop them, then we must move the return
23721 address into the link register and make available the register that
23722 contained it. */
23723 if (regs_available_for_popping == 0 && pops_needed > 0)
23724 {
23725 regs_available_for_popping |= 1 << reg_containing_return_addr;
23726
23727 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23728 reg_containing_return_addr);
23729
23730 reg_containing_return_addr = LR_REGNUM;
23731 }
23732
23733 /* If we have registers left on the stack then pop some more.
23734 We know that at most we will want to pop FP and SP. */
23735 if (pops_needed > 0)
23736 {
23737 int popped_into;
23738 int move_to;
23739
23740 thumb_pop (f, regs_available_for_popping);
23741
23742 /* We have popped either FP or SP.
23743 Move whichever one it is into the correct register. */
23744 popped_into = number_of_first_bit_set (regs_available_for_popping);
23745 move_to = number_of_first_bit_set (regs_to_pop);
23746
23747 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23748
23749 regs_to_pop &= ~(1 << move_to);
23750
23751 --pops_needed;
23752 }
23753
23754 /* If we still have not popped everything then we must have only
23755 had one register available to us and we are now popping the SP. */
23756 if (pops_needed > 0)
23757 {
23758 int popped_into;
23759
23760 thumb_pop (f, regs_available_for_popping);
23761
23762 popped_into = number_of_first_bit_set (regs_available_for_popping);
23763
23764 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23765 /*
23766 assert (regs_to_pop == (1 << STACK_POINTER))
23767 assert (pops_needed == 1)
23768 */
23769 }
23770
23771 /* If necessary restore the a4 register. */
23772 if (restore_a4)
23773 {
23774 if (reg_containing_return_addr != LR_REGNUM)
23775 {
23776 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23777 reg_containing_return_addr = LR_REGNUM;
23778 }
23779
23780 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23781 }
23782
23783 if (crtl->calls_eh_return)
23784 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23785
23786 /* Return to caller. */
23787 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23788 }
23789 \f
23790 /* Scan INSN just before assembler is output for it.
23791 For Thumb-1, we track the status of the condition codes; this
23792 information is used in the cbranchsi4_insn pattern. */
23793 void
23794 thumb1_final_prescan_insn (rtx_insn *insn)
23795 {
23796 if (flag_print_asm_name)
23797 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23798 INSN_ADDRESSES (INSN_UID (insn)));
23799 /* Don't overwrite the previous setter when we get to a cbranch. */
23800 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23801 {
23802 enum attr_conds conds;
23803
23804 if (cfun->machine->thumb1_cc_insn)
23805 {
23806 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23807 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23808 CC_STATUS_INIT;
23809 }
23810 conds = get_attr_conds (insn);
23811 if (conds == CONDS_SET)
23812 {
23813 rtx set = single_set (insn);
23814 cfun->machine->thumb1_cc_insn = insn;
23815 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23816 cfun->machine->thumb1_cc_op1 = const0_rtx;
23817 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23818 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23819 {
23820 rtx src1 = XEXP (SET_SRC (set), 1);
23821 if (src1 == const0_rtx)
23822 cfun->machine->thumb1_cc_mode = CCmode;
23823 }
23824 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23825 {
23826 /* Record the src register operand instead of dest because
23827 cprop_hardreg pass propagates src. */
23828 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23829 }
23830 }
23831 else if (conds != CONDS_NOCOND)
23832 cfun->machine->thumb1_cc_insn = NULL_RTX;
23833 }
23834
23835 /* Check if unexpected far jump is used. */
23836 if (cfun->machine->lr_save_eliminated
23837 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23838 internal_error("Unexpected thumb1 far jump");
23839 }
23840
23841 int
23842 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23843 {
23844 unsigned HOST_WIDE_INT mask = 0xff;
23845 int i;
23846
23847 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23848 if (val == 0) /* XXX */
23849 return 0;
23850
23851 for (i = 0; i < 25; i++)
23852 if ((val & (mask << i)) == val)
23853 return 1;
23854
23855 return 0;
23856 }
23857
23858 /* Returns nonzero if the current function contains,
23859 or might contain a far jump. */
23860 static int
23861 thumb_far_jump_used_p (void)
23862 {
23863 rtx_insn *insn;
23864 bool far_jump = false;
23865 unsigned int func_size = 0;
23866
23867 /* This test is only important for leaf functions. */
23868 /* assert (!leaf_function_p ()); */
23869
23870 /* If we have already decided that far jumps may be used,
23871 do not bother checking again, and always return true even if
23872 it turns out that they are not being used. Once we have made
23873 the decision that far jumps are present (and that hence the link
23874 register will be pushed onto the stack) we cannot go back on it. */
23875 if (cfun->machine->far_jump_used)
23876 return 1;
23877
23878 /* If this function is not being called from the prologue/epilogue
23879 generation code then it must be being called from the
23880 INITIAL_ELIMINATION_OFFSET macro. */
23881 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23882 {
23883 /* In this case we know that we are being asked about the elimination
23884 of the arg pointer register. If that register is not being used,
23885 then there are no arguments on the stack, and we do not have to
23886 worry that a far jump might force the prologue to push the link
23887 register, changing the stack offsets. In this case we can just
23888 return false, since the presence of far jumps in the function will
23889 not affect stack offsets.
23890
23891 If the arg pointer is live (or if it was live, but has now been
23892 eliminated and so set to dead) then we do have to test to see if
23893 the function might contain a far jump. This test can lead to some
23894 	 false negatives, since before reload is completed, the length of
23895 branch instructions is not known, so gcc defaults to returning their
23896 longest length, which in turn sets the far jump attribute to true.
23897
23898 A false negative will not result in bad code being generated, but it
23899 will result in a needless push and pop of the link register. We
23900 hope that this does not occur too often.
23901
23902 If we need doubleword stack alignment this could affect the other
23903 elimination offsets so we can't risk getting it wrong. */
23904 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23905 cfun->machine->arg_pointer_live = 1;
23906 else if (!cfun->machine->arg_pointer_live)
23907 return 0;
23908 }
23909
23910 /* We should not change far_jump_used during or after reload, as there is
23911 no chance to change stack frame layout. */
23912 if (reload_in_progress || reload_completed)
23913 return 0;
23914
23915 /* Check to see if the function contains a branch
23916 insn with the far jump attribute set. */
23917 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23918 {
23919 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23920 {
23921 far_jump = true;
23922 }
23923 func_size += get_attr_length (insn);
23924 }
23925
23926   /* The far_jump attribute is always true for thumb1 before the
23927      shorten_branch pass, so checking the attribute before that pass
23928      is not very useful.
23929
23930      The following heuristic tries to estimate more accurately whether
23931      a far jump will actually be needed.  It is very conservative, as
23932      there is no way to roll back a decision not to use far jumps.
23933
23934      Thumb1 long branch offsets range from -2048 to 2046.  In the worst
23935      case each 2-byte insn is associated with a 4-byte constant pool
23936      entry, so a function size threshold of 2048/3 is conservative enough.  */
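  /* A worked example of the threshold above: a function whose insns total
     800 bytes gives 800 * 3 = 2400 >= 2048, so far_jump_used is recorded
     and LR will be saved; a 600-byte function gives 1800 < 2048 and the
     (possibly pessimistic) far_jump attribute is ignored.  */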
23937 if (far_jump)
23938 {
23939 if ((func_size * 3) >= 2048)
23940 {
23941 /* Record the fact that we have decided that
23942 the function does use far jumps. */
23943 cfun->machine->far_jump_used = 1;
23944 return 1;
23945 }
23946 }
23947
23948 return 0;
23949 }
23950
23951 /* Return nonzero if FUNC must be entered in ARM mode. */
23952 int
23953 is_called_in_ARM_mode (tree func)
23954 {
23955 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23956
23957 /* Ignore the problem about functions whose address is taken. */
23958 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23959 return TRUE;
23960
23961 #ifdef ARM_PE
23962 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23963 #else
23964 return FALSE;
23965 #endif
23966 }
23967
23968 /* Given the stack offsets and register mask in OFFSETS, decide how
23969 many additional registers to push instead of subtracting a constant
23970 from SP. For epilogues the principle is the same except we use pop.
23971 FOR_PROLOGUE indicates which we're generating. */
23972 static int
23973 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23974 {
23975 HOST_WIDE_INT amount;
23976 unsigned long live_regs_mask = offsets->saved_regs_mask;
23977 /* Extract a mask of the ones we can give to the Thumb's push/pop
23978 instruction. */
23979 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23980 /* Then count how many other high registers will need to be pushed. */
23981 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23982 int n_free, reg_base, size;
23983
23984 if (!for_prologue && frame_pointer_needed)
23985 amount = offsets->locals_base - offsets->saved_regs;
23986 else
23987 amount = offsets->outgoing_args - offsets->saved_regs;
23988
23989 /* If the stack frame size is 512 exactly, we can save one load
23990 instruction, which should make this a win even when optimizing
23991 for speed. */
23992 if (!optimize_size && amount != 512)
23993 return 0;
23994
23995 /* Can't do this if there are high registers to push. */
23996 if (high_regs_pushed != 0)
23997 return 0;
23998
23999 /* Shouldn't do it in the prologue if no registers would normally
24000 be pushed at all. In the epilogue, also allow it if we'll have
24001 a pop insn for the PC. */
24002 if (l_mask == 0
24003 && (for_prologue
24004 || TARGET_BACKTRACE
24005 || (live_regs_mask & 1 << LR_REGNUM) == 0
24006 || TARGET_INTERWORK
24007 || crtl->args.pretend_args_size != 0))
24008 return 0;
24009
24010 /* Don't do this if thumb_expand_prologue wants to emit instructions
24011 between the push and the stack frame allocation. */
24012 if (for_prologue
24013 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24014 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24015 return 0;
24016
24017 reg_base = 0;
24018 n_free = 0;
24019 if (!for_prologue)
24020 {
24021 size = arm_size_return_regs ();
24022 reg_base = ARM_NUM_INTS (size);
24023 live_regs_mask >>= reg_base;
24024 }
24025
24026 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24027 && (for_prologue || call_used_regs[reg_base + n_free]))
24028 {
24029 live_regs_mask >>= 1;
24030 n_free++;
24031 }
24032
24033 if (n_free == 0)
24034 return 0;
24035 gcc_assert (amount / 4 * 4 == amount);
24036
24037 if (amount >= 512 && (amount - n_free * 4) < 512)
24038 return (amount - 508) / 4;
24039 if (amount <= n_free * 4)
24040 return amount / 4;
24041 return 0;
24042 }
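/* A sketch of the arithmetic above, assuming the conditions for using extra
   pushes are met: with amount == 516 and two suitable free low registers,
   516 - 2*4 = 508 < 512, so the function returns (516 - 508) / 4 == 2 and
   the remaining 508 bytes fit a single Thumb SP adjustment.  With
   amount == 8 and three free registers, 8 <= 3*4, so 8 / 4 == 2 registers
   absorb the whole frame and no SP adjustment is needed at all.  */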
24043
24044 /* The bits which aren't usefully expanded as rtl. */
24045 const char *
24046 thumb1_unexpanded_epilogue (void)
24047 {
24048 arm_stack_offsets *offsets;
24049 int regno;
24050 unsigned long live_regs_mask = 0;
24051 int high_regs_pushed = 0;
24052 int extra_pop;
24053 int had_to_push_lr;
24054 int size;
24055
24056 if (cfun->machine->return_used_this_function != 0)
24057 return "";
24058
24059 if (IS_NAKED (arm_current_func_type ()))
24060 return "";
24061
24062 offsets = arm_get_frame_offsets ();
24063 live_regs_mask = offsets->saved_regs_mask;
24064 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24065
24066   /* We can deduce the registers used for the return value from the
24067      function's return type.  This is more reliable than examining
24068      df_regs_ever_live_p () because that will be set if the register is
24069      ever used in the function, not just if it holds a return value.  */
24070 size = arm_size_return_regs ();
24071
24072 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24073 if (extra_pop > 0)
24074 {
24075 unsigned long extra_mask = (1 << extra_pop) - 1;
24076 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24077 }
24078
24079 /* The prolog may have pushed some high registers to use as
24080 work registers. e.g. the testsuite file:
24081 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24082 compiles to produce:
24083 push {r4, r5, r6, r7, lr}
24084 mov r7, r9
24085 mov r6, r8
24086 push {r6, r7}
24087 as part of the prolog. We have to undo that pushing here. */
24088
24089 if (high_regs_pushed)
24090 {
24091 unsigned long mask = live_regs_mask & 0xff;
24092 int next_hi_reg;
24093
24094 /* The available low registers depend on the size of the value we are
24095 returning. */
24096 if (size <= 12)
24097 mask |= 1 << 3;
24098 if (size <= 8)
24099 mask |= 1 << 2;
24100
24101 if (mask == 0)
24102 /* Oh dear! We have no low registers into which we can pop
24103 high registers! */
24104 internal_error
24105 ("no low registers available for popping high registers");
24106
24107 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24108 if (live_regs_mask & (1 << next_hi_reg))
24109 break;
24110
24111 while (high_regs_pushed)
24112 {
24113 /* Find lo register(s) into which the high register(s) can
24114 be popped. */
24115 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24116 {
24117 if (mask & (1 << regno))
24118 high_regs_pushed--;
24119 if (high_regs_pushed == 0)
24120 break;
24121 }
24122
24123 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24124
24125 /* Pop the values into the low register(s). */
24126 thumb_pop (asm_out_file, mask);
24127
24128 /* Move the value(s) into the high registers. */
24129 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24130 {
24131 if (mask & (1 << regno))
24132 {
24133 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24134 regno);
24135
24136 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24137 if (live_regs_mask & (1 << next_hi_reg))
24138 break;
24139 }
24140 }
24141 }
24142 live_regs_mask &= ~0x0f00;
24143 }
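  /* A sketch of the sequence the loop above emits, assuming r8 and r9 were
     saved by the prologue and r2/r3 are available low registers:

         pop  {r2, r3}
         mov  r8, r2
         mov  r9, r3

     i.e. the mirror image of the prologue sequence shown earlier.  */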
24144
24145 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24146 live_regs_mask &= 0xff;
24147
24148 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24149 {
24150 /* Pop the return address into the PC. */
24151 if (had_to_push_lr)
24152 live_regs_mask |= 1 << PC_REGNUM;
24153
24154 /* Either no argument registers were pushed or a backtrace
24155 structure was created which includes an adjusted stack
24156 pointer, so just pop everything. */
24157 if (live_regs_mask)
24158 thumb_pop (asm_out_file, live_regs_mask);
24159
24160 /* We have either just popped the return address into the
24161 	 PC or it was kept in LR for the entire function.
24162 Note that thumb_pop has already called thumb_exit if the
24163 PC was in the list. */
24164 if (!had_to_push_lr)
24165 thumb_exit (asm_out_file, LR_REGNUM);
24166 }
24167 else
24168 {
24169 /* Pop everything but the return address. */
24170 if (live_regs_mask)
24171 thumb_pop (asm_out_file, live_regs_mask);
24172
24173 if (had_to_push_lr)
24174 {
24175 if (size > 12)
24176 {
24177 /* We have no free low regs, so save one. */
24178 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24179 LAST_ARG_REGNUM);
24180 }
24181
24182 /* Get the return address into a temporary register. */
24183 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24184
24185 if (size > 12)
24186 {
24187 /* Move the return address to lr. */
24188 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24189 LAST_ARG_REGNUM);
24190 /* Restore the low register. */
24191 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24192 IP_REGNUM);
24193 regno = LR_REGNUM;
24194 }
24195 else
24196 regno = LAST_ARG_REGNUM;
24197 }
24198 else
24199 regno = LR_REGNUM;
24200
24201 /* Remove the argument registers that were pushed onto the stack. */
24202 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24203 SP_REGNUM, SP_REGNUM,
24204 crtl->args.pretend_args_size);
24205
24206 thumb_exit (asm_out_file, regno);
24207 }
24208
24209 return "";
24210 }
24211
24212 /* Functions to save and restore machine-specific function data. */
24213 static struct machine_function *
24214 arm_init_machine_status (void)
24215 {
24216 struct machine_function *machine;
24217 machine = ggc_cleared_alloc<machine_function> ();
24218
24219 #if ARM_FT_UNKNOWN != 0
24220 machine->func_type = ARM_FT_UNKNOWN;
24221 #endif
24222 return machine;
24223 }
24224
24225 /* Return an RTX indicating where the return address to the
24226 calling function can be found. */
24227 rtx
24228 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24229 {
24230 if (count != 0)
24231 return NULL_RTX;
24232
24233 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24234 }
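/* A usage note (not authoritative): __builtin_return_address (0) reaches
   this function via the RETURN_ADDR_RTX macro and yields the entry value
   of LR; any non-zero COUNT (an outer frame) returns NULL_RTX, i.e. outer
   frames are not supported here.  */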
24235
24236 /* Do anything needed before RTL is emitted for each function. */
24237 void
24238 arm_init_expanders (void)
24239 {
24240 /* Arrange to initialize and mark the machine per-function status. */
24241 init_machine_status = arm_init_machine_status;
24242
24243 /* This is to stop the combine pass optimizing away the alignment
24244 adjustment of va_arg. */
24245 /* ??? It is claimed that this should not be necessary. */
24246 if (cfun)
24247 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24248 }
24249
24250
24251 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24252    isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
24253 to point at the base of the local variables after static stack
24254 space for a function has been allocated. */
24255
24256 HOST_WIDE_INT
24257 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24258 {
24259 arm_stack_offsets *offsets;
24260
24261 offsets = arm_get_frame_offsets ();
24262
24263 switch (from)
24264 {
24265 case ARG_POINTER_REGNUM:
24266 switch (to)
24267 {
24268 case STACK_POINTER_REGNUM:
24269 return offsets->outgoing_args - offsets->saved_args;
24270
24271 case FRAME_POINTER_REGNUM:
24272 return offsets->soft_frame - offsets->saved_args;
24273
24274 case ARM_HARD_FRAME_POINTER_REGNUM:
24275 return offsets->saved_regs - offsets->saved_args;
24276
24277 case THUMB_HARD_FRAME_POINTER_REGNUM:
24278 return offsets->locals_base - offsets->saved_args;
24279
24280 default:
24281 gcc_unreachable ();
24282 }
24283 break;
24284
24285 case FRAME_POINTER_REGNUM:
24286 switch (to)
24287 {
24288 case STACK_POINTER_REGNUM:
24289 return offsets->outgoing_args - offsets->soft_frame;
24290
24291 case ARM_HARD_FRAME_POINTER_REGNUM:
24292 return offsets->saved_regs - offsets->soft_frame;
24293
24294 case THUMB_HARD_FRAME_POINTER_REGNUM:
24295 return offsets->locals_base - offsets->soft_frame;
24296
24297 default:
24298 gcc_unreachable ();
24299 }
24300 break;
24301
24302 default:
24303 gcc_unreachable ();
24304 }
24305 }
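/* A purely illustrative example (the offsets are hypothetical numbers): with
   offsets->saved_args == 0, saved_regs == 8, soft_frame == 8,
   locals_base == 24 and outgoing_args == 32, eliminating ARG_POINTER_REGNUM
   to STACK_POINTER_REGNUM yields 32 - 0 = 32, while FRAME_POINTER_REGNUM to
   THUMB_HARD_FRAME_POINTER_REGNUM yields 24 - 8 = 16.  */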
24306
24307 /* Generate the function's prologue. */
24308
24309 void
24310 thumb1_expand_prologue (void)
24311 {
24312 rtx_insn *insn;
24313
24314 HOST_WIDE_INT amount;
24315 arm_stack_offsets *offsets;
24316 unsigned long func_type;
24317 int regno;
24318 unsigned long live_regs_mask;
24319 unsigned long l_mask;
24320 unsigned high_regs_pushed = 0;
24321
24322 func_type = arm_current_func_type ();
24323
24324 /* Naked functions don't have prologues. */
24325 if (IS_NAKED (func_type))
24326 return;
24327
24328 if (IS_INTERRUPT (func_type))
24329 {
24330 error ("interrupt Service Routines cannot be coded in Thumb mode");
24331 return;
24332 }
24333
24334 if (is_called_in_ARM_mode (current_function_decl))
24335 emit_insn (gen_prologue_thumb1_interwork ());
24336
24337 offsets = arm_get_frame_offsets ();
24338 live_regs_mask = offsets->saved_regs_mask;
24339
24340 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24341 l_mask = live_regs_mask & 0x40ff;
24342 /* Then count how many other high registers will need to be pushed. */
24343 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24344
24345 if (crtl->args.pretend_args_size)
24346 {
24347 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24348
24349 if (cfun->machine->uses_anonymous_args)
24350 {
24351 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24352 unsigned long mask;
24353
24354 mask = 1ul << (LAST_ARG_REGNUM + 1);
24355 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
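	  /* A small worked example: with 8 bytes of pretend args,
	     num_pushes is 2 and, since LAST_ARG_REGNUM is r3, mask becomes
	     (1 << 4) - (1 << 2) == 0b1100, i.e. push {r2, r3}.  */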
24356
24357 insn = thumb1_emit_multi_reg_push (mask, 0);
24358 }
24359 else
24360 {
24361 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24362 stack_pointer_rtx, x));
24363 }
24364 RTX_FRAME_RELATED_P (insn) = 1;
24365 }
24366
24367 if (TARGET_BACKTRACE)
24368 {
24369 HOST_WIDE_INT offset = 0;
24370 unsigned work_register;
24371 rtx work_reg, x, arm_hfp_rtx;
24372
24373 /* We have been asked to create a stack backtrace structure.
24374 The code looks like this:
24375
24376 0 .align 2
24377 0 func:
24378 0 sub SP, #16 Reserve space for 4 registers.
24379 2 push {R7} Push low registers.
24380 4 add R7, SP, #20 Get the stack pointer before the push.
24381 6 str R7, [SP, #8] Store the stack pointer
24382 (before reserving the space).
24383 8 mov R7, PC Get hold of the start of this code + 12.
24384 10 str R7, [SP, #16] Store it.
24385 12 mov R7, FP Get hold of the current frame pointer.
24386 14 str R7, [SP, #4] Store it.
24387 16 mov R7, LR Get hold of the current return address.
24388 18 str R7, [SP, #12] Store it.
24389 20 add R7, SP, #16 Point at the start of the
24390 backtrace structure.
24391 22 mov FP, R7 Put this value into the frame pointer. */
24392
24393 work_register = thumb_find_work_register (live_regs_mask);
24394 work_reg = gen_rtx_REG (SImode, work_register);
24395 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24396
24397 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24398 stack_pointer_rtx, GEN_INT (-16)));
24399 RTX_FRAME_RELATED_P (insn) = 1;
24400
24401 if (l_mask)
24402 {
24403 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24404 RTX_FRAME_RELATED_P (insn) = 1;
24405
24406 offset = bit_count (l_mask) * UNITS_PER_WORD;
24407 }
24408
24409 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24410 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24411
24412 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24413 x = gen_frame_mem (SImode, x);
24414 emit_move_insn (x, work_reg);
24415
24416 /* Make sure that the instruction fetching the PC is in the right place
24417 to calculate "start of backtrace creation code + 12". */
24418 /* ??? The stores using the common WORK_REG ought to be enough to
24419 prevent the scheduler from doing anything weird. Failing that
24420 we could always move all of the following into an UNSPEC_VOLATILE. */
24421 if (l_mask)
24422 {
24423 x = gen_rtx_REG (SImode, PC_REGNUM);
24424 emit_move_insn (work_reg, x);
24425
24426 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24427 x = gen_frame_mem (SImode, x);
24428 emit_move_insn (x, work_reg);
24429
24430 emit_move_insn (work_reg, arm_hfp_rtx);
24431
24432 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24433 x = gen_frame_mem (SImode, x);
24434 emit_move_insn (x, work_reg);
24435 }
24436 else
24437 {
24438 emit_move_insn (work_reg, arm_hfp_rtx);
24439
24440 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24441 x = gen_frame_mem (SImode, x);
24442 emit_move_insn (x, work_reg);
24443
24444 x = gen_rtx_REG (SImode, PC_REGNUM);
24445 emit_move_insn (work_reg, x);
24446
24447 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24448 x = gen_frame_mem (SImode, x);
24449 emit_move_insn (x, work_reg);
24450 }
24451
24452 x = gen_rtx_REG (SImode, LR_REGNUM);
24453 emit_move_insn (work_reg, x);
24454
24455 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24456 x = gen_frame_mem (SImode, x);
24457 emit_move_insn (x, work_reg);
24458
24459 x = GEN_INT (offset + 12);
24460 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24461
24462 emit_move_insn (arm_hfp_rtx, work_reg);
24463 }
24464 /* Optimization: If we are not pushing any low registers but we are going
24465 to push some high registers then delay our first push. This will just
24466 be a push of LR and we can combine it with the push of the first high
24467 register. */
24468 else if ((l_mask & 0xff) != 0
24469 || (high_regs_pushed == 0 && l_mask))
24470 {
24471 unsigned long mask = l_mask;
24472 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24473 insn = thumb1_emit_multi_reg_push (mask, mask);
24474 RTX_FRAME_RELATED_P (insn) = 1;
24475 }
24476
24477 if (high_regs_pushed)
24478 {
24479 unsigned pushable_regs;
24480 unsigned next_hi_reg;
24481 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24482 : crtl->args.info.nregs;
24483 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24484
24485 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24486 if (live_regs_mask & (1 << next_hi_reg))
24487 break;
24488
24489       /* Here we need to mask out registers used for passing arguments, even
24490 	 if they could otherwise be pushed, to avoid using them to stash the
24491 	 high registers: doing so could clobber live argument values.  */
24492 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24493
24494 if (pushable_regs == 0)
24495 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24496
24497 while (high_regs_pushed > 0)
24498 {
24499 unsigned long real_regs_mask = 0;
24500
24501 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24502 {
24503 if (pushable_regs & (1 << regno))
24504 {
24505 emit_move_insn (gen_rtx_REG (SImode, regno),
24506 gen_rtx_REG (SImode, next_hi_reg));
24507
24508 high_regs_pushed --;
24509 real_regs_mask |= (1 << next_hi_reg);
24510
24511 if (high_regs_pushed)
24512 {
24513 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24514 next_hi_reg --)
24515 if (live_regs_mask & (1 << next_hi_reg))
24516 break;
24517 }
24518 else
24519 {
24520 pushable_regs &= ~((1 << regno) - 1);
24521 break;
24522 }
24523 }
24524 }
24525
24526 /* If we had to find a work register and we have not yet
24527 saved the LR then add it to the list of regs to push. */
24528 if (l_mask == (1 << LR_REGNUM))
24529 {
24530 pushable_regs |= l_mask;
24531 real_regs_mask |= l_mask;
24532 l_mask = 0;
24533 }
24534
24535 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24536 RTX_FRAME_RELATED_P (insn) = 1;
24537 }
24538 }
24539
24540 /* Load the pic register before setting the frame pointer,
24541 so we can use r7 as a temporary work register. */
24542 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24543 arm_load_pic_register (live_regs_mask);
24544
24545 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24546 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24547 stack_pointer_rtx);
24548
24549 if (flag_stack_usage_info)
24550 current_function_static_stack_size
24551 = offsets->outgoing_args - offsets->saved_args;
24552
24553 amount = offsets->outgoing_args - offsets->saved_regs;
24554 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24555 if (amount)
24556 {
24557 if (amount < 512)
24558 {
24559 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24560 GEN_INT (- amount)));
24561 RTX_FRAME_RELATED_P (insn) = 1;
24562 }
24563 else
24564 {
24565 rtx reg, dwarf;
24566
24567 /* The stack decrement is too big for an immediate value in a single
24568 insn. In theory we could issue multiple subtracts, but after
24569 three of them it becomes more space efficient to place the full
24570 value in the constant pool and load into a register. (Also the
24571 ARM debugger really likes to see only one stack decrement per
24572 function). So instead we look for a scratch register into which
24573 we can load the decrement, and then we subtract this from the
24574 stack pointer. Unfortunately on the thumb the only available
24575 scratch registers are the argument registers, and we cannot use
24576 these as they may hold arguments to the function. Instead we
24577 attempt to locate a call preserved register which is used by this
24578 function. If we can find one, then we know that it will have
24579 been pushed at the start of the prologue and so we can corrupt
24580 it now. */
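	  /* A hedged sketch of what this typically produces for, say,
	     amount == 1024 with r4 live (and therefore already pushed):

	         ldr  r4, .LCn        @ .LCn (hypothetical label) holds -1024
	         add  sp, sp, r4

	     where the constant-pool entry is emitted later by the minipool
	     machinery.  */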
24581 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24582 if (live_regs_mask & (1 << regno))
24583 break;
24584
24585 gcc_assert(regno <= LAST_LO_REGNUM);
24586
24587 reg = gen_rtx_REG (SImode, regno);
24588
24589 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24590
24591 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24592 stack_pointer_rtx, reg));
24593
24594 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24595 plus_constant (Pmode, stack_pointer_rtx,
24596 -amount));
24597 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24598 RTX_FRAME_RELATED_P (insn) = 1;
24599 }
24600 }
24601
24602 if (frame_pointer_needed)
24603 thumb_set_frame_pointer (offsets);
24604
24605 /* If we are profiling, make sure no instructions are scheduled before
24606 the call to mcount. Similarly if the user has requested no
24607 scheduling in the prolog. Similarly if we want non-call exceptions
24608 using the EABI unwinder, to prevent faulting instructions from being
24609 swapped with a stack adjustment. */
24610 if (crtl->profile || !TARGET_SCHED_PROLOG
24611 || (arm_except_unwind_info (&global_options) == UI_TARGET
24612 && cfun->can_throw_non_call_exceptions))
24613 emit_insn (gen_blockage ());
24614
24615 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24616 if (live_regs_mask & 0xff)
24617 cfun->machine->lr_save_eliminated = 0;
24618 }
24619
24620 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24621 POP instruction can be generated. LR should be replaced by PC. All
24622 the checks required are already done by USE_RETURN_INSN (). Hence,
24623 all we really need to check here is if single register is to be
24624 returned, or multiple register return. */
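/* For example, if only LR was saved, the single-register path below emits a
   return parallel that pops the saved LR slot straight into the PC
   (typically assembling to "ldr pc, [sp], #4" or "pop {pc}"); a mask of
   {r4, r5, lr} is instead rewritten to {r4, r5, pc} and handed to
   arm_emit_multi_reg_pop, which typically assembles to "pop {r4, r5, pc}".  */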
24625 void
24626 thumb2_expand_return (bool simple_return)
24627 {
24628 int i, num_regs;
24629 unsigned long saved_regs_mask;
24630 arm_stack_offsets *offsets;
24631
24632 offsets = arm_get_frame_offsets ();
24633 saved_regs_mask = offsets->saved_regs_mask;
24634
24635 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24636 if (saved_regs_mask & (1 << i))
24637 num_regs++;
24638
24639 if (!simple_return && saved_regs_mask)
24640 {
24641 if (num_regs == 1)
24642 {
24643 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24644 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24645 rtx addr = gen_rtx_MEM (SImode,
24646 gen_rtx_POST_INC (SImode,
24647 stack_pointer_rtx));
24648 set_mem_alias_set (addr, get_frame_alias_set ());
24649 XVECEXP (par, 0, 0) = ret_rtx;
24650 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24651 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24652 emit_jump_insn (par);
24653 }
24654 else
24655 {
24656 saved_regs_mask &= ~ (1 << LR_REGNUM);
24657 saved_regs_mask |= (1 << PC_REGNUM);
24658 arm_emit_multi_reg_pop (saved_regs_mask);
24659 }
24660 }
24661 else
24662 {
24663 emit_jump_insn (simple_return_rtx);
24664 }
24665 }
24666
24667 void
24668 thumb1_expand_epilogue (void)
24669 {
24670 HOST_WIDE_INT amount;
24671 arm_stack_offsets *offsets;
24672 int regno;
24673
24674   /* Naked functions don't have epilogues.  */
24675 if (IS_NAKED (arm_current_func_type ()))
24676 return;
24677
24678 offsets = arm_get_frame_offsets ();
24679 amount = offsets->outgoing_args - offsets->saved_regs;
24680
24681 if (frame_pointer_needed)
24682 {
24683 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24684 amount = offsets->locals_base - offsets->saved_regs;
24685 }
24686 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24687
24688 gcc_assert (amount >= 0);
24689 if (amount)
24690 {
24691 emit_insn (gen_blockage ());
24692
24693 if (amount < 512)
24694 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24695 GEN_INT (amount)));
24696 else
24697 {
24698 /* r3 is always free in the epilogue. */
24699 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24700
24701 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24702 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24703 }
24704 }
24705
24706 /* Emit a USE (stack_pointer_rtx), so that
24707 the stack adjustment will not be deleted. */
24708 emit_insn (gen_force_register_use (stack_pointer_rtx));
24709
24710 if (crtl->profile || !TARGET_SCHED_PROLOG)
24711 emit_insn (gen_blockage ());
24712
24713 /* Emit a clobber for each insn that will be restored in the epilogue,
24714 so that flow2 will get register lifetimes correct. */
24715 for (regno = 0; regno < 13; regno++)
24716 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24717 emit_clobber (gen_rtx_REG (SImode, regno));
24718
24719 if (! df_regs_ever_live_p (LR_REGNUM))
24720 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24721 }
24722
24723 /* Epilogue code for APCS frame. */
24724 static void
24725 arm_expand_epilogue_apcs_frame (bool really_return)
24726 {
24727 unsigned long func_type;
24728 unsigned long saved_regs_mask;
24729 int num_regs = 0;
24730 int i;
24731 int floats_from_frame = 0;
24732 arm_stack_offsets *offsets;
24733
24734 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24735 func_type = arm_current_func_type ();
24736
24737 /* Get frame offsets for ARM. */
24738 offsets = arm_get_frame_offsets ();
24739 saved_regs_mask = offsets->saved_regs_mask;
24740
24741 /* Find the offset of the floating-point save area in the frame. */
24742 floats_from_frame
24743 = (offsets->saved_args
24744 + arm_compute_static_chain_stack_bytes ()
24745 - offsets->frame);
24746
24747   /* Compute how many core registers are saved and how far away the floats are.  */
24748 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24749 if (saved_regs_mask & (1 << i))
24750 {
24751 num_regs++;
24752 floats_from_frame += 4;
24753 }
24754
24755 if (TARGET_HARD_FLOAT && TARGET_VFP)
24756 {
24757 int start_reg;
24758 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24759
24760 /* The offset is from IP_REGNUM. */
24761 int saved_size = arm_get_vfp_saved_size ();
24762 if (saved_size > 0)
24763 {
24764 rtx_insn *insn;
24765 floats_from_frame += saved_size;
24766 insn = emit_insn (gen_addsi3 (ip_rtx,
24767 hard_frame_pointer_rtx,
24768 GEN_INT (-floats_from_frame)));
24769 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24770 ip_rtx, hard_frame_pointer_rtx);
24771 }
24772
24773 /* Generate VFP register multi-pop. */
24774 start_reg = FIRST_VFP_REGNUM;
24775
24776 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24777 /* Look for a case where a reg does not need restoring. */
24778 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24779 && (!df_regs_ever_live_p (i + 1)
24780 || call_used_regs[i + 1]))
24781 {
24782 if (start_reg != i)
24783 arm_emit_vfp_multi_reg_pop (start_reg,
24784 (i - start_reg) / 2,
24785 gen_rtx_REG (SImode,
24786 IP_REGNUM));
24787 start_reg = i + 2;
24788 }
24789
24790 /* Restore the remaining regs that we have discovered (or possibly
24791 even all of them, if the conditional in the for loop never
24792 fired). */
24793 if (start_reg != i)
24794 arm_emit_vfp_multi_reg_pop (start_reg,
24795 (i - start_reg) / 2,
24796 gen_rtx_REG (SImode, IP_REGNUM));
24797 }
24798
24799 if (TARGET_IWMMXT)
24800 {
24801 /* The frame pointer is guaranteed to be non-double-word aligned, as
24802 it is set to double-word-aligned old_stack_pointer - 4. */
24803 rtx_insn *insn;
24804 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24805
24806 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24807 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24808 {
24809 rtx addr = gen_frame_mem (V2SImode,
24810 plus_constant (Pmode, hard_frame_pointer_rtx,
24811 - lrm_count * 4));
24812 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24813 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24814 gen_rtx_REG (V2SImode, i),
24815 NULL_RTX);
24816 lrm_count += 2;
24817 }
24818 }
24819
24820   /* saved_regs_mask should contain IP, which holds the old stack pointer
24821      from the time the activation record was created.  Since SP and IP are
24822      adjacent registers, we can restore the value directly into SP.  */
24823 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24824 saved_regs_mask &= ~(1 << IP_REGNUM);
24825 saved_regs_mask |= (1 << SP_REGNUM);
24826
24827 /* There are two registers left in saved_regs_mask - LR and PC. We
24828 only need to restore LR (the return address), but to
24829 save time we can load it directly into PC, unless we need a
24830 special function exit sequence, or we are not really returning. */
24831 if (really_return
24832 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24833 && !crtl->calls_eh_return)
24834 /* Delete LR from the register mask, so that LR on
24835 the stack is loaded into the PC in the register mask. */
24836 saved_regs_mask &= ~(1 << LR_REGNUM);
24837 else
24838 saved_regs_mask &= ~(1 << PC_REGNUM);
24839
24840 num_regs = bit_count (saved_regs_mask);
24841 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24842 {
24843 rtx_insn *insn;
24844 emit_insn (gen_blockage ());
24845 /* Unwind the stack to just below the saved registers. */
24846 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24847 hard_frame_pointer_rtx,
24848 GEN_INT (- 4 * num_regs)));
24849
24850 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24851 stack_pointer_rtx, hard_frame_pointer_rtx);
24852 }
24853
24854 arm_emit_multi_reg_pop (saved_regs_mask);
24855
24856 if (IS_INTERRUPT (func_type))
24857 {
24858 /* Interrupt handlers will have pushed the
24859 IP onto the stack, so restore it now. */
24860 rtx_insn *insn;
24861 rtx addr = gen_rtx_MEM (SImode,
24862 gen_rtx_POST_INC (SImode,
24863 stack_pointer_rtx));
24864 set_mem_alias_set (addr, get_frame_alias_set ());
24865 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24866 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24867 gen_rtx_REG (SImode, IP_REGNUM),
24868 NULL_RTX);
24869 }
24870
24871 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24872 return;
24873
24874 if (crtl->calls_eh_return)
24875 emit_insn (gen_addsi3 (stack_pointer_rtx,
24876 stack_pointer_rtx,
24877 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24878
24879 if (IS_STACKALIGN (func_type))
24880 /* Restore the original stack pointer. Before prologue, the stack was
24881 realigned and the original stack pointer saved in r0. For details,
24882 see comment in arm_expand_prologue. */
24883 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24884
24885 emit_jump_insn (simple_return_rtx);
24886 }
24887
24888 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24889 function is not a sibcall. */
24890 void
24891 arm_expand_epilogue (bool really_return)
24892 {
24893 unsigned long func_type;
24894 unsigned long saved_regs_mask;
24895 int num_regs = 0;
24896 int i;
24897 int amount;
24898 arm_stack_offsets *offsets;
24899
24900 func_type = arm_current_func_type ();
24901
24902   /* Naked functions don't have epilogues.  Hence, generate a return pattern
24903      and let output_return_instruction take care of any instruction emission.  */
24904 if (IS_NAKED (func_type)
24905 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24906 {
24907 if (really_return)
24908 emit_jump_insn (simple_return_rtx);
24909 return;
24910 }
24911
24912 /* If we are throwing an exception, then we really must be doing a
24913 return, so we can't tail-call. */
24914 gcc_assert (!crtl->calls_eh_return || really_return);
24915
24916 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24917 {
24918 arm_expand_epilogue_apcs_frame (really_return);
24919 return;
24920 }
24921
24922 /* Get frame offsets for ARM. */
24923 offsets = arm_get_frame_offsets ();
24924 saved_regs_mask = offsets->saved_regs_mask;
24925 num_regs = bit_count (saved_regs_mask);
24926
24927 if (frame_pointer_needed)
24928 {
24929 rtx_insn *insn;
24930 /* Restore stack pointer if necessary. */
24931 if (TARGET_ARM)
24932 {
24933           /* In ARM mode, the frame pointer points to the first saved register.
24934              Restore the stack pointer to the last saved register.  */
24935 amount = offsets->frame - offsets->saved_regs;
24936
24937 /* Force out any pending memory operations that reference stacked data
24938 before stack de-allocation occurs. */
24939 emit_insn (gen_blockage ());
24940 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24941 hard_frame_pointer_rtx,
24942 GEN_INT (amount)));
24943 arm_add_cfa_adjust_cfa_note (insn, amount,
24944 stack_pointer_rtx,
24945 hard_frame_pointer_rtx);
24946
24947 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24948 deleted. */
24949 emit_insn (gen_force_register_use (stack_pointer_rtx));
24950 }
24951 else
24952 {
24953 /* In Thumb-2 mode, the frame pointer points to the last saved
24954 register. */
24955 amount = offsets->locals_base - offsets->saved_regs;
24956 if (amount)
24957 {
24958 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24959 hard_frame_pointer_rtx,
24960 GEN_INT (amount)));
24961 arm_add_cfa_adjust_cfa_note (insn, amount,
24962 hard_frame_pointer_rtx,
24963 hard_frame_pointer_rtx);
24964 }
24965
24966 /* Force out any pending memory operations that reference stacked data
24967 before stack de-allocation occurs. */
24968 emit_insn (gen_blockage ());
24969 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24970 hard_frame_pointer_rtx));
24971 arm_add_cfa_adjust_cfa_note (insn, 0,
24972 stack_pointer_rtx,
24973 hard_frame_pointer_rtx);
24974 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24975 deleted. */
24976 emit_insn (gen_force_register_use (stack_pointer_rtx));
24977 }
24978 }
24979 else
24980 {
24981 /* Pop off outgoing args and local frame to adjust stack pointer to
24982 last saved register. */
24983 amount = offsets->outgoing_args - offsets->saved_regs;
24984 if (amount)
24985 {
24986 rtx_insn *tmp;
24987 /* Force out any pending memory operations that reference stacked data
24988 before stack de-allocation occurs. */
24989 emit_insn (gen_blockage ());
24990 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24991 stack_pointer_rtx,
24992 GEN_INT (amount)));
24993 arm_add_cfa_adjust_cfa_note (tmp, amount,
24994 stack_pointer_rtx, stack_pointer_rtx);
24995 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24996 not deleted. */
24997 emit_insn (gen_force_register_use (stack_pointer_rtx));
24998 }
24999 }
25000
25001 if (TARGET_HARD_FLOAT && TARGET_VFP)
25002 {
25003 /* Generate VFP register multi-pop. */
25004 int end_reg = LAST_VFP_REGNUM + 1;
25005
25006 /* Scan the registers in reverse order. We need to match
25007 any groupings made in the prologue and generate matching
25008 	 vldm operations.  Groups must be matched because, unlike
25009 	 pop, vldm can only handle consecutive registers.  */
25010 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25011 /* Look for a case where a reg does not need restoring. */
25012 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25013 && (!df_regs_ever_live_p (i + 1)
25014 || call_used_regs[i + 1]))
25015 {
25016 /* Restore the regs discovered so far (from reg+2 to
25017 end_reg). */
25018 if (end_reg > i + 2)
25019 arm_emit_vfp_multi_reg_pop (i + 2,
25020 (end_reg - (i + 2)) / 2,
25021 stack_pointer_rtx);
25022 end_reg = i;
25023 }
25024
25025 /* Restore the remaining regs that we have discovered (or possibly
25026 even all of them, if the conditional in the for loop never
25027 fired). */
25028 if (end_reg > i + 2)
25029 arm_emit_vfp_multi_reg_pop (i + 2,
25030 (end_reg - (i + 2)) / 2,
25031 stack_pointer_rtx);
25032 }
25033
25034 if (TARGET_IWMMXT)
25035 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25036 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25037 {
25038 rtx_insn *insn;
25039 rtx addr = gen_rtx_MEM (V2SImode,
25040 gen_rtx_POST_INC (SImode,
25041 stack_pointer_rtx));
25042 set_mem_alias_set (addr, get_frame_alias_set ());
25043 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25044 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25045 gen_rtx_REG (V2SImode, i),
25046 NULL_RTX);
25047 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25048 stack_pointer_rtx, stack_pointer_rtx);
25049 }
25050
25051 if (saved_regs_mask)
25052 {
25053 rtx insn;
25054 bool return_in_pc = false;
25055
25056 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25057 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25058 && !IS_STACKALIGN (func_type)
25059 && really_return
25060 && crtl->args.pretend_args_size == 0
25061 && saved_regs_mask & (1 << LR_REGNUM)
25062 && !crtl->calls_eh_return)
25063 {
25064 saved_regs_mask &= ~(1 << LR_REGNUM);
25065 saved_regs_mask |= (1 << PC_REGNUM);
25066 return_in_pc = true;
25067 }
25068
25069 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25070 {
25071 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25072 if (saved_regs_mask & (1 << i))
25073 {
25074 rtx addr = gen_rtx_MEM (SImode,
25075 gen_rtx_POST_INC (SImode,
25076 stack_pointer_rtx));
25077 set_mem_alias_set (addr, get_frame_alias_set ());
25078
25079 if (i == PC_REGNUM)
25080 {
25081 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25082 XVECEXP (insn, 0, 0) = ret_rtx;
25083 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25084 gen_rtx_REG (SImode, i),
25085 addr);
25086 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25087 insn = emit_jump_insn (insn);
25088 }
25089 else
25090 {
25091 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25092 addr));
25093 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25094 gen_rtx_REG (SImode, i),
25095 NULL_RTX);
25096 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25097 stack_pointer_rtx,
25098 stack_pointer_rtx);
25099 }
25100 }
25101 }
25102 else
25103 {
25104 if (TARGET_LDRD
25105 && current_tune->prefer_ldrd_strd
25106 && !optimize_function_for_size_p (cfun))
25107 {
25108 if (TARGET_THUMB2)
25109 thumb2_emit_ldrd_pop (saved_regs_mask);
25110 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25111 arm_emit_ldrd_pop (saved_regs_mask);
25112 else
25113 arm_emit_multi_reg_pop (saved_regs_mask);
25114 }
25115 else
25116 arm_emit_multi_reg_pop (saved_regs_mask);
25117 }
25118
25119 if (return_in_pc == true)
25120 return;
25121 }
25122
25123 if (crtl->args.pretend_args_size)
25124 {
25125 int i, j;
25126 rtx dwarf = NULL_RTX;
25127 rtx_insn *tmp =
25128 emit_insn (gen_addsi3 (stack_pointer_rtx,
25129 stack_pointer_rtx,
25130 GEN_INT (crtl->args.pretend_args_size)));
25131
25132 RTX_FRAME_RELATED_P (tmp) = 1;
25133
25134 if (cfun->machine->uses_anonymous_args)
25135 {
25136           /* Restore pretend args.  See arm_expand_prologue for how
25137              pretend_args are saved on the stack.  */
25138 int num_regs = crtl->args.pretend_args_size / 4;
25139 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
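          /* E.g. 8 bytes of pretend args give num_regs == 2 and a mask of
             (0xf0 >> 2) & 0xf == 0xc, i.e. CFA-restore notes for r2 and r3.  */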
25140 for (j = 0, i = 0; j < num_regs; i++)
25141 if (saved_regs_mask & (1 << i))
25142 {
25143 rtx reg = gen_rtx_REG (SImode, i);
25144 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25145 j++;
25146 }
25147 REG_NOTES (tmp) = dwarf;
25148 }
25149 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25150 stack_pointer_rtx, stack_pointer_rtx);
25151 }
25152
25153 if (!really_return)
25154 return;
25155
25156 if (crtl->calls_eh_return)
25157 emit_insn (gen_addsi3 (stack_pointer_rtx,
25158 stack_pointer_rtx,
25159 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25160
25161 if (IS_STACKALIGN (func_type))
25162 /* Restore the original stack pointer. Before prologue, the stack was
25163 realigned and the original stack pointer saved in r0. For details,
25164 see comment in arm_expand_prologue. */
25165 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25166
25167 emit_jump_insn (simple_return_rtx);
25168 }
25169
25170 /* Implementation of insn prologue_thumb1_interwork. This is the first
25171 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25172
25173 const char *
25174 thumb1_output_interwork (void)
25175 {
25176 const char * name;
25177 FILE *f = asm_out_file;
25178
25179 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25180 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25181 == SYMBOL_REF);
25182 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25183
25184 /* Generate code sequence to switch us into Thumb mode. */
25185 /* The .code 32 directive has already been emitted by
25186 ASM_DECLARE_FUNCTION_NAME. */
25187 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25188 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25189
25190 /* Generate a label, so that the debugger will notice the
25191 change in instruction sets. This label is also used by
25192 the assembler to bypass the ARM code when this function
25193 is called from a Thumb encoded function elsewhere in the
25194 same file. Hence the definition of STUB_NAME here must
25195 agree with the definition in gas/config/tc-arm.c. */
25196
25197 #define STUB_NAME ".real_start_of"
25198
25199 fprintf (f, "\t.code\t16\n");
25200 #ifdef ARM_PE
25201 if (arm_dllexport_name_p (name))
25202 name = arm_strip_name_encoding (name);
25203 #endif
25204 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25205 fprintf (f, "\t.thumb_func\n");
25206 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25207
25208 return "";
25209 }
25210
25211 /* Handle the case of a double word load into a low register from
25212 a computed memory address. The computed address may involve a
25213 register which is overwritten by the load. */
25214 const char *
25215 thumb_load_double_from_address (rtx *operands)
25216 {
25217 rtx addr;
25218 rtx base;
25219 rtx offset;
25220 rtx arg1;
25221 rtx arg2;
25222
25223 gcc_assert (REG_P (operands[0]));
25224 gcc_assert (MEM_P (operands[1]));
25225
25226 /* Get the memory address. */
25227 addr = XEXP (operands[1], 0);
25228
25229 /* Work out how the memory address is computed. */
25230 switch (GET_CODE (addr))
25231 {
25232 case REG:
25233 operands[2] = adjust_address (operands[1], SImode, 4);
25234
25235 if (REGNO (operands[0]) == REGNO (addr))
25236 {
25237 output_asm_insn ("ldr\t%H0, %2", operands);
25238 output_asm_insn ("ldr\t%0, %1", operands);
25239 }
25240 else
25241 {
25242 output_asm_insn ("ldr\t%0, %1", operands);
25243 output_asm_insn ("ldr\t%H0, %2", operands);
25244 }
25245 break;
25246
25247 case CONST:
25248 /* Compute <address> + 4 for the high order load. */
25249 operands[2] = adjust_address (operands[1], SImode, 4);
25250
25251 output_asm_insn ("ldr\t%0, %1", operands);
25252 output_asm_insn ("ldr\t%H0, %2", operands);
25253 break;
25254
25255 case PLUS:
25256 arg1 = XEXP (addr, 0);
25257 arg2 = XEXP (addr, 1);
25258
25259 if (CONSTANT_P (arg1))
25260 base = arg2, offset = arg1;
25261 else
25262 base = arg1, offset = arg2;
25263
25264 gcc_assert (REG_P (base));
25265
25266 /* Catch the case of <address> = <reg> + <reg> */
25267 if (REG_P (offset))
25268 {
25269 int reg_offset = REGNO (offset);
25270 int reg_base = REGNO (base);
25271 int reg_dest = REGNO (operands[0]);
25272
25273 /* Add the base and offset registers together into the
25274 higher destination register. */
25275 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25276 reg_dest + 1, reg_base, reg_offset);
25277
25278 /* Load the lower destination register from the address in
25279 the higher destination register. */
25280 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25281 reg_dest, reg_dest + 1);
25282
25283 /* Load the higher destination register from its own address
25284 plus 4. */
25285 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25286 reg_dest + 1, reg_dest + 1);
25287 }
25288 else
25289 {
25290 /* Compute <address> + 4 for the high order load. */
25291 operands[2] = adjust_address (operands[1], SImode, 4);
25292
25293 /* If the computed address is held in the low order register
25294 then load the high order register first, otherwise always
25295 load the low order register first. */
25296 if (REGNO (operands[0]) == REGNO (base))
25297 {
25298 output_asm_insn ("ldr\t%H0, %2", operands);
25299 output_asm_insn ("ldr\t%0, %1", operands);
25300 }
25301 else
25302 {
25303 output_asm_insn ("ldr\t%0, %1", operands);
25304 output_asm_insn ("ldr\t%H0, %2", operands);
25305 }
25306 }
25307 break;
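      /* Illustrative output for the register+register branch above, assuming
         the destination pair is r4/r5 and the address is r1 + r2:

             add  r5, r1, r2
             ldr  r4, [r5, #0]
             ldr  r5, [r5, #4]   */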
25308
25309 case LABEL_REF:
25310 /* With no registers to worry about we can just load the value
25311 directly. */
25312 operands[2] = adjust_address (operands[1], SImode, 4);
25313
25314 output_asm_insn ("ldr\t%H0, %2", operands);
25315 output_asm_insn ("ldr\t%0, %1", operands);
25316 break;
25317
25318 default:
25319 gcc_unreachable ();
25320 }
25321
25322 return "";
25323 }
25324
25325 const char *
25326 thumb_output_move_mem_multiple (int n, rtx *operands)
25327 {
25328 rtx tmp;
25329
25330 switch (n)
25331 {
25332 case 2:
25333 if (REGNO (operands[4]) > REGNO (operands[5]))
25334 {
25335 tmp = operands[4];
25336 operands[4] = operands[5];
25337 operands[5] = tmp;
25338 }
25339 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25340 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25341 break;
25342
25343 case 3:
25344 if (REGNO (operands[4]) > REGNO (operands[5]))
25345 std::swap (operands[4], operands[5]);
25346 if (REGNO (operands[5]) > REGNO (operands[6]))
25347 std::swap (operands[5], operands[6]);
25348 if (REGNO (operands[4]) > REGNO (operands[5]))
25349 std::swap (operands[4], operands[5]);
25350
25351 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25352 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25353 break;
25354
25355 default:
25356 gcc_unreachable ();
25357 }
25358
25359 return "";
25360 }
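/* For n == 3, assuming the destination and source pointers are in r0 and r1
   and the scratch registers (sorted by the swaps above) are r3, r4 and r5,
   the emitted pair is simply:

       ldmia r1!, {r3, r4, r5}
       stmia r0!, {r3, r4, r5}   */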
25361
25362 /* Output a call-via instruction for thumb state. */
25363 const char *
25364 thumb_call_via_reg (rtx reg)
25365 {
25366 int regno = REGNO (reg);
25367 rtx *labelp;
25368
25369 gcc_assert (regno < LR_REGNUM);
25370
25371 /* If we are in the normal text section we can use a single instance
25372 per compilation unit. If we are doing function sections, then we need
25373 an entry per section, since we can't rely on reachability. */
25374 if (in_section == text_section)
25375 {
25376 thumb_call_reg_needed = 1;
25377
25378 if (thumb_call_via_label[regno] == NULL)
25379 thumb_call_via_label[regno] = gen_label_rtx ();
25380 labelp = thumb_call_via_label + regno;
25381 }
25382 else
25383 {
25384 if (cfun->machine->call_via[regno] == NULL)
25385 cfun->machine->call_via[regno] = gen_label_rtx ();
25386 labelp = cfun->machine->call_via + regno;
25387 }
25388
25389 output_asm_insn ("bl\t%a0", labelp);
25390 return "";
25391 }
25392
25393 /* Routines for generating rtl. */
25394 void
25395 thumb_expand_movmemqi (rtx *operands)
25396 {
25397 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25398 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25399 HOST_WIDE_INT len = INTVAL (operands[2]);
25400 HOST_WIDE_INT offset = 0;
25401
25402 while (len >= 12)
25403 {
25404 emit_insn (gen_movmem12b (out, in, out, in));
25405 len -= 12;
25406 }
25407
25408 if (len >= 8)
25409 {
25410 emit_insn (gen_movmem8b (out, in, out, in));
25411 len -= 8;
25412 }
25413
25414 if (len >= 4)
25415 {
25416 rtx reg = gen_reg_rtx (SImode);
25417 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25418 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25419 len -= 4;
25420 offset += 4;
25421 }
25422
25423 if (len >= 2)
25424 {
25425 rtx reg = gen_reg_rtx (HImode);
25426 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25427 plus_constant (Pmode, in,
25428 offset))));
25429 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25430 offset)),
25431 reg));
25432 len -= 2;
25433 offset += 2;
25434 }
25435
25436 if (len)
25437 {
25438 rtx reg = gen_reg_rtx (QImode);
25439 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25440 plus_constant (Pmode, in,
25441 offset))));
25442 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25443 offset)),
25444 reg));
25445 }
25446 }
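/* Decomposition example: a 23-byte copy becomes one movmem12b (11 bytes
   left), one movmem8b (3 left), then a halfword move at offset 0 and a byte
   move at offset 2.  The 12- and 8-byte helpers advance the pointer
   registers themselves, so OFFSET only tracks the scalar tail.  */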
25447
25448 void
25449 thumb_reload_out_hi (rtx *operands)
25450 {
25451 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25452 }
25453
25454 /* Handle reading a half-word from memory during reload. */
25455 void
25456 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25457 {
25458 gcc_unreachable ();
25459 }
25460
25461 /* Return the length of a function name prefix
25462 that starts with the character 'c'. */
25463 static int
25464 arm_get_strip_length (int c)
25465 {
25466 switch (c)
25467 {
25468 ARM_NAME_ENCODING_LENGTHS
25469 default: return 0;
25470 }
25471 }
25472
25473 /* Return a pointer to a function's name with any
25474 and all prefix encodings stripped from it. */
25475 const char *
25476 arm_strip_name_encoding (const char *name)
25477 {
25478 int skip;
25479
25480 while ((skip = arm_get_strip_length (* name)))
25481 name += skip;
25482
25483 return name;
25484 }
25485
25486 /* If there is a '*' anywhere in the name's prefix, then
25487 emit the stripped name verbatim, otherwise prepend an
25488 underscore if leading underscores are being used. */
25489 void
25490 arm_asm_output_labelref (FILE *stream, const char *name)
25491 {
25492 int skip;
25493 int verbatim = 0;
25494
25495 while ((skip = arm_get_strip_length (* name)))
25496 {
25497 verbatim |= (*name == '*');
25498 name += skip;
25499 }
25500
25501 if (verbatim)
25502 fputs (name, stream);
25503 else
25504 asm_fprintf (stream, "%U%s", name);
25505 }
25506
25507 /* This function is used to emit an EABI tag and its associated value.
25508 We emit the numerical value of the tag in case the assembler does not
25509    support textual tags (e.g. gas prior to 2.20).  If requested we include
25510 the tag name in a comment so that anyone reading the assembler output
25511 will know which tag is being set.
25512
25513 This function is not static because arm-c.c needs it too. */
25514
25515 void
25516 arm_emit_eabi_attribute (const char *name, int num, int val)
25517 {
25518 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25519 if (flag_verbose_asm || flag_debug_asm)
25520 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25521 asm_fprintf (asm_out_file, "\n");
25522 }
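/* Sample output (assuming the ARM assembler comment character '@' and a
   -fverbose-asm compilation at -O2):

       .eabi_attribute 30, 2	@ Tag_ABI_optimization_goals   */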
25523
25524 static void
25525 arm_file_start (void)
25526 {
25527 int val;
25528
25529 if (TARGET_UNIFIED_ASM)
25530 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25531
25532 if (TARGET_BPABI)
25533 {
25534 const char *fpu_name;
25535 if (arm_selected_arch)
25536 {
25537 /* armv7ve doesn't support any extensions. */
25538 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25539 {
25540 	      /* Keep backward compatibility for assemblers
25541 which don't support armv7ve. */
25542 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25543 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25544 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25545 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25546 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25547 }
25548 else
25549 {
25550 const char* pos = strchr (arm_selected_arch->name, '+');
25551 if (pos)
25552 {
25553 char buf[15];
25554 gcc_assert (strlen (arm_selected_arch->name)
25555 <= sizeof (buf) / sizeof (*pos));
25556 strncpy (buf, arm_selected_arch->name,
25557 (pos - arm_selected_arch->name) * sizeof (*pos));
25558 buf[pos - arm_selected_arch->name] = '\0';
25559 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25560 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25561 }
25562 else
25563 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25564 }
25565 }
25566 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25567 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25568 else
25569 {
25570 const char* truncated_name
25571 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25572 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25573 }
25574
25575 if (TARGET_SOFT_FLOAT)
25576 {
25577 fpu_name = "softvfp";
25578 }
25579 else
25580 {
25581 fpu_name = arm_fpu_desc->name;
25582 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25583 {
25584 if (TARGET_HARD_FLOAT)
25585 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25586 if (TARGET_HARD_FLOAT_ABI)
25587 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25588 }
25589 }
25590 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25591
25592 /* Some of these attributes only apply when the corresponding features
25593 are used. However we don't have any easy way of figuring this out.
25594 Conservatively record the setting that would have been used. */
25595
25596 if (flag_rounding_math)
25597 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25598
25599 if (!flag_unsafe_math_optimizations)
25600 {
25601 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25602 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25603 }
25604 if (flag_signaling_nans)
25605 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25606
25607 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25608 flag_finite_math_only ? 1 : 3);
25609
25610 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25611 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25612 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25613 flag_short_enums ? 1 : 2);
25614
25615 /* Tag_ABI_optimization_goals. */
25616 if (optimize_size)
25617 val = 4;
25618 else if (optimize >= 2)
25619 val = 2;
25620 else if (optimize)
25621 val = 1;
25622 else
25623 val = 6;
25624 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25625
25626 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25627 unaligned_access);
25628
25629 if (arm_fp16_format)
25630 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25631 (int) arm_fp16_format);
25632
25633 if (arm_lang_output_object_attributes_hook)
25634 arm_lang_output_object_attributes_hook();
25635 }
25636
25637 default_file_start ();
25638 }
25639
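/* Output assembly at the end of the file: add a .note.GNU-stack section if
   needed, and emit the Thumb call-via-register veneers (a "bx" through each
   register for which one was recorded).  */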
25640 static void
25641 arm_file_end (void)
25642 {
25643 int regno;
25644
25645 if (NEED_INDICATE_EXEC_STACK)
25646 /* Add .note.GNU-stack. */
25647 file_end_indicate_exec_stack ();
25648
25649 if (! thumb_call_reg_needed)
25650 return;
25651
25652 switch_to_section (text_section);
25653 asm_fprintf (asm_out_file, "\t.code 16\n");
25654 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25655
25656 for (regno = 0; regno < LR_REGNUM; regno++)
25657 {
25658 rtx label = thumb_call_via_label[regno];
25659
25660 if (label != 0)
25661 {
25662 targetm.asm_out.internal_label (asm_out_file, "L",
25663 CODE_LABEL_NUMBER (label));
25664 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25665 }
25666 }
25667 }
25668
25669 #ifndef ARM_PE
25670 /* Symbols in the text segment can be accessed without indirecting via the
25671 constant pool; it may take an extra binary operation, but this is still
25672 faster than indirecting via memory. Don't do this when not optimizing,
25673 since we won't be calculating all of the offsets necessary to do this
25674 simplification. */
25675
25676 static void
25677 arm_encode_section_info (tree decl, rtx rtl, int first)
25678 {
25679 if (optimize > 0 && TREE_CONSTANT (decl))
25680 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25681
25682 default_encode_section_info (decl, rtl, first);
25683 }
25684 #endif /* !ARM_PE */
25685
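/* Output an internal label.  If the label is the pending target of the
   conditional-execution state machine, reset that state first.  */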
25686 static void
25687 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25688 {
25689 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25690 && !strcmp (prefix, "L"))
25691 {
25692 arm_ccfsm_state = 0;
25693 arm_target_insn = NULL;
25694 }
25695 default_internal_label (stream, prefix, labelno);
25696 }
25697
25698 /* Output code to add DELTA to the first argument, and then jump
25699 to FUNCTION. Used for C++ multiple inheritance. */
25700 static void
25701 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25702 HOST_WIDE_INT delta,
25703 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25704 tree function)
25705 {
25706 static int thunk_label = 0;
25707 char label[256];
25708 char labelpc[256];
25709 int mi_delta = delta;
25710 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25711 int shift = 0;
25712 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25713 ? 1 : 0);
25714 if (mi_delta < 0)
25715 mi_delta = - mi_delta;
25716
25717 final_start_function (emit_barrier (), file, 1);
25718
25719 if (TARGET_THUMB1)
25720 {
25721 int labelno = thunk_label++;
25722 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25723 /* Thunks are entered in ARM mode when available. */
25724 if (TARGET_THUMB1_ONLY)
25725 {
25726 /* push r3 so we can use it as a temporary. */
25727 /* TODO: Omit this save if r3 is not used. */
25728 fputs ("\tpush {r3}\n", file);
25729 fputs ("\tldr\tr3, ", file);
25730 }
25731 else
25732 {
25733 fputs ("\tldr\tr12, ", file);
25734 }
25735 assemble_name (file, label);
25736 fputc ('\n', file);
25737 if (flag_pic)
25738 {
25739 /* If we are generating PIC, the ldr instruction below loads
25740 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25741 the address of the add + 8, so we have:
25742
25743 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25744 = target + 1.
25745
25746 Note that we have "+ 1" because some versions of GNU ld
25747 don't set the low bit of the result for R_ARM_REL32
25748 relocations against thumb function symbols.
25749 On ARMv6M this is +4, not +8. */
25750 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25751 assemble_name (file, labelpc);
25752 fputs (":\n", file);
25753 if (TARGET_THUMB1_ONLY)
25754 {
25755 /* This is 2 insns after the start of the thunk, so we know it
25756 is 4-byte aligned. */
25757 fputs ("\tadd\tr3, pc, r3\n", file);
25758 fputs ("\tmov r12, r3\n", file);
25759 }
25760 else
25761 fputs ("\tadd\tr12, pc, r12\n", file);
25762 }
25763 else if (TARGET_THUMB1_ONLY)
25764 fputs ("\tmov r12, r3\n", file);
25765 }
25766 if (TARGET_THUMB1_ONLY)
25767 {
25768 if (mi_delta > 255)
25769 {
25770 fputs ("\tldr\tr3, ", file);
25771 assemble_name (file, label);
25772 fputs ("+4\n", file);
25773 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25774 mi_op, this_regno, this_regno);
25775 }
25776 else if (mi_delta != 0)
25777 {
25778 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
25779 when one of the operands is an immediate.  */
25780 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25781 mi_op, this_regno, this_regno,
25782 mi_delta);
25783 }
25784 }
25785 else
25786 {
25787 /* TODO: Use movw/movt for large constants when available. */
25788 while (mi_delta != 0)
25789 {
25790 if ((mi_delta & (3 << shift)) == 0)
25791 shift += 2;
25792 else
25793 {
25794 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25795 mi_op, this_regno, this_regno,
25796 mi_delta & (0xff << shift));
25797 mi_delta &= ~(0xff << shift);
25798 shift += 8;
25799 }
25800 }
25801 }
25802 if (TARGET_THUMB1)
25803 {
25804 if (TARGET_THUMB1_ONLY)
25805 fputs ("\tpop\t{r3}\n", file);
25806
25807 fprintf (file, "\tbx\tr12\n");
25808 ASM_OUTPUT_ALIGN (file, 2);
25809 assemble_name (file, label);
25810 fputs (":\n", file);
25811 if (flag_pic)
25812 {
25813 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25814 rtx tem = XEXP (DECL_RTL (function), 0);
25815 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25816 pipeline offset is four rather than eight. Adjust the offset
25817 accordingly. */
25818 tem = plus_constant (GET_MODE (tem), tem,
25819 TARGET_THUMB1_ONLY ? -3 : -7);
25820 tem = gen_rtx_MINUS (GET_MODE (tem),
25821 tem,
25822 gen_rtx_SYMBOL_REF (Pmode,
25823 ggc_strdup (labelpc)));
25824 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25825 }
25826 else
25827 /* Output ".word .LTHUNKn". */
25828 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25829
25830 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25831 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25832 }
25833 else
25834 {
25835 fputs ("\tb\t", file);
25836 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25837 if (NEED_PLT_RELOC)
25838 fputs ("(PLT)", file);
25839 fputc ('\n', file);
25840 }
25841
25842 final_end_function ();
25843 }
25844
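/* Output the CONST_VECTOR X as a single hexadecimal constant, with the
   highest-numbered element first.  Returns 1.  */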
25845 int
25846 arm_emit_vector_const (FILE *file, rtx x)
25847 {
25848 int i;
25849 const char * pattern;
25850
25851 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25852
25853 switch (GET_MODE (x))
25854 {
25855 case V2SImode: pattern = "%08x"; break;
25856 case V4HImode: pattern = "%04x"; break;
25857 case V8QImode: pattern = "%02x"; break;
25858 default: gcc_unreachable ();
25859 }
25860
25861 fprintf (file, "0x");
25862 for (i = CONST_VECTOR_NUNITS (x); i--;)
25863 {
25864 rtx element;
25865
25866 element = CONST_VECTOR_ELT (x, i);
25867 fprintf (file, pattern, INTVAL (element));
25868 }
25869
25870 return 1;
25871 }
25872
25873 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25874 HFmode constant pool entries are actually loaded with ldr. */
25875 void
25876 arm_emit_fp16_const (rtx c)
25877 {
25878 REAL_VALUE_TYPE r;
25879 long bits;
25880
25881 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25882 bits = real_to_target (NULL, &r, HFmode);
25883 if (WORDS_BIG_ENDIAN)
25884 assemble_zeros (2);
25885 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25886 if (!WORDS_BIG_ENDIAN)
25887 assemble_zeros (2);
25888 }
25889
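/* Output assembly to load an iWMMXt GR register.  Loads whose address offset
   is out of range are expanded via a core register temporarily saved on the
   stack.  */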
25890 const char *
25891 arm_output_load_gr (rtx *operands)
25892 {
25893 rtx reg;
25894 rtx offset;
25895 rtx wcgr;
25896 rtx sum;
25897
25898 if (!MEM_P (operands [1])
25899 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25900 || !REG_P (reg = XEXP (sum, 0))
25901 || !CONST_INT_P (offset = XEXP (sum, 1))
25902 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25903 return "wldrw%?\t%0, %1";
25904
25905 /* Fix up an out-of-range load of a GR register. */
25906 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25907 wcgr = operands[0];
25908 operands[0] = reg;
25909 output_asm_insn ("ldr%?\t%0, %1", operands);
25910
25911 operands[0] = wcgr;
25912 operands[1] = reg;
25913 output_asm_insn ("tmcr%?\t%0, %1", operands);
25914 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25915
25916 return "";
25917 }
25918
25919 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25920
25921 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25922 named arg and all anonymous args onto the stack.
25923 XXX I know the prologue shouldn't be pushing registers, but it is faster
25924 that way. */
25925
25926 static void
25927 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25928 machine_mode mode,
25929 tree type,
25930 int *pretend_size,
25931 int second_time ATTRIBUTE_UNUSED)
25932 {
25933 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25934 int nregs;
25935
25936 cfun->machine->uses_anonymous_args = 1;
25937 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25938 {
25939 nregs = pcum->aapcs_ncrn;
25940 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25941 nregs++;
25942 }
25943 else
25944 nregs = pcum->nregs;
25945
25946 if (nregs < NUM_ARG_REGS)
25947 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25948 }
25949
25950 /* We can't rely on the caller doing the proper promotion when
25951 using APCS or ATPCS. */
25952
25953 static bool
25954 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25955 {
25956 return !TARGET_AAPCS_BASED;
25957 }
25958
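/* Promote integer arguments and return values narrower than a word to
   SImode.  */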
25959 static machine_mode
25960 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25961 machine_mode mode,
25962 int *punsignedp ATTRIBUTE_UNUSED,
25963 const_tree fntype ATTRIBUTE_UNUSED,
25964 int for_return ATTRIBUTE_UNUSED)
25965 {
25966 if (GET_MODE_CLASS (mode) == MODE_INT
25967 && GET_MODE_SIZE (mode) < 4)
25968 return SImode;
25969
25970 return mode;
25971 }
25972
25973 /* AAPCS based ABIs use short enums by default. */
25974
25975 static bool
25976 arm_default_short_enums (void)
25977 {
25978 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25979 }
25980
25981
25982 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25983
25984 static bool
25985 arm_align_anon_bitfield (void)
25986 {
25987 return TARGET_AAPCS_BASED;
25988 }
25989
25990
25991 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25992
25993 static tree
25994 arm_cxx_guard_type (void)
25995 {
25996 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25997 }
25998
25999
26000 /* The EABI says test the least significant bit of a guard variable. */
26001
26002 static bool
26003 arm_cxx_guard_mask_bit (void)
26004 {
26005 return TARGET_AAPCS_BASED;
26006 }
26007
26008
26009 /* The EABI specifies that all array cookies are 8 bytes long. */
26010
26011 static tree
26012 arm_get_cookie_size (tree type)
26013 {
26014 tree size;
26015
26016 if (!TARGET_AAPCS_BASED)
26017 return default_cxx_get_cookie_size (type);
26018
26019 size = build_int_cst (sizetype, 8);
26020 return size;
26021 }
26022
26023
26024 /* The EABI says that array cookies should also contain the element size. */
26025
26026 static bool
26027 arm_cookie_has_size (void)
26028 {
26029 return TARGET_AAPCS_BASED;
26030 }
26031
26032
26033 /* The EABI says constructors and destructors should return a pointer to
26034 the object constructed/destroyed. */
26035
26036 static bool
26037 arm_cxx_cdtor_returns_this (void)
26038 {
26039 return TARGET_AAPCS_BASED;
26040 }
26041
26042 /* The EABI says that an inline function may never be the key
26043 method. */
26044
26045 static bool
26046 arm_cxx_key_method_may_be_inline (void)
26047 {
26048 return !TARGET_AAPCS_BASED;
26049 }
26050
26051 static void
26052 arm_cxx_determine_class_data_visibility (tree decl)
26053 {
26054 if (!TARGET_AAPCS_BASED
26055 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26056 return;
26057
26058 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26059 is exported. However, on systems without dynamic vague linkage,
26060 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26061 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26062 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26063 else
26064 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26065 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26066 }
26067
26068 static bool
26069 arm_cxx_class_data_always_comdat (void)
26070 {
26071 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26072 vague linkage if the class has no key function. */
26073 return !TARGET_AAPCS_BASED;
26074 }
26075
26076
26077 /* The EABI says __aeabi_atexit should be used to register static
26078 destructors. */
26079
26080 static bool
26081 arm_cxx_use_aeabi_atexit (void)
26082 {
26083 return TARGET_AAPCS_BASED;
26084 }
26085
26086
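/* Store SOURCE as the return address of the current function, either in LR
   or in its stack save slot.  SCRATCH is used to form addresses that are out
   of immediate range.  */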
26087 void
26088 arm_set_return_address (rtx source, rtx scratch)
26089 {
26090 arm_stack_offsets *offsets;
26091 HOST_WIDE_INT delta;
26092 rtx addr;
26093 unsigned long saved_regs;
26094
26095 offsets = arm_get_frame_offsets ();
26096 saved_regs = offsets->saved_regs_mask;
26097
26098 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26099 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26100 else
26101 {
26102 if (frame_pointer_needed)
26103 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26104 else
26105 {
26106 /* LR will be the first saved register. */
26107 delta = offsets->outgoing_args - (offsets->frame + 4);
26108
26109
26110 if (delta >= 4096)
26111 {
26112 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26113 GEN_INT (delta & ~4095)));
26114 addr = scratch;
26115 delta &= 4095;
26116 }
26117 else
26118 addr = stack_pointer_rtx;
26119
26120 addr = plus_constant (Pmode, addr, delta);
26121 }
26122 /* The store needs to be marked as frame related in order to prevent
26123 DSE from deleting it as dead if it is based on fp. */
26124 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26125 RTX_FRAME_RELATED_P (insn) = 1;
26126 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26127 }
26128 }
26129
26130
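/* Thumb counterpart of arm_set_return_address: store SOURCE as the return
   address, using SCRATCH for out-of-range stack offsets.  */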
26131 void
26132 thumb_set_return_address (rtx source, rtx scratch)
26133 {
26134 arm_stack_offsets *offsets;
26135 HOST_WIDE_INT delta;
26136 HOST_WIDE_INT limit;
26137 int reg;
26138 rtx addr;
26139 unsigned long mask;
26140
26141 emit_use (source);
26142
26143 offsets = arm_get_frame_offsets ();
26144 mask = offsets->saved_regs_mask;
26145 if (mask & (1 << LR_REGNUM))
26146 {
26147 limit = 1024;
26148 /* Find the saved regs. */
26149 if (frame_pointer_needed)
26150 {
26151 delta = offsets->soft_frame - offsets->saved_args;
26152 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26153 if (TARGET_THUMB1)
26154 limit = 128;
26155 }
26156 else
26157 {
26158 delta = offsets->outgoing_args - offsets->saved_args;
26159 reg = SP_REGNUM;
26160 }
26161 /* Allow for the stack frame. */
26162 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26163 delta -= 16;
26164 /* The link register is always the first saved register. */
26165 delta -= 4;
26166
26167 /* Construct the address. */
26168 addr = gen_rtx_REG (SImode, reg);
26169 if (delta > limit)
26170 {
26171 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26172 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26173 addr = scratch;
26174 }
26175 else
26176 addr = plus_constant (Pmode, addr, delta);
26177
26178 /* The store needs to be marked as frame related in order to prevent
26179 DSE from deleting it as dead if it is based on fp. */
26180 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26181 RTX_FRAME_RELATED_P (insn) = 1;
26182 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26183 }
26184 else
26185 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26186 }
26187
26188 /* Implements target hook vector_mode_supported_p. */
26189 bool
26190 arm_vector_mode_supported_p (machine_mode mode)
26191 {
26192 /* Neon also supports V2SImode, etc. listed in the clause below. */
26193 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26194 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26195 return true;
26196
26197 if ((TARGET_NEON || TARGET_IWMMXT)
26198 && ((mode == V2SImode)
26199 || (mode == V4HImode)
26200 || (mode == V8QImode)))
26201 return true;
26202
26203 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26204 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26205 || mode == V2HAmode))
26206 return true;
26207
26208 return false;
26209 }
26210
26211 /* Implements target hook array_mode_supported_p. */
26212
26213 static bool
26214 arm_array_mode_supported_p (machine_mode mode,
26215 unsigned HOST_WIDE_INT nelems)
26216 {
26217 if (TARGET_NEON
26218 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26219 && (nelems >= 2 && nelems <= 4))
26220 return true;
26221
26222 return false;
26223 }
26224
26225 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26226 registers when autovectorizing for Neon, at least until multiple vector
26227 widths are supported properly by the middle-end. */
26228
26229 static machine_mode
26230 arm_preferred_simd_mode (machine_mode mode)
26231 {
26232 if (TARGET_NEON)
26233 switch (mode)
26234 {
26235 case SFmode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26237 case SImode:
26238 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26239 case HImode:
26240 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26241 case QImode:
26242 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26243 case DImode:
26244 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26245 return V2DImode;
26246 break;
26247
26248 default:;
26249 }
26250
26251 if (TARGET_REALLY_IWMMXT)
26252 switch (mode)
26253 {
26254 case SImode:
26255 return V2SImode;
26256 case HImode:
26257 return V4HImode;
26258 case QImode:
26259 return V8QImode;
26260
26261 default:;
26262 }
26263
26264 return word_mode;
26265 }
26266
26267 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26268
26269 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26270 using r0-r4 for function arguments, r7 for the stack frame, and not having
26271 enough left over to do doubleword arithmetic. For Thumb-2 all the
26272 potentially problematic instructions accept high registers so this is not
26273 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26274 that require many low registers. */
26275 static bool
26276 arm_class_likely_spilled_p (reg_class_t rclass)
26277 {
26278 if ((TARGET_THUMB1 && rclass == LO_REGS)
26279 || rclass == CC_REG)
26280 return true;
26281
26282 return false;
26283 }
26284
26285 /* Implements target hook small_register_classes_for_mode_p. */
26286 bool
26287 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26288 {
26289 return TARGET_THUMB1;
26290 }
26291
26292 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26293 ARM insns and therefore guarantee that the shift count is modulo 256.
26294 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26295 guarantee no particular behavior for out-of-range counts. */
26296
26297 static unsigned HOST_WIDE_INT
26298 arm_shift_truncation_mask (machine_mode mode)
26299 {
26300 return mode == SImode ? 255 : 0;
26301 }
26302
26303
26304 /* Map internal gcc register numbers to DWARF2 register numbers. */
26305
26306 unsigned int
26307 arm_dbx_register_number (unsigned int regno)
26308 {
26309 if (regno < 16)
26310 return regno;
26311
26312 if (IS_VFP_REGNUM (regno))
26313 {
26314 /* See comment in arm_dwarf_register_span. */
26315 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26316 return 64 + regno - FIRST_VFP_REGNUM;
26317 else
26318 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26319 }
26320
26321 if (IS_IWMMXT_GR_REGNUM (regno))
26322 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26323
26324 if (IS_IWMMXT_REGNUM (regno))
26325 return 112 + regno - FIRST_IWMMXT_REGNUM;
26326
26327 gcc_unreachable ();
26328 }
26329
26330 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26331 GCC models them as 64 32-bit registers, so we need to describe this to
26332 the DWARF generation code. Other registers can use the default. */
26333 static rtx
26334 arm_dwarf_register_span (rtx rtl)
26335 {
26336 machine_mode mode;
26337 unsigned regno;
26338 rtx parts[16];
26339 int nregs;
26340 int i;
26341
26342 regno = REGNO (rtl);
26343 if (!IS_VFP_REGNUM (regno))
26344 return NULL_RTX;
26345
26346 /* XXX FIXME: The EABI defines two VFP register ranges:
26347 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26348 256-287: D0-D31
26349 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26350 corresponding D register. Until GDB supports this, we shall use the
26351 legacy encodings. We also use these encodings for D0-D15 for
26352 compatibility with older debuggers. */
26353 mode = GET_MODE (rtl);
26354 if (GET_MODE_SIZE (mode) < 8)
26355 return NULL_RTX;
26356
26357 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26358 {
26359 nregs = GET_MODE_SIZE (mode) / 4;
26360 for (i = 0; i < nregs; i += 2)
26361 if (TARGET_BIG_END)
26362 {
26363 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26364 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26365 }
26366 else
26367 {
26368 parts[i] = gen_rtx_REG (SImode, regno + i);
26369 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26370 }
26371 }
26372 else
26373 {
26374 nregs = GET_MODE_SIZE (mode) / 8;
26375 for (i = 0; i < nregs; i++)
26376 parts[i] = gen_rtx_REG (DImode, regno + i);
26377 }
26378
26379 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26380 }
26381
26382 #if ARM_UNWIND_INFO
26383 /* Emit unwind directives for a store-multiple instruction or stack pointer
26384 push during alignment.
26385 These should only ever be generated by the function prologue code, so
26386 expect them to have a particular form.
26387 The store-multiple instruction sometimes pushes pc as the last register,
26388 although it should not be tracked into unwind information, or for -Os
26389 sometimes pushes some dummy registers before the first register that needs
26390 to be tracked in unwind information; such dummy registers are there just
26391 to avoid separate stack adjustment, and will not be restored in the
26392 epilogue. */
26393
26394 static void
26395 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26396 {
26397 int i;
26398 HOST_WIDE_INT offset;
26399 HOST_WIDE_INT nregs;
26400 int reg_size;
26401 unsigned reg;
26402 unsigned lastreg;
26403 unsigned padfirst = 0, padlast = 0;
26404 rtx e;
26405
26406 e = XVECEXP (p, 0, 0);
26407 gcc_assert (GET_CODE (e) == SET);
26408
26409 /* First insn will adjust the stack pointer. */
26410 gcc_assert (GET_CODE (e) == SET
26411 && REG_P (SET_DEST (e))
26412 && REGNO (SET_DEST (e)) == SP_REGNUM
26413 && GET_CODE (SET_SRC (e)) == PLUS);
26414
26415 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26416 nregs = XVECLEN (p, 0) - 1;
26417 gcc_assert (nregs);
26418
26419 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26420 if (reg < 16)
26421 {
26422 /* For -Os dummy registers can be pushed at the beginning to
26423 avoid separate stack pointer adjustment. */
26424 e = XVECEXP (p, 0, 1);
26425 e = XEXP (SET_DEST (e), 0);
26426 if (GET_CODE (e) == PLUS)
26427 padfirst = INTVAL (XEXP (e, 1));
26428 gcc_assert (padfirst == 0 || optimize_size);
26429 /* The function prologue may also push pc, but not annotate it as it is
26430 never restored. We turn this into a stack pointer adjustment. */
26431 e = XVECEXP (p, 0, nregs);
26432 e = XEXP (SET_DEST (e), 0);
26433 if (GET_CODE (e) == PLUS)
26434 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26435 else
26436 padlast = offset - 4;
26437 gcc_assert (padlast == 0 || padlast == 4);
26438 if (padlast == 4)
26439 fprintf (asm_out_file, "\t.pad #4\n");
26440 reg_size = 4;
26441 fprintf (asm_out_file, "\t.save {");
26442 }
26443 else if (IS_VFP_REGNUM (reg))
26444 {
26445 reg_size = 8;
26446 fprintf (asm_out_file, "\t.vsave {");
26447 }
26448 else
26449 /* Unknown register type. */
26450 gcc_unreachable ();
26451
26452 /* If the stack increment doesn't match the size of the saved registers,
26453 something has gone horribly wrong. */
26454 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26455
26456 offset = padfirst;
26457 lastreg = 0;
26458 /* The remaining insns will describe the stores. */
26459 for (i = 1; i <= nregs; i++)
26460 {
26461 /* Expect (set (mem <addr>) (reg)).
26462 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26463 e = XVECEXP (p, 0, i);
26464 gcc_assert (GET_CODE (e) == SET
26465 && MEM_P (SET_DEST (e))
26466 && REG_P (SET_SRC (e)));
26467
26468 reg = REGNO (SET_SRC (e));
26469 gcc_assert (reg >= lastreg);
26470
26471 if (i != 1)
26472 fprintf (asm_out_file, ", ");
26473 /* We can't use %r for vfp because we need to use the
26474 double precision register names. */
26475 if (IS_VFP_REGNUM (reg))
26476 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26477 else
26478 asm_fprintf (asm_out_file, "%r", reg);
26479
26480 #ifdef ENABLE_CHECKING
26481 /* Check that the addresses are consecutive. */
26482 e = XEXP (SET_DEST (e), 0);
26483 if (GET_CODE (e) == PLUS)
26484 gcc_assert (REG_P (XEXP (e, 0))
26485 && REGNO (XEXP (e, 0)) == SP_REGNUM
26486 && CONST_INT_P (XEXP (e, 1))
26487 && offset == INTVAL (XEXP (e, 1)));
26488 else
26489 gcc_assert (i == 1
26490 && REG_P (e)
26491 && REGNO (e) == SP_REGNUM);
26492 offset += reg_size;
26493 #endif
26494 }
26495 fprintf (asm_out_file, "}\n");
26496 if (padfirst)
26497 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26498 }
26499
26500 /* Emit unwind directives for a SET. */
26501
26502 static void
26503 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26504 {
26505 rtx e0;
26506 rtx e1;
26507 unsigned reg;
26508
26509 e0 = XEXP (p, 0);
26510 e1 = XEXP (p, 1);
26511 switch (GET_CODE (e0))
26512 {
26513 case MEM:
26514 /* Pushing a single register. */
26515 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26516 || !REG_P (XEXP (XEXP (e0, 0), 0))
26517 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26518 abort ();
26519
26520 asm_fprintf (asm_out_file, "\t.save ");
26521 if (IS_VFP_REGNUM (REGNO (e1)))
26522 asm_fprintf(asm_out_file, "{d%d}\n",
26523 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26524 else
26525 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26526 break;
26527
26528 case REG:
26529 if (REGNO (e0) == SP_REGNUM)
26530 {
26531 /* A stack increment. */
26532 if (GET_CODE (e1) != PLUS
26533 || !REG_P (XEXP (e1, 0))
26534 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26535 || !CONST_INT_P (XEXP (e1, 1)))
26536 abort ();
26537
26538 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26539 -INTVAL (XEXP (e1, 1)));
26540 }
26541 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26542 {
26543 HOST_WIDE_INT offset;
26544
26545 if (GET_CODE (e1) == PLUS)
26546 {
26547 if (!REG_P (XEXP (e1, 0))
26548 || !CONST_INT_P (XEXP (e1, 1)))
26549 abort ();
26550 reg = REGNO (XEXP (e1, 0));
26551 offset = INTVAL (XEXP (e1, 1));
26552 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26553 HARD_FRAME_POINTER_REGNUM, reg,
26554 offset);
26555 }
26556 else if (REG_P (e1))
26557 {
26558 reg = REGNO (e1);
26559 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26560 HARD_FRAME_POINTER_REGNUM, reg);
26561 }
26562 else
26563 abort ();
26564 }
26565 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26566 {
26567 /* Move from sp to reg. */
26568 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26569 }
26570 else if (GET_CODE (e1) == PLUS
26571 && REG_P (XEXP (e1, 0))
26572 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26573 && CONST_INT_P (XEXP (e1, 1)))
26574 {
26575 /* Set reg to offset from sp. */
26576 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26577 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26578 }
26579 else
26580 abort ();
26581 break;
26582
26583 default:
26584 abort ();
26585 }
26586 }
26587
26588
26589 /* Emit unwind directives for the given insn. */
26590
26591 static void
26592 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26593 {
26594 rtx note, pat;
26595 bool handled_one = false;
26596
26597 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26598 return;
26599
26600 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26601 && (TREE_NOTHROW (current_function_decl)
26602 || crtl->all_throwers_are_sibcalls))
26603 return;
26604
26605 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26606 return;
26607
26608 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26609 {
26610 switch (REG_NOTE_KIND (note))
26611 {
26612 case REG_FRAME_RELATED_EXPR:
26613 pat = XEXP (note, 0);
26614 goto found;
26615
26616 case REG_CFA_REGISTER:
26617 pat = XEXP (note, 0);
26618 if (pat == NULL)
26619 {
26620 pat = PATTERN (insn);
26621 if (GET_CODE (pat) == PARALLEL)
26622 pat = XVECEXP (pat, 0, 0);
26623 }
26624
26625 /* Only emitted for IS_STACKALIGN re-alignment. */
26626 {
26627 rtx dest, src;
26628 unsigned reg;
26629
26630 src = SET_SRC (pat);
26631 dest = SET_DEST (pat);
26632
26633 gcc_assert (src == stack_pointer_rtx);
26634 reg = REGNO (dest);
26635 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26636 reg + 0x90, reg);
26637 }
26638 handled_one = true;
26639 break;
26640
26641 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26642 to get correct dwarf information for shrink-wrap. We should not
26643 emit unwind information for it because these are used either for
26644 pretend arguments or notes to adjust sp and restore registers from
26645 stack. */
26646 case REG_CFA_DEF_CFA:
26647 case REG_CFA_ADJUST_CFA:
26648 case REG_CFA_RESTORE:
26649 return;
26650
26651 case REG_CFA_EXPRESSION:
26652 case REG_CFA_OFFSET:
26653 /* ??? Only handling here what we actually emit. */
26654 gcc_unreachable ();
26655
26656 default:
26657 break;
26658 }
26659 }
26660 if (handled_one)
26661 return;
26662 pat = PATTERN (insn);
26663 found:
26664
26665 switch (GET_CODE (pat))
26666 {
26667 case SET:
26668 arm_unwind_emit_set (asm_out_file, pat);
26669 break;
26670
26671 case SEQUENCE:
26672 /* Store multiple. */
26673 arm_unwind_emit_sequence (asm_out_file, pat);
26674 break;
26675
26676 default:
26677 abort();
26678 }
26679 }
26680
26681
26682 /* Output a reference from a function exception table to the type_info
26683 object X. The EABI specifies that the symbol should be relocated by
26684 an R_ARM_TARGET2 relocation. */
26685
26686 static bool
26687 arm_output_ttype (rtx x)
26688 {
26689 fputs ("\t.word\t", asm_out_file);
26690 output_addr_const (asm_out_file, x);
26691 /* Use special relocations for symbol references. */
26692 if (!CONST_INT_P (x))
26693 fputs ("(TARGET2)", asm_out_file);
26694 fputc ('\n', asm_out_file);
26695
26696 return TRUE;
26697 }
26698
26699 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26700
26701 static void
26702 arm_asm_emit_except_personality (rtx personality)
26703 {
26704 fputs ("\t.personality\t", asm_out_file);
26705 output_addr_const (asm_out_file, personality);
26706 fputc ('\n', asm_out_file);
26707 }
26708
26709 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26710
26711 static void
26712 arm_asm_init_sections (void)
26713 {
26714 exception_section = get_unnamed_section (0, output_section_asm_op,
26715 "\t.handlerdata");
26716 }
26717 #endif /* ARM_UNWIND_INFO */
26718
26719 /* Output unwind directives for the start/end of a function. */
26720
26721 void
26722 arm_output_fn_unwind (FILE * f, bool prologue)
26723 {
26724 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26725 return;
26726
26727 if (prologue)
26728 fputs ("\t.fnstart\n", f);
26729 else
26730 {
26731 /* If this function will never be unwound, then mark it as such.
26732 The same condition is used in arm_unwind_emit to suppress
26733 the frame annotations. */
26734 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26735 && (TREE_NOTHROW (current_function_decl)
26736 || crtl->all_throwers_are_sibcalls))
26737 fputs("\t.cantunwind\n", f);
26738
26739 fputs ("\t.fnend\n", f);
26740 }
26741 }
26742
26743 static bool
26744 arm_emit_tls_decoration (FILE *fp, rtx x)
26745 {
26746 enum tls_reloc reloc;
26747 rtx val;
26748
26749 val = XVECEXP (x, 0, 0);
26750 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26751
26752 output_addr_const (fp, val);
26753
26754 switch (reloc)
26755 {
26756 case TLS_GD32:
26757 fputs ("(tlsgd)", fp);
26758 break;
26759 case TLS_LDM32:
26760 fputs ("(tlsldm)", fp);
26761 break;
26762 case TLS_LDO32:
26763 fputs ("(tlsldo)", fp);
26764 break;
26765 case TLS_IE32:
26766 fputs ("(gottpoff)", fp);
26767 break;
26768 case TLS_LE32:
26769 fputs ("(tpoff)", fp);
26770 break;
26771 case TLS_DESCSEQ:
26772 fputs ("(tlsdesc)", fp);
26773 break;
26774 default:
26775 gcc_unreachable ();
26776 }
26777
26778 switch (reloc)
26779 {
26780 case TLS_GD32:
26781 case TLS_LDM32:
26782 case TLS_IE32:
26783 case TLS_DESCSEQ:
26784 fputs (" + (. - ", fp);
26785 output_addr_const (fp, XVECEXP (x, 0, 2));
26786 /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
26787 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26788 output_addr_const (fp, XVECEXP (x, 0, 3));
26789 fputc (')', fp);
26790 break;
26791 default:
26792 break;
26793 }
26794
26795 return TRUE;
26796 }
26797
26798 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26799
26800 static void
26801 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26802 {
26803 gcc_assert (size == 4);
26804 fputs ("\t.word\t", file);
26805 output_addr_const (file, x);
26806 fputs ("(tlsldo)", file);
26807 }
26808
26809 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26810
26811 static bool
26812 arm_output_addr_const_extra (FILE *fp, rtx x)
26813 {
26814 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26815 return arm_emit_tls_decoration (fp, x);
26816 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26817 {
26818 char label[256];
26819 int labelno = INTVAL (XVECEXP (x, 0, 0));
26820
26821 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26822 assemble_name_raw (fp, label);
26823
26824 return TRUE;
26825 }
26826 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26827 {
26828 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26829 if (GOT_PCREL)
26830 fputs ("+.", fp);
26831 fputs ("-(", fp);
26832 output_addr_const (fp, XVECEXP (x, 0, 0));
26833 fputc (')', fp);
26834 return TRUE;
26835 }
26836 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26837 {
26838 output_addr_const (fp, XVECEXP (x, 0, 0));
26839 if (GOT_PCREL)
26840 fputs ("+.", fp);
26841 fputs ("-(", fp);
26842 output_addr_const (fp, XVECEXP (x, 0, 1));
26843 fputc (')', fp);
26844 return TRUE;
26845 }
26846 else if (GET_CODE (x) == CONST_VECTOR)
26847 return arm_emit_vector_const (fp, x);
26848
26849 return FALSE;
26850 }
26851
26852 /* Output assembly for a shift instruction.
26853 SET_FLAGS determines how the instruction modifies the condition codes.
26854 0 - Do not set condition codes.
26855 1 - Set condition codes.
26856 2 - Use smallest instruction. */
26857 const char *
26858 arm_output_shift(rtx * operands, int set_flags)
26859 {
26860 char pattern[100];
26861 static const char flag_chars[3] = {'?', '.', '!'};
26862 const char *shift;
26863 HOST_WIDE_INT val;
26864 char c;
26865
26866 c = flag_chars[set_flags];
26867 if (TARGET_UNIFIED_ASM)
26868 {
26869 shift = shift_op(operands[3], &val);
26870 if (shift)
26871 {
26872 if (val != -1)
26873 operands[2] = GEN_INT(val);
26874 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26875 }
26876 else
26877 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26878 }
26879 else
26880 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26881 output_asm_insn (pattern, operands);
26882 return "";
26883 }
26884
26885 /* Output assembly for a WMMX immediate shift instruction. */
26886 const char *
26887 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26888 {
26889 int shift = INTVAL (operands[2]);
26890 char templ[50];
26891 machine_mode opmode = GET_MODE (operands[0]);
26892
26893 gcc_assert (shift >= 0);
26894
26895 /* If the shift value exceeds 63 (for the D qualifier), 31 (for the W
26896 qualifier) or 15 (for the H qualifier), the result is all zeros, except for wror and wsra.  */
26897 if (((opmode == V4HImode) && (shift > 15))
26898 || ((opmode == V2SImode) && (shift > 31))
26899 || ((opmode == DImode) && (shift > 63)))
26900 {
26901 if (wror_or_wsra)
26902 {
26903 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26904 output_asm_insn (templ, operands);
26905 if (opmode == DImode)
26906 {
26907 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26908 output_asm_insn (templ, operands);
26909 }
26910 }
26911 else
26912 {
26913 /* The destination register will contain all zeros. */
26914 sprintf (templ, "wzero\t%%0");
26915 output_asm_insn (templ, operands);
26916 }
26917 return "";
26918 }
26919
26920 if ((opmode == DImode) && (shift > 32))
26921 {
26922 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26923 output_asm_insn (templ, operands);
26924 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26925 output_asm_insn (templ, operands);
26926 }
26927 else
26928 {
26929 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26930 output_asm_insn (templ, operands);
26931 }
26932 return "";
26933 }
26934
26935 /* Output assembly for a WMMX tinsr instruction. */
26936 const char *
26937 arm_output_iwmmxt_tinsr (rtx *operands)
26938 {
26939 int mask = INTVAL (operands[3]);
26940 int i;
26941 char templ[50];
26942 int units = mode_nunits[GET_MODE (operands[0])];
26943 gcc_assert ((mask & (mask - 1)) == 0);
26944 for (i = 0; i < units; ++i)
26945 {
26946 if ((mask & 0x01) == 1)
26947 {
26948 break;
26949 }
26950 mask >>= 1;
26951 }
26952 gcc_assert (i < units);
26953 {
26954 switch (GET_MODE (operands[0]))
26955 {
26956 case V8QImode:
26957 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26958 break;
26959 case V4HImode:
26960 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26961 break;
26962 case V2SImode:
26963 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26964 break;
26965 default:
26966 gcc_unreachable ();
26967 break;
26968 }
26969 output_asm_insn (templ, operands);
26970 }
26971 return "";
26972 }
26973
26974 /* Output a Thumb-1 casesi dispatch sequence. */
26975 const char *
26976 thumb1_output_casesi (rtx *operands)
26977 {
26978 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26979
26980 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26981
26982 switch (GET_MODE(diff_vec))
26983 {
26984 case QImode:
26985 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26986 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26987 case HImode:
26988 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26989 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26990 case SImode:
26991 return "bl\t%___gnu_thumb1_case_si";
26992 default:
26993 gcc_unreachable ();
26994 }
26995 }
26996
26997 /* Output a Thumb-2 casesi instruction. */
26998 const char *
26999 thumb2_output_casesi (rtx *operands)
27000 {
27001 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27002
27003 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27004
27005 output_asm_insn ("cmp\t%0, %1", operands);
27006 output_asm_insn ("bhi\t%l3", operands);
27007 switch (GET_MODE(diff_vec))
27008 {
27009 case QImode:
27010 return "tbb\t[%|pc, %0]";
27011 case HImode:
27012 return "tbh\t[%|pc, %0, lsl #1]";
27013 case SImode:
27014 if (flag_pic)
27015 {
27016 output_asm_insn ("adr\t%4, %l2", operands);
27017 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27018 output_asm_insn ("add\t%4, %4, %5", operands);
27019 return "bx\t%4";
27020 }
27021 else
27022 {
27023 output_asm_insn ("adr\t%4, %l2", operands);
27024 return "ldr\t%|pc, [%4, %0, lsl #2]";
27025 }
27026 default:
27027 gcc_unreachable ();
27028 }
27029 }
27030
27031 /* Most ARM cores are single issue, but some newer ones can dual issue.
27032 The scheduler descriptions rely on this being correct. */
27033 static int
27034 arm_issue_rate (void)
27035 {
27036 switch (arm_tune)
27037 {
27038 case cortexa15:
27039 case cortexa57:
27040 return 3;
27041
27042 case cortexm7:
27043 case cortexr4:
27044 case cortexr4f:
27045 case cortexr5:
27046 case genericv7a:
27047 case cortexa5:
27048 case cortexa7:
27049 case cortexa8:
27050 case cortexa9:
27051 case cortexa12:
27052 case cortexa17:
27053 case cortexa53:
27054 case fa726te:
27055 case marvell_pj4:
27056 return 2;
27057
27058 default:
27059 return 1;
27060 }
27061 }
27062
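/* Return the mangled name for the ARM-specific types (__va_list,
   half-precision float and the Neon builtin types), or NULL to use the
   default mangling.  */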
27063 const char *
27064 arm_mangle_type (const_tree type)
27065 {
27066 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27067 has to be mangled as if it is in the "std" namespace. */
27068 if (TARGET_AAPCS_BASED
27069 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27070 return "St9__va_list";
27071
27072 /* Half-precision float. */
27073 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27074 return "Dh";
27075
27076 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27077 builtin type. */
27078 if (TYPE_NAME (type) != NULL)
27079 return arm_mangle_builtin_type (type);
27080
27081 /* Use the default mangling. */
27082 return NULL;
27083 }
27084
27085 /* Order of allocation of core registers for Thumb: this allocation is
27086 written over the corresponding initial entries of the array
27087 initialized with REG_ALLOC_ORDER. We allocate all low registers
27088 first. Saving and restoring a low register is usually cheaper than
27089 using a call-clobbered high register. */
27090
27091 static const int thumb_core_reg_alloc_order[] =
27092 {
27093 3, 2, 1, 0, 4, 5, 6, 7,
27094 14, 12, 8, 9, 10, 11
27095 };
27096
27097 /* Adjust register allocation order when compiling for Thumb. */
27098
27099 void
27100 arm_order_regs_for_local_alloc (void)
27101 {
27102 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27103 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27104 if (TARGET_THUMB)
27105 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27106 sizeof (thumb_core_reg_alloc_order));
27107 }
27108
27109 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27110
27111 bool
27112 arm_frame_pointer_required (void)
27113 {
27114 return (cfun->has_nonlocal_label
27115 || SUBTARGET_FRAME_POINTER_REQUIRED
27116 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27117 }
27118
27119 /* Only thumb1 can't support conditional execution, so return true if
27120 the target is not thumb1. */
27121 static bool
27122 arm_have_conditional_execution (void)
27123 {
27124 return !TARGET_THUMB1;
27125 }
27126
27127 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27128 static HOST_WIDE_INT
27129 arm_vector_alignment (const_tree type)
27130 {
27131 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27132
27133 if (TARGET_AAPCS_BASED)
27134 align = MIN (align, 64);
27135
27136 return align;
27137 }
27138
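/* Return the set of vector sizes, in bytes, for the auto-vectorizer to
   consider: 16 and 8 for Neon, unless -mvectorize-with-neon-double is
   given.  */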
27139 static unsigned int
27140 arm_autovectorize_vector_sizes (void)
27141 {
27142 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27143 }
27144
27145 static bool
27146 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27147 {
27148 /* Vectors which aren't in packed structures will not be less aligned than
27149 the natural alignment of their element type, so this is safe. */
27150 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27151 return !is_packed;
27152
27153 return default_builtin_vector_alignment_reachable (type, is_packed);
27154 }
27155
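/* Return true if a misaligned vector access of the given MODE and TYPE can
   be supported.  With Neon and unaligned access enabled, misaligned accesses
   are handled provided they are not to packed data.  */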
27156 static bool
27157 arm_builtin_support_vector_misalignment (machine_mode mode,
27158 const_tree type, int misalignment,
27159 bool is_packed)
27160 {
27161 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27162 {
27163 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27164
27165 if (is_packed)
27166 return align == 1;
27167
27168 /* If the misalignment is unknown, we should be able to handle the access
27169 so long as it is not to a member of a packed data structure. */
27170 if (misalignment == -1)
27171 return true;
27172
27173 /* Return true if the misalignment is a multiple of the natural alignment
27174 of the vector's element type. This is probably always going to be
27175 true in practice, since we've already established that this isn't a
27176 packed access. */
27177 return ((misalignment % align) == 0);
27178 }
27179
27180 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27181 is_packed);
27182 }
27183
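/* Adjust the sets of fixed and call-used registers for the selected target:
   the Thumb-1 high registers when optimizing for size, LR on Thumb-1, the
   VFP and iWMMXt register banks, the PIC register and the frame pointer.  */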
27184 static void
27185 arm_conditional_register_usage (void)
27186 {
27187 int regno;
27188
27189 if (TARGET_THUMB1 && optimize_size)
27190 {
27191 /* When optimizing for size on Thumb-1, it's better not
27192 to use the HI regs, because of the overhead of
27193 stacking them. */
27194 for (regno = FIRST_HI_REGNUM;
27195 regno <= LAST_HI_REGNUM; ++regno)
27196 fixed_regs[regno] = call_used_regs[regno] = 1;
27197 }
27198
27199 /* The link register can be clobbered by any branch insn,
27200 but we have no way to track that at present, so mark
27201 it as unavailable. */
27202 if (TARGET_THUMB1)
27203 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27204
27205 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27206 {
27207 /* VFPv3 registers are disabled when earlier VFP
27208 versions are selected due to the definition of
27209 LAST_VFP_REGNUM. */
27210 for (regno = FIRST_VFP_REGNUM;
27211 regno <= LAST_VFP_REGNUM; ++ regno)
27212 {
27213 fixed_regs[regno] = 0;
27214 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27215 || regno >= FIRST_VFP_REGNUM + 32;
27216 }
27217 }
27218
27219 if (TARGET_REALLY_IWMMXT)
27220 {
27221 regno = FIRST_IWMMXT_GR_REGNUM;
27222 /* The 2002/10/09 revision of the XScale ABI has wCG0
27223 and wCG1 as call-preserved registers. The 2002/11/21
27224 revision changed this so that all wCG registers are
27225 scratch registers. */
27226 for (regno = FIRST_IWMMXT_GR_REGNUM;
27227 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27228 fixed_regs[regno] = 0;
27229 /* The XScale ABI has wR0 - wR9 as scratch registers,
27230 the rest as call-preserved registers. */
27231 for (regno = FIRST_IWMMXT_REGNUM;
27232 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27233 {
27234 fixed_regs[regno] = 0;
27235 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27236 }
27237 }
27238
27239 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27240 {
27241 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27242 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27243 }
27244 else if (TARGET_APCS_STACK)
27245 {
27246 fixed_regs[10] = 1;
27247 call_used_regs[10] = 1;
27248 }
27249 /* -mcaller-super-interworking reserves r11 for calls to
27250 _interwork_r11_call_via_rN(). Making the register global
27251 is an easy way of ensuring that it remains valid for all
27252 calls. */
27253 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27254 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27255 {
27256 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27257 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27258 if (TARGET_CALLER_INTERWORKING)
27259 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27260 }
27261 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27262 }
27263
27264 static reg_class_t
27265 arm_preferred_rename_class (reg_class_t rclass)
27266 {
27267 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27268 using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
27269 and code size can be reduced. */
27270 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27271 return LO_REGS;
27272 else
27273 return NO_REGS;
27274 }
27275
27276 /* Compute the attribute "length" of insn "*push_multi".
27277 So this function MUST be kept in sync with that insn pattern. */
27278 int
27279 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27280 {
27281 int i, regno, hi_reg;
27282 int num_saves = XVECLEN (parallel_op, 0);
27283
27284 /* ARM mode. */
27285 if (TARGET_ARM)
27286 return 4;
27287 /* Thumb1 mode. */
27288 if (TARGET_THUMB1)
27289 return 2;
27290
27291 /* Thumb2 mode. */
27292 regno = REGNO (first_op);
27293 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27294 for (i = 1; i < num_saves && !hi_reg; i++)
27295 {
27296 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27297 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27298 }
27299
27300 if (!hi_reg)
27301 return 2;
27302 return 4;
27303 }
27304
27305 /* Compute the number of instructions emitted by output_move_double. */
27306 int
27307 arm_count_output_move_double_insns (rtx *operands)
27308 {
27309 int count;
27310 rtx ops[2];
27311 /* output_move_double may modify the operands array, so call it
27312 here on a copy of the array. */
27313 ops[0] = operands[0];
27314 ops[1] = operands[1];
27315 output_move_double (ops, false, &count);
27316 return count;
27317 }
27318
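/* If the reciprocal of the CONST_DOUBLE operand is an exact power of two,
   return its base-2 logarithm (the number of fractional bits); otherwise
   return 0.  */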
27319 int
27320 vfp3_const_double_for_fract_bits (rtx operand)
27321 {
27322 REAL_VALUE_TYPE r0;
27323
27324 if (!CONST_DOUBLE_P (operand))
27325 return 0;
27326
27327 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27328 if (exact_real_inverse (DFmode, &r0))
27329 {
27330 if (exact_real_truncate (DFmode, &r0))
27331 {
27332 HOST_WIDE_INT value = real_to_integer (&r0);
27333 value = value & 0xffffffff;
27334 if ((value != 0) && ( (value & (value - 1)) == 0))
27335 return int_log2 (value);
27336 }
27337 }
27338 return 0;
27339 }
27340
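/* As above, but without taking the reciprocal: return the base-2 logarithm
   of OPERAND if it is an exact power of two, otherwise 0.  */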
27341 int
27342 vfp3_const_double_for_bits (rtx operand)
27343 {
27344 REAL_VALUE_TYPE r0;
27345
27346 if (!CONST_DOUBLE_P (operand))
27347 return 0;
27348
27349 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27350 if (exact_real_truncate (DFmode, &r0))
27351 {
27352 HOST_WIDE_INT value = real_to_integer (&r0);
27353 value = value & 0xffffffff;
27354 if ((value != 0) && ( (value & (value - 1)) == 0))
27355 return int_log2 (value);
27356 }
27357
27358 return 0;
27359 }
27360 \f
27361 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27362
27363 static void
27364 arm_pre_atomic_barrier (enum memmodel model)
27365 {
27366 if (need_atomic_barrier_p (model, true))
27367 emit_insn (gen_memory_barrier ());
27368 }
27369
27370 static void
27371 arm_post_atomic_barrier (enum memmodel model)
27372 {
27373 if (need_atomic_barrier_p (model, false))
27374 emit_insn (gen_memory_barrier ());
27375 }
27376
27377 /* Emit the load-exclusive and store-exclusive instructions.
27378 Use acquire and release versions if necessary. */
27379
27380 static void
27381 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27382 {
27383 rtx (*gen) (rtx, rtx);
27384
27385 if (acq)
27386 {
27387 switch (mode)
27388 {
27389 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27390 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27391 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27392 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27393 default:
27394 gcc_unreachable ();
27395 }
27396 }
27397 else
27398 {
27399 switch (mode)
27400 {
27401 case QImode: gen = gen_arm_load_exclusiveqi; break;
27402 case HImode: gen = gen_arm_load_exclusivehi; break;
27403 case SImode: gen = gen_arm_load_exclusivesi; break;
27404 case DImode: gen = gen_arm_load_exclusivedi; break;
27405 default:
27406 gcc_unreachable ();
27407 }
27408 }
27409
27410 emit_insn (gen (rval, mem));
27411 }
27412
27413 static void
27414 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27415 rtx mem, bool rel)
27416 {
27417 rtx (*gen) (rtx, rtx, rtx);
27418
27419 if (rel)
27420 {
27421 switch (mode)
27422 {
27423 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27424 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27425 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27426 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27427 default:
27428 gcc_unreachable ();
27429 }
27430 }
27431 else
27432 {
27433 switch (mode)
27434 {
27435 case QImode: gen = gen_arm_store_exclusiveqi; break;
27436 case HImode: gen = gen_arm_store_exclusivehi; break;
27437 case SImode: gen = gen_arm_store_exclusivesi; break;
27438 case DImode: gen = gen_arm_store_exclusivedi; break;
27439 default:
27440 gcc_unreachable ();
27441 }
27442 }
27443
27444 emit_insn (gen (bval, rval, mem));
27445 }
27446
27447 /* Mark the previous jump instruction as unlikely. */
27448
27449 static void
27450 emit_unlikely_jump (rtx insn)
27451 {
27452 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27453
27454 insn = emit_jump_insn (insn);
27455 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27456 }
27457
27458 /* Expand a compare and swap pattern. */
27459
27460 void
27461 arm_expand_compare_and_swap (rtx operands[])
27462 {
27463 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27464 machine_mode mode;
27465 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27466
27467 bval = operands[0];
27468 rval = operands[1];
27469 mem = operands[2];
27470 oldval = operands[3];
27471 newval = operands[4];
27472 is_weak = operands[5];
27473 mod_s = operands[6];
27474 mod_f = operands[7];
27475 mode = GET_MODE (mem);
27476
27477 /* Normally the succ memory model must be stronger than fail, but in the
27478 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27479 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27480
27481 if (TARGET_HAVE_LDACQ
27482 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27483 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27484 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27485
27486 switch (mode)
27487 {
27488 case QImode:
27489 case HImode:
27490 /* For narrow modes, we're going to perform the comparison in SImode,
27491 so do the zero-extension now. */
27492 rval = gen_reg_rtx (SImode);
27493 oldval = convert_modes (SImode, mode, oldval, true);
27494 /* FALLTHRU */
27495
27496 case SImode:
27497 /* Force the value into a register if needed. We waited until after
27498 the zero-extension above to do this properly. */
27499 if (!arm_add_operand (oldval, SImode))
27500 oldval = force_reg (SImode, oldval);
27501 break;
27502
27503 case DImode:
27504 if (!cmpdi_operand (oldval, mode))
27505 oldval = force_reg (mode, oldval);
27506 break;
27507
27508 default:
27509 gcc_unreachable ();
27510 }
27511
27512 switch (mode)
27513 {
27514 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27515 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27516 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27517 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27518 default:
27519 gcc_unreachable ();
27520 }
27521
27522 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27523
27524 if (mode == QImode || mode == HImode)
27525 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27526
27527 /* In all cases, we arrange for success to be signaled by Z set.
27528 This arrangement allows for the boolean result to be used directly
27529 in a subsequent branch, post optimization. */
27530 x = gen_rtx_REG (CCmode, CC_REGNUM);
27531 x = gen_rtx_EQ (SImode, x, const0_rtx);
27532 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27533 }
27534
27535 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27536 another memory store between the load-exclusive and store-exclusive can
27537 reset the monitor from Exclusive to Open state. This means we must wait
27538 until after reload to split the pattern, lest we get a register spill in
27539 the middle of the atomic sequence. */
27540
27541 void
27542 arm_split_compare_and_swap (rtx operands[])
27543 {
27544 rtx rval, mem, oldval, newval, scratch;
27545 machine_mode mode;
27546 enum memmodel mod_s, mod_f;
27547 bool is_weak;
27548 rtx_code_label *label1, *label2;
27549 rtx x, cond;
27550
27551 rval = operands[0];
27552 mem = operands[1];
27553 oldval = operands[2];
27554 newval = operands[3];
27555 is_weak = (operands[4] != const0_rtx);
27556 mod_s = (enum memmodel) INTVAL (operands[5]);
27557 mod_f = (enum memmodel) INTVAL (operands[6]);
27558 scratch = operands[7];
27559 mode = GET_MODE (mem);
27560
27561 bool use_acquire = TARGET_HAVE_LDACQ
27562 && !(mod_s == MEMMODEL_RELAXED
27563 || mod_s == MEMMODEL_CONSUME
27564 || mod_s == MEMMODEL_RELEASE);
27565
27566 bool use_release = TARGET_HAVE_LDACQ
27567 && !(mod_s == MEMMODEL_RELAXED
27568 || mod_s == MEMMODEL_CONSUME
27569 || mod_s == MEMMODEL_ACQUIRE);
27570
27571 /* Checks whether a barrier is needed and emits one accordingly. */
27572 if (!(use_acquire || use_release))
27573 arm_pre_atomic_barrier (mod_s);
27574
27575 label1 = NULL;
27576 if (!is_weak)
27577 {
27578 label1 = gen_label_rtx ();
27579 emit_label (label1);
27580 }
27581 label2 = gen_label_rtx ();
27582
27583 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27584
27585 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27586 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27587 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27588 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27589 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27590
27591 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27592
27593 /* Weak or strong, we want EQ to be true for success, so that we
27594 match the flags that we got from the compare above. */
27595 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27596 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27597 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27598
27599 if (!is_weak)
27600 {
27601 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27602 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27603 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27604 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27605 }
27606
27607 if (mod_f != MEMMODEL_RELAXED)
27608 emit_label (label2);
27609
27610 /* Checks whether a barrier is needed and emits one accordingly. */
27611 if (!(use_acquire || use_release))
27612 arm_post_atomic_barrier (mod_s);
27613
27614 if (mod_f == MEMMODEL_RELAXED)
27615 emit_label (label2);
27616 }
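/* For comparison at the source level, the IS_WEAK operand above comes from
   the "weak" argument of __atomic_compare_exchange_n: a weak CAS may fail
   spuriously, so the splitter omits the branch back to label1 and leaves
   retrying to the caller.  A minimal sketch (names are illustrative only):

     #include <stdbool.h>

     bool
     weak_cas (int *p, int *expected, int desired)
     {
       return __atomic_compare_exchange_n (p, expected, desired,
                                           true,              // weak
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
     }  */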
27617
27618 void
27619 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27620 rtx value, rtx model_rtx, rtx cond)
27621 {
27622 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27623 machine_mode mode = GET_MODE (mem);
27624 machine_mode wmode = (mode == DImode ? DImode : SImode);
27625 rtx_code_label *label;
27626 rtx x;
27627
27628 bool use_acquire = TARGET_HAVE_LDACQ
27629 && !(model == MEMMODEL_RELAXED
27630 || model == MEMMODEL_CONSUME
27631 || model == MEMMODEL_RELEASE);
27632
27633 bool use_release = TARGET_HAVE_LDACQ
27634 && !(model == MEMMODEL_RELAXED
27635 || model == MEMMODEL_CONSUME
27636 || model == MEMMODEL_ACQUIRE);
27637
27638 /* Checks whether a barrier is needed and emits one accordingly. */
27639 if (!(use_acquire || use_release))
27640 arm_pre_atomic_barrier (model);
27641
27642 label = gen_label_rtx ();
27643 emit_label (label);
27644
27645 if (new_out)
27646 new_out = gen_lowpart (wmode, new_out);
27647 if (old_out)
27648 old_out = gen_lowpart (wmode, old_out);
27649 else
27650 old_out = new_out;
27651 value = simplify_gen_subreg (wmode, value, mode, 0);
27652
27653 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27654
27655 switch (code)
27656 {
27657 case SET:
27658 new_out = value;
27659 break;
27660
27661 case NOT:
27662 x = gen_rtx_AND (wmode, old_out, value);
27663 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27664 x = gen_rtx_NOT (wmode, new_out);
27665 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27666 break;
27667
27668 case MINUS:
27669 if (CONST_INT_P (value))
27670 {
27671 value = GEN_INT (-INTVAL (value));
27672 code = PLUS;
27673 }
27674 /* FALLTHRU */
27675
27676 case PLUS:
27677 if (mode == DImode)
27678 {
27679 /* DImode plus/minus need to clobber flags. */
27680 /* The adddi3 and subdi3 patterns are incorrectly written so that
27681 they require matching operands, even when we could easily support
27682 three operands. Thankfully, this can be fixed up post-splitting,
27683 as the individual add+adc patterns do accept three operands and
27684 post-reload cprop can make these moves go away. */
27685 emit_move_insn (new_out, old_out);
27686 if (code == PLUS)
27687 x = gen_adddi3 (new_out, new_out, value);
27688 else
27689 x = gen_subdi3 (new_out, new_out, value);
27690 emit_insn (x);
27691 break;
27692 }
27693 /* FALLTHRU */
27694
27695 default:
27696 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27697 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27698 break;
27699 }
27700
27701 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27702 use_release);
27703
27704 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27705 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27706
27707 /* Checks whether a barrier is needed and emits one accordingly. */
27708 if (!(use_acquire || use_release))
27709 arm_post_atomic_barrier (model);
27710 }
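/* The rtx codes handled above correspond to GCC's __atomic fetch-and-op
   builtins; for instance PLUS is reached from __atomic_fetch_add, and the
   NOT/AND pair above implements the "nand" operation ~(old & value).  A
   minimal sketch (function and variable names are made up):

     int
     fetch_ops (int *counter, int mask)
     {
       int before = __atomic_fetch_add (counter, 4, __ATOMIC_ACQ_REL);
       __atomic_fetch_nand (counter, mask, __ATOMIC_RELAXED);
       return before;
     }  */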
27711 \f
27712 #define MAX_VECT_LEN 16
27713
27714 struct expand_vec_perm_d
27715 {
27716 rtx target, op0, op1;
27717 unsigned char perm[MAX_VECT_LEN];
27718 machine_mode vmode;
27719 unsigned char nelt;
27720 bool one_vector_p;
27721 bool testing_p;
27722 };
27723
27724 /* Generate a variable permutation. */
27725
27726 static void
27727 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27728 {
27729 machine_mode vmode = GET_MODE (target);
27730 bool one_vector_p = rtx_equal_p (op0, op1);
27731
27732 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27733 gcc_checking_assert (GET_MODE (op0) == vmode);
27734 gcc_checking_assert (GET_MODE (op1) == vmode);
27735 gcc_checking_assert (GET_MODE (sel) == vmode);
27736 gcc_checking_assert (TARGET_NEON);
27737
27738 if (one_vector_p)
27739 {
27740 if (vmode == V8QImode)
27741 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27742 else
27743 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27744 }
27745 else
27746 {
27747 rtx pair;
27748
27749 if (vmode == V8QImode)
27750 {
27751 pair = gen_reg_rtx (V16QImode);
27752 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27753 pair = gen_lowpart (TImode, pair);
27754 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27755 }
27756 else
27757 {
27758 pair = gen_reg_rtx (OImode);
27759 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27760 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27761 }
27762 }
27763 }
27764
27765 void
27766 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27767 {
27768 machine_mode vmode = GET_MODE (target);
27769 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27770 bool one_vector_p = rtx_equal_p (op0, op1);
27771 rtx rmask[MAX_VECT_LEN], mask;
27772
27773 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27774 numbering of elements for big-endian, we must reverse the order. */
27775 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27776
27777 /* The VTBL instruction does not use a modulo index, so we must take care
27778 of that ourselves. */
27779 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27780 for (i = 0; i < nelt; ++i)
27781 rmask[i] = mask;
27782 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27783 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27784
27785 arm_expand_vec_perm_1 (target, op0, op1, sel);
27786 }
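/* At the source level a variable permutation like the one expanded above
   can be written with GCC's vector extensions; __builtin_shuffle treats
   selector elements modulo twice the element count, which is why SEL is
   masked before the VTBL expansion.  A minimal sketch (the type and
   function names are made up):

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     var_perm (v8qi a, v8qi b, v8qi sel)
     {
       // Two-operand variable shuffle; indices are reduced modulo 16.
       return __builtin_shuffle (a, b, sel);
     }  */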
27787
27788 /* Generate or test for an insn that supports a constant permutation. */
27789
27790 /* Recognize patterns for the VUZP insns. */
27791
27792 static bool
27793 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27794 {
27795 unsigned int i, odd, mask, nelt = d->nelt;
27796 rtx out0, out1, in0, in1, x;
27797 rtx (*gen)(rtx, rtx, rtx, rtx);
27798
27799 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27800 return false;
27801
27802 /* Note that these are little-endian tests. Adjust for big-endian later. */
27803 if (d->perm[0] == 0)
27804 odd = 0;
27805 else if (d->perm[0] == 1)
27806 odd = 1;
27807 else
27808 return false;
27809 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27810
27811 for (i = 0; i < nelt; i++)
27812 {
27813 unsigned elt = (i * 2 + odd) & mask;
27814 if (d->perm[i] != elt)
27815 return false;
27816 }
27817
27818 /* Success! */
27819 if (d->testing_p)
27820 return true;
27821
27822 switch (d->vmode)
27823 {
27824 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27825 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27826 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27827 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27828 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27829 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27830 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27831 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27832 default:
27833 gcc_unreachable ();
27834 }
27835
27836 in0 = d->op0;
27837 in1 = d->op1;
27838 if (BYTES_BIG_ENDIAN)
27839 {
27840 x = in0, in0 = in1, in1 = x;
27841 odd = !odd;
27842 }
27843
27844 out0 = d->target;
27845 out1 = gen_reg_rtx (d->vmode);
27846 if (odd)
27847 x = out0, out0 = out1, out1 = x;
27848
27849 emit_insn (gen (out0, in0, in1, out1));
27850 return true;
27851 }
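/* An example of a constant selector that fits the test above (perm[i] ==
   2 * i + odd, with odd == 0), written with GCC's vector extensions; the
   type and function names are made up for illustration:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     even_elems (v4hi a, v4hi b)
     {
       // Select the even-numbered elements of the concatenation {a, b}.
       const v4hi sel = { 0, 2, 4, 6 };
       return __builtin_shuffle (a, b, sel);
     }  */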
27852
27853 /* Recognize patterns for the VZIP insns. */
27854
27855 static bool
27856 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27857 {
27858 unsigned int i, high, mask, nelt = d->nelt;
27859 rtx out0, out1, in0, in1, x;
27860 rtx (*gen)(rtx, rtx, rtx, rtx);
27861
27862 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27863 return false;
27864
27865 /* Note that these are little-endian tests. Adjust for big-endian later. */
27866 high = nelt / 2;
27867 if (d->perm[0] == high)
27868 ;
27869 else if (d->perm[0] == 0)
27870 high = 0;
27871 else
27872 return false;
27873 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27874
27875 for (i = 0; i < nelt / 2; i++)
27876 {
27877 unsigned elt = (i + high) & mask;
27878 if (d->perm[i * 2] != elt)
27879 return false;
27880 elt = (elt + nelt) & mask;
27881 if (d->perm[i * 2 + 1] != elt)
27882 return false;
27883 }
27884
27885 /* Success! */
27886 if (d->testing_p)
27887 return true;
27888
27889 switch (d->vmode)
27890 {
27891 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27892 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27893 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27894 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27895 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27896 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27897 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27898 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27899 default:
27900 gcc_unreachable ();
27901 }
27902
27903 in0 = d->op0;
27904 in1 = d->op1;
27905 if (BYTES_BIG_ENDIAN)
27906 {
27907 x = in0, in0 = in1, in1 = x;
27908 high = !high;
27909 }
27910
27911 out0 = d->target;
27912 out1 = gen_reg_rtx (d->vmode);
27913 if (high)
27914 x = out0, out0 = out1, out1 = x;
27915
27916 emit_insn (gen (out0, in0, in1, out1));
27917 return true;
27918 }
27919
27920 /* Recognize patterns for the VREV insns. */
27921
27922 static bool
27923 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27924 {
27925 unsigned int i, j, diff, nelt = d->nelt;
27926 rtx (*gen)(rtx, rtx);
27927
27928 if (!d->one_vector_p)
27929 return false;
27930
27931 diff = d->perm[0];
27932 switch (diff)
27933 {
27934 case 7:
27935 switch (d->vmode)
27936 {
27937 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27938 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27939 default:
27940 return false;
27941 }
27942 break;
27943 case 3:
27944 switch (d->vmode)
27945 {
27946 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27947 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27948 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27949 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27950 default:
27951 return false;
27952 }
27953 break;
27954 case 1:
27955 switch (d->vmode)
27956 {
27957 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27958 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27959 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27960 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27961 case V4SImode: gen = gen_neon_vrev64v4si; break;
27962 case V2SImode: gen = gen_neon_vrev64v2si; break;
27963 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27964 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27965 default:
27966 return false;
27967 }
27968 break;
27969 default:
27970 return false;
27971 }
27972
27973 for (i = 0; i < nelt ; i += diff + 1)
27974 for (j = 0; j <= diff; j += 1)
27975 {
27976 /* This is guaranteed to be true because the value of diff
27977 is 7, 3 or 1, and we should have enough elements in the
27978 vector to satisfy this. Getting a vector mask with a
27979 value of diff other than these values implies that
27980 something is wrong by the time we get here. */
27981 gcc_assert (i + j < nelt);
27982 if (d->perm[i + j] != i + diff - j)
27983 return false;
27984 }
27985
27986 /* Success! */
27987 if (d->testing_p)
27988 return true;
27989
27990 emit_insn (gen (d->target, d->op0));
27991 return true;
27992 }
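/* An example of a selector with diff == 1 that passes the checks above
   (reverse each adjacent pair of elements), using GCC's vector
   extensions; the names are made up for illustration:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     swap_pairs (v4hi a)
     {
       // perm = {1, 0, 3, 2}: element-pair reversal within 32-bit chunks.
       const v4hi sel = { 1, 0, 3, 2 };
       return __builtin_shuffle (a, sel);
     }  */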
27993
27994 /* Recognize patterns for the VTRN insns. */
27995
27996 static bool
27997 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27998 {
27999 unsigned int i, odd, mask, nelt = d->nelt;
28000 rtx out0, out1, in0, in1, x;
28001 rtx (*gen)(rtx, rtx, rtx, rtx);
28002
28003 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28004 return false;
28005
28006 /* Note that these are little-endian tests. Adjust for big-endian later. */
28007 if (d->perm[0] == 0)
28008 odd = 0;
28009 else if (d->perm[0] == 1)
28010 odd = 1;
28011 else
28012 return false;
28013 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28014
28015 for (i = 0; i < nelt; i += 2)
28016 {
28017 if (d->perm[i] != i + odd)
28018 return false;
28019 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28020 return false;
28021 }
28022
28023 /* Success! */
28024 if (d->testing_p)
28025 return true;
28026
28027 switch (d->vmode)
28028 {
28029 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28030 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28031 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28032 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28033 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28034 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28035 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28036 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28037 default:
28038 gcc_unreachable ();
28039 }
28040
28041 in0 = d->op0;
28042 in1 = d->op1;
28043 if (BYTES_BIG_ENDIAN)
28044 {
28045 x = in0, in0 = in1, in1 = x;
28046 odd = !odd;
28047 }
28048
28049 out0 = d->target;
28050 out1 = gen_reg_rtx (d->vmode);
28051 if (odd)
28052 x = out0, out0 = out1, out1 = x;
28053
28054 emit_insn (gen (out0, in0, in1, out1));
28055 return true;
28056 }
28057
28058 /* Recognize patterns for the VEXT insns. */
28059
28060 static bool
28061 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28062 {
28063 unsigned int i, nelt = d->nelt;
28064 rtx (*gen) (rtx, rtx, rtx, rtx);
28065 rtx offset;
28066
28067 unsigned int location;
28068
28069 unsigned int next = d->perm[0] + 1;
28070
28071 /* TODO: Handle GCC's numbering of elements for big-endian. */
28072 if (BYTES_BIG_ENDIAN)
28073 return false;
28074
28075 /* Check if the extracted indexes are increasing by one. */
28076 for (i = 1; i < nelt; next++, i++)
28077 {
28078 /* If we hit the most significant element of the 2nd vector in
28079 the previous iteration, no need to test further. */
28080 if (next == 2 * nelt)
28081 return false;
28082
28083 /* If we are operating on only one vector: it could be a
28084 rotation. If there are only two elements of size < 64, let
28085 arm_evpc_neon_vrev catch it. */
28086 if (d->one_vector_p && (next == nelt))
28087 {
28088 if ((nelt == 2) && (d->vmode != V2DImode))
28089 return false;
28090 else
28091 next = 0;
28092 }
28093
28094 if (d->perm[i] != next)
28095 return false;
28096 }
28097
28098 location = d->perm[0];
28099
28100 switch (d->vmode)
28101 {
28102 case V16QImode: gen = gen_neon_vextv16qi; break;
28103 case V8QImode: gen = gen_neon_vextv8qi; break;
28104 case V4HImode: gen = gen_neon_vextv4hi; break;
28105 case V8HImode: gen = gen_neon_vextv8hi; break;
28106 case V2SImode: gen = gen_neon_vextv2si; break;
28107 case V4SImode: gen = gen_neon_vextv4si; break;
28108 case V2SFmode: gen = gen_neon_vextv2sf; break;
28109 case V4SFmode: gen = gen_neon_vextv4sf; break;
28110 case V2DImode: gen = gen_neon_vextv2di; break;
28111 default:
28112 return false;
28113 }
28114
28115 /* Success! */
28116 if (d->testing_p)
28117 return true;
28118
28119 offset = GEN_INT (location);
28120 emit_insn (gen (d->target, d->op0, d->op1, offset));
28121 return true;
28122 }
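/* An example of a selector with consecutive indices starting at a nonzero
   location, the shape recognized above (here location == 1), using GCC's
   vector extensions; the names are made up for illustration:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     extract_window (v4hi a, v4hi b)
     {
       // perm = {1, 2, 3, 4}: a window crossing the boundary of a and b.
       const v4hi sel = { 1, 2, 3, 4 };
       return __builtin_shuffle (a, b, sel);
     }  */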
28123
28124 /* The NEON VTBL instruction is a fully variable permutation that's even
28125 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28126 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28127 can do slightly better by expanding this as a constant where we don't
28128 have to apply a mask. */
28129
28130 static bool
28131 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28132 {
28133 rtx rperm[MAX_VECT_LEN], sel;
28134 machine_mode vmode = d->vmode;
28135 unsigned int i, nelt = d->nelt;
28136
28137 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28138 numbering of elements for big-endian, we must reverse the order. */
28139 if (BYTES_BIG_ENDIAN)
28140 return false;
28141
28142 if (d->testing_p)
28143 return true;
28144
28145 /* Generic code will try constant permutation twice. Once with the
28146 original mode and again with the elements lowered to QImode.
28147 So wait and don't do the selector expansion ourselves. */
28148 if (vmode != V8QImode && vmode != V16QImode)
28149 return false;
28150
28151 for (i = 0; i < nelt; ++i)
28152 rperm[i] = GEN_INT (d->perm[i]);
28153 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28154 sel = force_reg (vmode, sel);
28155
28156 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28157 return true;
28158 }
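/* An example of an irregular constant selector that matches none of the
   VUZP/VZIP/VREV/VTRN/VEXT patterns and therefore falls through to the
   table-lookup expansion above (little-endian only); the names are made
   up for illustration:

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     scatter (v8qi a, v8qi b)
     {
       const v8qi sel = { 3, 7, 0, 12, 5, 9, 1, 14 };
       return __builtin_shuffle (a, b, sel);
     }  */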
28159
28160 static bool
28161 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28162 {
28163 /* Check if the input mask matches vext before reordering the
28164 operands. */
28165 if (TARGET_NEON)
28166 if (arm_evpc_neon_vext (d))
28167 return true;
28168
28169 /* The pattern matching functions above are written to look for a small
28170 number to begin the sequence (0, 1, N/2). If we begin with an index
28171 from the second operand, we can swap the operands. */
28172 if (d->perm[0] >= d->nelt)
28173 {
28174 unsigned i, nelt = d->nelt;
28175 rtx x;
28176
28177 for (i = 0; i < nelt; ++i)
28178 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28179
28180 x = d->op0;
28181 d->op0 = d->op1;
28182 d->op1 = x;
28183 }
28184
28185 if (TARGET_NEON)
28186 {
28187 if (arm_evpc_neon_vuzp (d))
28188 return true;
28189 if (arm_evpc_neon_vzip (d))
28190 return true;
28191 if (arm_evpc_neon_vrev (d))
28192 return true;
28193 if (arm_evpc_neon_vtrn (d))
28194 return true;
28195 return arm_evpc_neon_vtbl (d);
28196 }
28197 return false;
28198 }
28199
28200 /* Expand a vec_perm_const pattern. */
28201
28202 bool
28203 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28204 {
28205 struct expand_vec_perm_d d;
28206 int i, nelt, which;
28207
28208 d.target = target;
28209 d.op0 = op0;
28210 d.op1 = op1;
28211
28212 d.vmode = GET_MODE (target);
28213 gcc_assert (VECTOR_MODE_P (d.vmode));
28214 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28215 d.testing_p = false;
28216
28217 for (i = which = 0; i < nelt; ++i)
28218 {
28219 rtx e = XVECEXP (sel, 0, i);
28220 int ei = INTVAL (e) & (2 * nelt - 1);
28221 which |= (ei < nelt ? 1 : 2);
28222 d.perm[i] = ei;
28223 }
28224
28225 switch (which)
28226 {
28227 default:
28228 gcc_unreachable();
28229
28230 case 3:
28231 d.one_vector_p = false;
28232 if (!rtx_equal_p (op0, op1))
28233 break;
28234
28235 /* The elements of PERM do not suggest that only the first operand
28236 is used, but both operands are identical. Allow easier matching
28237 of the permutation by folding the permutation into the single
28238 input vector. */
28239 /* FALLTHRU */
28240 case 2:
28241 for (i = 0; i < nelt; ++i)
28242 d.perm[i] &= nelt - 1;
28243 d.op0 = op1;
28244 d.one_vector_p = true;
28245 break;
28246
28247 case 1:
28248 d.op1 = op0;
28249 d.one_vector_p = true;
28250 break;
28251 }
28252
28253 return arm_expand_vec_perm_const_1 (&d);
28254 }
28255
28256 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28257
28258 static bool
28259 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28260 const unsigned char *sel)
28261 {
28262 struct expand_vec_perm_d d;
28263 unsigned int i, nelt, which;
28264 bool ret;
28265
28266 d.vmode = vmode;
28267 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28268 d.testing_p = true;
28269 memcpy (d.perm, sel, nelt);
28270
28271 /* Categorize the set of elements in the selector. */
28272 for (i = which = 0; i < nelt; ++i)
28273 {
28274 unsigned char e = d.perm[i];
28275 gcc_assert (e < 2 * nelt);
28276 which |= (e < nelt ? 1 : 2);
28277 }
28278
28279 /* If all elements are from the second vector, fold them to the first. */

28280 if (which == 2)
28281 for (i = 0; i < nelt; ++i)
28282 d.perm[i] -= nelt;
28283
28284 /* Check whether the mask can be applied to the vector type. */
28285 d.one_vector_p = (which != 3);
28286
28287 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28288 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28289 if (!d.one_vector_p)
28290 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28291
28292 start_sequence ();
28293 ret = arm_expand_vec_perm_const_1 (&d);
28294 end_sequence ();
28295
28296 return ret;
28297 }
28298
28299 bool
28300 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28301 {
28302 /* If we are soft float and we do not have ldrd
28303 then all auto increment forms are ok. */
28304 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28305 return true;
28306
28307 switch (code)
28308 {
28309 /* Post-increment and pre-decrement are supported for all
28310 instruction forms except for vector forms. */
28311 case ARM_POST_INC:
28312 case ARM_PRE_DEC:
28313 if (VECTOR_MODE_P (mode))
28314 {
28315 if (code != ARM_PRE_DEC)
28316 return true;
28317 else
28318 return false;
28319 }
28320
28321 return true;
28322
28323 case ARM_POST_DEC:
28324 case ARM_PRE_INC:
28325 /* Without LDRD, and with a mode size greater than the
28326 word size, there is no point in auto-incrementing
28327 because ldm and stm will not have these forms. */
28328 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28329 return false;
28330
28331 /* Vector and floating point modes do not support
28332 these auto increment forms. */
28333 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28334 return false;
28335
28336 return true;
28337
28338 default:
28339 return false;
28340
28341 }
28342
28343 return false;
28344 }
28345
28346 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28347 on ARM, since we know that shifts by negative amounts are no-ops.
28348 Additionally, the default expansion code is not available or suitable
28349 for post-reload insn splits (this can occur when the register allocator
28350 chooses not to do a shift in NEON).
28351
28352 This function is used in both initial expand and post-reload splits, and
28353 handles all kinds of 64-bit shifts.
28354
28355 Input requirements:
28356 - It is safe for the input and output to be the same register, but
28357 early-clobber rules apply for the shift amount and scratch registers.
28358 - Shift by register requires both scratch registers. In all other cases
28359 the scratch registers may be NULL.
28360 - Ashiftrt by a register also clobbers the CC register. */
28361 void
28362 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28363 rtx amount, rtx scratch1, rtx scratch2)
28364 {
28365 rtx out_high = gen_highpart (SImode, out);
28366 rtx out_low = gen_lowpart (SImode, out);
28367 rtx in_high = gen_highpart (SImode, in);
28368 rtx in_low = gen_lowpart (SImode, in);
28369
28370 /* Terminology:
28371 in = the register pair containing the input value.
28372 out = the destination register pair.
28373 up = the high- or low-part of each pair.
28374 down = the opposite part to "up".
28375 In a shift, we can consider bits to shift from "up"-stream to
28376 "down"-stream, so in a left-shift "up" is the low-part and "down"
28377 is the high-part of each register pair. */
28378
28379 rtx out_up = code == ASHIFT ? out_low : out_high;
28380 rtx out_down = code == ASHIFT ? out_high : out_low;
28381 rtx in_up = code == ASHIFT ? in_low : in_high;
28382 rtx in_down = code == ASHIFT ? in_high : in_low;
28383
28384 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28385 gcc_assert (out
28386 && (REG_P (out) || GET_CODE (out) == SUBREG)
28387 && GET_MODE (out) == DImode);
28388 gcc_assert (in
28389 && (REG_P (in) || GET_CODE (in) == SUBREG)
28390 && GET_MODE (in) == DImode);
28391 gcc_assert (amount
28392 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28393 && GET_MODE (amount) == SImode)
28394 || CONST_INT_P (amount)));
28395 gcc_assert (scratch1 == NULL
28396 || (GET_CODE (scratch1) == SCRATCH)
28397 || (GET_MODE (scratch1) == SImode
28398 && REG_P (scratch1)));
28399 gcc_assert (scratch2 == NULL
28400 || (GET_CODE (scratch2) == SCRATCH)
28401 || (GET_MODE (scratch2) == SImode
28402 && REG_P (scratch2)));
28403 gcc_assert (!REG_P (out) || !REG_P (amount)
28404 || !HARD_REGISTER_P (out)
28405 || (REGNO (out) != REGNO (amount)
28406 && REGNO (out) + 1 != REGNO (amount)));
28407
28408 /* Macros to make following code more readable. */
28409 #define SUB_32(DEST,SRC) \
28410 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28411 #define RSB_32(DEST,SRC) \
28412 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28413 #define SUB_S_32(DEST,SRC) \
28414 gen_addsi3_compare0 ((DEST), (SRC), \
28415 GEN_INT (-32))
28416 #define SET(DEST,SRC) \
28417 gen_rtx_SET (SImode, (DEST), (SRC))
28418 #define SHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28420 #define LSHIFT(CODE,SRC,AMOUNT) \
28421 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28422 SImode, (SRC), (AMOUNT))
28423 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28424 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28425 SImode, (SRC), (AMOUNT))
28426 #define ORR(A,B) \
28427 gen_rtx_IOR (SImode, (A), (B))
28428 #define BRANCH(COND,LABEL) \
28429 gen_arm_cond_branch ((LABEL), \
28430 gen_rtx_ ## COND (CCmode, cc_reg, \
28431 const0_rtx), \
28432 cc_reg)
28433
28434 /* Shifts by register and shifts by constant are handled separately. */
28435 if (CONST_INT_P (amount))
28436 {
28437 /* We have a shift-by-constant. */
28438
28439 /* First, handle out-of-range shift amounts.
28440 In both cases we try to match the result that an ARM instruction in a
28441 shift-by-register would give. This helps reduce execution
28442 differences between optimization levels, but it won't stop other
28443 parts of the compiler doing different things. This is "undefined
28444 behaviour", in any case. */
28445 if (INTVAL (amount) <= 0)
28446 emit_insn (gen_movdi (out, in));
28447 else if (INTVAL (amount) >= 64)
28448 {
28449 if (code == ASHIFTRT)
28450 {
28451 rtx const31_rtx = GEN_INT (31);
28452 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28453 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28454 }
28455 else
28456 emit_insn (gen_movdi (out, const0_rtx));
28457 }
28458
28459 /* Now handle valid shifts. */
28460 else if (INTVAL (amount) < 32)
28461 {
28462 /* Shifts by a constant less than 32. */
28463 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28464
28465 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28466 emit_insn (SET (out_down,
28467 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28468 out_down)));
28469 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28470 }
28471 else
28472 {
28473 /* Shifts by a constant greater than 31. */
28474 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28475
28476 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28477 if (code == ASHIFTRT)
28478 emit_insn (gen_ashrsi3 (out_up, in_up,
28479 GEN_INT (31)));
28480 else
28481 emit_insn (SET (out_up, const0_rtx));
28482 }
28483 }
28484 else
28485 {
28486 /* We have a shift-by-register. */
28487 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28488
28489 /* This alternative requires the scratch registers. */
28490 gcc_assert (scratch1 && REG_P (scratch1));
28491 gcc_assert (scratch2 && REG_P (scratch2));
28492
28493 /* We will need the values "amount-32" and "32-amount" later.
28494 Swapping them around now allows the later code to be more general. */
28495 switch (code)
28496 {
28497 case ASHIFT:
28498 emit_insn (SUB_32 (scratch1, amount));
28499 emit_insn (RSB_32 (scratch2, amount));
28500 break;
28501 case ASHIFTRT:
28502 emit_insn (RSB_32 (scratch1, amount));
28503 /* Also set CC = amount > 32. */
28504 emit_insn (SUB_S_32 (scratch2, amount));
28505 break;
28506 case LSHIFTRT:
28507 emit_insn (RSB_32 (scratch1, amount));
28508 emit_insn (SUB_32 (scratch2, amount));
28509 break;
28510 default:
28511 gcc_unreachable ();
28512 }
28513
28514 /* Emit code like this:
28515
28516 arithmetic-left:
28517 out_down = in_down << amount;
28518 out_down = (in_up << (amount - 32)) | out_down;
28519 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28520 out_up = in_up << amount;
28521
28522 arithmetic-right:
28523 out_down = in_down >> amount;
28524 out_down = (in_up << (32 - amount)) | out_down;
28525 if (amount < 32)
28526 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28527 out_up = in_up << amount;
28528
28529 logical-right:
28530 out_down = in_down >> amount;
28531 out_down = (in_up << (32 - amount)) | out_down;
28532 if (amount < 32)
28533 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28534 out_up = in_up << amount;
28535
28536 The ARM and Thumb2 variants are the same but implemented slightly
28537 differently. If this were only called during expand we could just
28538 use the Thumb2 case and let combine do the right thing, but this
28539 can also be called from post-reload splitters. */
28540
28541 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28542
28543 if (!TARGET_THUMB2)
28544 {
28545 /* Emit code for ARM mode. */
28546 emit_insn (SET (out_down,
28547 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28548 if (code == ASHIFTRT)
28549 {
28550 rtx_code_label *done_label = gen_label_rtx ();
28551 emit_jump_insn (BRANCH (LT, done_label));
28552 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28553 out_down)));
28554 emit_label (done_label);
28555 }
28556 else
28557 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28558 out_down)));
28559 }
28560 else
28561 {
28562 /* Emit code for Thumb2 mode.
28563 Thumb2 can't do shift and or in one insn. */
28564 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28565 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28566
28567 if (code == ASHIFTRT)
28568 {
28569 rtx_code_label *done_label = gen_label_rtx ();
28570 emit_jump_insn (BRANCH (LT, done_label));
28571 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28572 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28573 emit_label (done_label);
28574 }
28575 else
28576 {
28577 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28578 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28579 }
28580 }
28581
28582 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28583 }
28584
28585 #undef SUB_32
28586 #undef RSB_32
28587 #undef SUB_S_32
28588 #undef SET
28589 #undef SHIFT
28590 #undef LSHIFT
28591 #undef REV_LSHIFT
28592 #undef ORR
28593 #undef BRANCH
28594 }
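/* A plain C rendering of the constant-shift case above for a left shift
   by 0 < N < 32, showing the identity the emitted sequence relies on;
   the function name and the use of <stdint.h> are illustrative only:

     #include <stdint.h>

     void
     lshift64_by_const (uint32_t in_lo, uint32_t in_hi, unsigned n,
                        uint32_t *out_lo, uint32_t *out_hi)
     {
       // Matches the "shift by a constant less than 32" branch: the high
       // word picks up the bits shifted out of the low word.
       *out_hi = (in_hi << n) | (in_lo >> (32 - n));
       *out_lo = in_lo << n;
     }  */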
28595
28596
28597 /* Return true if COMPARISON is a valid comparison operation, and
28598 put the operands into a form that is valid. */
28599 bool
28600 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28601 {
28602 enum rtx_code code = GET_CODE (*comparison);
28603 int code_int;
28604 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28605 ? GET_MODE (*op2) : GET_MODE (*op1);
28606
28607 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28608
28609 if (code == UNEQ || code == LTGT)
28610 return false;
28611
28612 code_int = (int)code;
28613 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28614 PUT_CODE (*comparison, (enum rtx_code)code_int);
28615
28616 switch (mode)
28617 {
28618 case SImode:
28619 if (!arm_add_operand (*op1, mode))
28620 *op1 = force_reg (mode, *op1);
28621 if (!arm_add_operand (*op2, mode))
28622 *op2 = force_reg (mode, *op2);
28623 return true;
28624
28625 case DImode:
28626 if (!cmpdi_operand (*op1, mode))
28627 *op1 = force_reg (mode, *op1);
28628 if (!cmpdi_operand (*op2, mode))
28629 *op2 = force_reg (mode, *op2);
28630 return true;
28631
28632 case SFmode:
28633 case DFmode:
28634 if (!arm_float_compare_operand (*op1, mode))
28635 *op1 = force_reg (mode, *op1);
28636 if (!arm_float_compare_operand (*op2, mode))
28637 *op2 = force_reg (mode, *op2);
28638 return true;
28639 default:
28640 break;
28641 }
28642
28643 return false;
28644
28645 }
28646
28647 /* Maximum number of instructions to set block of memory. */
28648 static int
28649 arm_block_set_max_insns (void)
28650 {
28651 if (optimize_function_for_size_p (cfun))
28652 return 4;
28653 else
28654 return current_tune->max_insns_inline_memset;
28655 }
28656
28657 /* Return TRUE if it's profitable to set a block of memory for the
28658 non-vectorized case. VAL is the value to set the memory
28659 with. LENGTH is the number of bytes to set. ALIGN is the
28660 alignment of the destination memory in bytes. UNALIGNED_P
28661 is TRUE if we can only set the memory with instructions
28662 meeting alignment requirements. USE_STRD_P is TRUE if we
28663 can use strd to set the memory. */
28664 static bool
28665 arm_block_set_non_vect_profit_p (rtx val,
28666 unsigned HOST_WIDE_INT length,
28667 unsigned HOST_WIDE_INT align,
28668 bool unaligned_p, bool use_strd_p)
28669 {
28670 int num = 0;
28671 /* For a leftover of 0-7 bytes, we can set the memory block using
28672 strb/strh/str with the minimum number of instructions. */
28673 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28674
28675 if (unaligned_p)
28676 {
28677 num = arm_const_inline_cost (SET, val);
28678 num += length / align + length % align;
28679 }
28680 else if (use_strd_p)
28681 {
28682 num = arm_const_double_inline_cost (val);
28683 num += (length >> 3) + leftover[length & 7];
28684 }
28685 else
28686 {
28687 num = arm_const_inline_cost (SET, val);
28688 num += (length >> 2) + leftover[length & 3];
28689 }
28690
28691 /* We may be able to combine last pair STRH/STRB into a single STR
28692 by shifting one byte back. */
28693 if (unaligned_access && length > 3 && (length & 3) == 3)
28694 num--;
28695
28696 return (num <= arm_block_set_max_insns ());
28697 }
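/* A worked example of the costing above: a word-aligned memset of 15
   bytes on the non-strd path needs (15 >> 2) = 3 word stores plus
   leftover[15 & 3] = leftover[3] = 2 stores (one strh and one strb),
   plus the constant-load cost (assume 1 for a simple value), i.e. 6
   instructions.  With unaligned access available the trailing strh/strb
   pair is merged into a single str, giving 5, so the block is set
   inline only if 5 <= arm_block_set_max_insns ().  */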
28698
28699 /* Return TRUE if it's profitable to set a block of memory for the
28700 vectorized case. LENGTH is the number of bytes to set.
28701 ALIGN is the alignment of destination memory in bytes.
28702 MODE is the vector mode used to set the memory. */
28703 static bool
28704 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28705 unsigned HOST_WIDE_INT align,
28706 machine_mode mode)
28707 {
28708 int num;
28709 bool unaligned_p = ((align & 3) != 0);
28710 unsigned int nelt = GET_MODE_NUNITS (mode);
28711
28712 /* Instruction loading constant value. */
28713 num = 1;
28714 /* Instructions storing the memory. */
28715 num += (length + nelt - 1) / nelt;
28716 /* Instructions adjusting the address expression. We only need to
28717 adjust the address expression if it is 4-byte aligned and the
28718 leftover bytes can only be stored by a misaligned store instruction. */
28719 if (!unaligned_p && (length & 3) != 0)
28720 num++;
28721
28722 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28723 if (!unaligned_p && mode == V16QImode)
28724 num--;
28725
28726 return (num <= arm_block_set_max_insns ());
28727 }
28728
28729 /* Set a block of memory using vectorization instructions for the
28730 unaligned case. We fill the first LENGTH bytes of the memory
28731 area starting from DSTBASE with byte constant VALUE. ALIGN is
28732 the alignment requirement of memory. Return TRUE if succeeded. */
28733 static bool
28734 arm_block_set_unaligned_vect (rtx dstbase,
28735 unsigned HOST_WIDE_INT length,
28736 unsigned HOST_WIDE_INT value,
28737 unsigned HOST_WIDE_INT align)
28738 {
28739 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28740 rtx dst, mem;
28741 rtx val_elt, val_vec, reg;
28742 rtx rval[MAX_VECT_LEN];
28743 rtx (*gen_func) (rtx, rtx);
28744 machine_mode mode;
28745 unsigned HOST_WIDE_INT v = value;
28746
28747 gcc_assert ((align & 0x3) != 0);
28748 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28749 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28750 if (length >= nelt_v16)
28751 {
28752 mode = V16QImode;
28753 gen_func = gen_movmisalignv16qi;
28754 }
28755 else
28756 {
28757 mode = V8QImode;
28758 gen_func = gen_movmisalignv8qi;
28759 }
28760 nelt_mode = GET_MODE_NUNITS (mode);
28761 gcc_assert (length >= nelt_mode);
28762 /* Skip if it isn't profitable. */
28763 if (!arm_block_set_vect_profit_p (length, align, mode))
28764 return false;
28765
28766 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28767 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28768
28769 v = sext_hwi (v, BITS_PER_WORD);
28770 val_elt = GEN_INT (v);
28771 for (j = 0; j < nelt_mode; j++)
28772 rval[j] = val_elt;
28773
28774 reg = gen_reg_rtx (mode);
28775 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28776 /* Emit instruction loading the constant value. */
28777 emit_move_insn (reg, val_vec);
28778
28779 /* Handle nelt_mode bytes in a vector. */
28780 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28781 {
28782 emit_insn ((*gen_func) (mem, reg));
28783 if (i + 2 * nelt_mode <= length)
28784 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28785 }
28786
28787 /* If at least nelt_v8 bytes are left over, we must be in
28788 V16QImode. */
28789 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28790
28791 /* Handle (8, 16) bytes leftover. */
28792 if (i + nelt_v8 < length)
28793 {
28794 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28795 /* We are shifting bytes back, set the alignment accordingly. */
28796 if ((length & 1) != 0 && align >= 2)
28797 set_mem_align (mem, BITS_PER_UNIT);
28798
28799 emit_insn (gen_movmisalignv16qi (mem, reg));
28800 }
28801 /* Handle (0, 8] bytes leftover. */
28802 else if (i < length && i + nelt_v8 >= length)
28803 {
28804 if (mode == V16QImode)
28805 {
28806 reg = gen_lowpart (V8QImode, reg);
28807 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28808 }
28809 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28810 + (nelt_mode - nelt_v8))));
28811 /* We are shifting bytes back, set the alignment accordingly. */
28812 if ((length & 1) != 0 && align >= 2)
28813 set_mem_align (mem, BITS_PER_UNIT);
28814
28815 emit_insn (gen_movmisalignv8qi (mem, reg));
28816 }
28817
28818 return true;
28819 }
28820
28821 /* Set a block of memory using vectorization instructions for the
28822 aligned case. We fill the first LENGTH bytes of the memory area
28823 starting from DSTBASE with byte constant VALUE. ALIGN is the
28824 alignment requirement of memory. Return TRUE if succeeded. */
28825 static bool
28826 arm_block_set_aligned_vect (rtx dstbase,
28827 unsigned HOST_WIDE_INT length,
28828 unsigned HOST_WIDE_INT value,
28829 unsigned HOST_WIDE_INT align)
28830 {
28831 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28832 rtx dst, addr, mem;
28833 rtx val_elt, val_vec, reg;
28834 rtx rval[MAX_VECT_LEN];
28835 machine_mode mode;
28836 unsigned HOST_WIDE_INT v = value;
28837
28838 gcc_assert ((align & 0x3) == 0);
28839 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28840 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28841 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28842 mode = V16QImode;
28843 else
28844 mode = V8QImode;
28845
28846 nelt_mode = GET_MODE_NUNITS (mode);
28847 gcc_assert (length >= nelt_mode);
28848 /* Skip if it isn't profitable. */
28849 if (!arm_block_set_vect_profit_p (length, align, mode))
28850 return false;
28851
28852 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28853
28854 v = sext_hwi (v, BITS_PER_WORD);
28855 val_elt = GEN_INT (v);
28856 for (j = 0; j < nelt_mode; j++)
28857 rval[j] = val_elt;
28858
28859 reg = gen_reg_rtx (mode);
28860 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28861 /* Emit instruction loading the constant value. */
28862 emit_move_insn (reg, val_vec);
28863
28864 i = 0;
28865 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28866 if (mode == V16QImode)
28867 {
28868 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28869 emit_insn (gen_movmisalignv16qi (mem, reg));
28870 i += nelt_mode;
28871 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28872 if (i + nelt_v8 < length && i + nelt_v16 > length)
28873 {
28874 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28875 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28876 /* We are shifting bytes back, set the alignment accordingly. */
28877 if ((length & 0x3) == 0)
28878 set_mem_align (mem, BITS_PER_UNIT * 4);
28879 else if ((length & 0x1) == 0)
28880 set_mem_align (mem, BITS_PER_UNIT * 2);
28881 else
28882 set_mem_align (mem, BITS_PER_UNIT);
28883
28884 emit_insn (gen_movmisalignv16qi (mem, reg));
28885 return true;
28886 }
28887 /* Fall through for bytes leftover. */
28888 mode = V8QImode;
28889 nelt_mode = GET_MODE_NUNITS (mode);
28890 reg = gen_lowpart (V8QImode, reg);
28891 }
28892
28893 /* Handle 8 bytes in a vector. */
28894 for (; (i + nelt_mode <= length); i += nelt_mode)
28895 {
28896 addr = plus_constant (Pmode, dst, i);
28897 mem = adjust_automodify_address (dstbase, mode, addr, i);
28898 emit_move_insn (mem, reg);
28899 }
28900
28901 /* Handle single word leftover by shifting 4 bytes back. We can
28902 use aligned access for this case. */
28903 if (i + UNITS_PER_WORD == length)
28904 {
28905 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28906 mem = adjust_automodify_address (dstbase, mode,
28907 addr, i - UNITS_PER_WORD);
28908 /* We are shifting 4 bytes back, set the alignment accordingly. */
28909 if (align > UNITS_PER_WORD)
28910 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28911
28912 emit_move_insn (mem, reg);
28913 }
28914 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28915 We have to use unaligned access for this case. */
28916 else if (i < length)
28917 {
28918 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28919 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28920 /* We are shifting bytes back, set the alignment accordingly. */
28921 if ((length & 1) == 0)
28922 set_mem_align (mem, BITS_PER_UNIT * 2);
28923 else
28924 set_mem_align (mem, BITS_PER_UNIT);
28925
28926 emit_insn (gen_movmisalignv8qi (mem, reg));
28927 }
28928
28929 return true;
28930 }
28931
28932 /* Set a block of memory using plain strh/strb instructions, only
28933 using instructions allowed by ALIGN on the processor. We fill the
28934 first LENGTH bytes of the memory area starting from DSTBASE
28935 with byte constant VALUE. ALIGN is the alignment requirement
28936 of memory. */
28937 static bool
28938 arm_block_set_unaligned_non_vect (rtx dstbase,
28939 unsigned HOST_WIDE_INT length,
28940 unsigned HOST_WIDE_INT value,
28941 unsigned HOST_WIDE_INT align)
28942 {
28943 unsigned int i;
28944 rtx dst, addr, mem;
28945 rtx val_exp, val_reg, reg;
28946 machine_mode mode;
28947 HOST_WIDE_INT v = value;
28948
28949 gcc_assert (align == 1 || align == 2);
28950
28951 if (align == 2)
28952 v |= (value << BITS_PER_UNIT);
28953
28954 v = sext_hwi (v, BITS_PER_WORD);
28955 val_exp = GEN_INT (v);
28956 /* Skip if it isn't profitable. */
28957 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28958 align, true, false))
28959 return false;
28960
28961 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28962 mode = (align == 2 ? HImode : QImode);
28963 val_reg = force_reg (SImode, val_exp);
28964 reg = gen_lowpart (mode, val_reg);
28965
28966 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28967 {
28968 addr = plus_constant (Pmode, dst, i);
28969 mem = adjust_automodify_address (dstbase, mode, addr, i);
28970 emit_move_insn (mem, reg);
28971 }
28972
28973 /* Handle single byte leftover. */
28974 if (i + 1 == length)
28975 {
28976 reg = gen_lowpart (QImode, val_reg);
28977 addr = plus_constant (Pmode, dst, i);
28978 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28979 emit_move_insn (mem, reg);
28980 i++;
28981 }
28982
28983 gcc_assert (i == length);
28984 return true;
28985 }
28986
28987 /* Set a block of memory using plain strd/str/strh/strb instructions,
28988 to permit unaligned copies on processors which support unaligned
28989 semantics for those instructions. We fill the first LENGTH bytes
28990 of the memory area starting from DSTBASE with byte constant VALUE.
28991 ALIGN is the alignment requirement of memory. */
28992 static bool
28993 arm_block_set_aligned_non_vect (rtx dstbase,
28994 unsigned HOST_WIDE_INT length,
28995 unsigned HOST_WIDE_INT value,
28996 unsigned HOST_WIDE_INT align)
28997 {
28998 unsigned int i;
28999 rtx dst, addr, mem;
29000 rtx val_exp, val_reg, reg;
29001 unsigned HOST_WIDE_INT v;
29002 bool use_strd_p;
29003
29004 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29005 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29006
29007 v = (value | (value << 8) | (value << 16) | (value << 24));
29008 if (length < UNITS_PER_WORD)
29009 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29010
29011 if (use_strd_p)
29012 v |= (v << BITS_PER_WORD);
29013 else
29014 v = sext_hwi (v, BITS_PER_WORD);
29015
29016 val_exp = GEN_INT (v);
29017 /* Skip if it isn't profitable. */
29018 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29019 align, false, use_strd_p))
29020 {
29021 if (!use_strd_p)
29022 return false;
29023
29024 /* Try without strd. */
29025 v = (v >> BITS_PER_WORD);
29026 v = sext_hwi (v, BITS_PER_WORD);
29027 val_exp = GEN_INT (v);
29028 use_strd_p = false;
29029 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29030 align, false, use_strd_p))
29031 return false;
29032 }
29033
29034 i = 0;
29035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29036 /* Handle double words using strd if possible. */
29037 if (use_strd_p)
29038 {
29039 val_reg = force_reg (DImode, val_exp);
29040 reg = val_reg;
29041 for (; (i + 8 <= length); i += 8)
29042 {
29043 addr = plus_constant (Pmode, dst, i);
29044 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29045 emit_move_insn (mem, reg);
29046 }
29047 }
29048 else
29049 val_reg = force_reg (SImode, val_exp);
29050
29051 /* Handle words. */
29052 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29053 for (; (i + 4 <= length); i += 4)
29054 {
29055 addr = plus_constant (Pmode, dst, i);
29056 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29057 if ((align & 3) == 0)
29058 emit_move_insn (mem, reg);
29059 else
29060 emit_insn (gen_unaligned_storesi (mem, reg));
29061 }
29062
29063 /* Merge last pair of STRH and STRB into a STR if possible. */
29064 if (unaligned_access && i > 0 && (i + 3) == length)
29065 {
29066 addr = plus_constant (Pmode, dst, i - 1);
29067 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29068 /* We are shifting one byte back, set the alignment accordingly. */
29069 if ((align & 1) == 0)
29070 set_mem_align (mem, BITS_PER_UNIT);
29071
29072 /* Most likely this is an unaligned access, and we can't tell at
29073 compilation time. */
29074 emit_insn (gen_unaligned_storesi (mem, reg));
29075 return true;
29076 }
29077
29078 /* Handle half word leftover. */
29079 if (i + 2 <= length)
29080 {
29081 reg = gen_lowpart (HImode, val_reg);
29082 addr = plus_constant (Pmode, dst, i);
29083 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29084 if ((align & 1) == 0)
29085 emit_move_insn (mem, reg);
29086 else
29087 emit_insn (gen_unaligned_storehi (mem, reg));
29088
29089 i += 2;
29090 }
29091
29092 /* Handle single byte leftover. */
29093 if (i + 1 == length)
29094 {
29095 reg = gen_lowpart (QImode, val_reg);
29096 addr = plus_constant (Pmode, dst, i);
29097 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29098 emit_move_insn (mem, reg);
29099 }
29100
29101 return true;
29102 }
29103
29104 /* Set a block of memory using vectorization instructions for both
29105 aligned and unaligned cases. We fill the first LENGTH bytes of
29106 the memory area starting from DSTBASE with byte constant VALUE.
29107 ALIGN is the alignment requirement of memory. */
29108 static bool
29109 arm_block_set_vect (rtx dstbase,
29110 unsigned HOST_WIDE_INT length,
29111 unsigned HOST_WIDE_INT value,
29112 unsigned HOST_WIDE_INT align)
29113 {
29114 /* Check whether we need to use unaligned store instruction. */
29115 if (((align & 3) != 0 || (length & 3) != 0)
29116 /* Check whether unaligned store instruction is available. */
29117 && (!unaligned_access || BYTES_BIG_ENDIAN))
29118 return false;
29119
29120 if ((align & 3) == 0)
29121 return arm_block_set_aligned_vect (dstbase, length, value, align);
29122 else
29123 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29124 }
29125
29126 /* Expand string store operation. Firstly we try to do that by using
29127 vectorization instructions, then try with ARM unaligned access and
29128 double-word store if profitable. OPERANDS[0] is the destination,
29129 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29130 initialize the memory, OPERANDS[3] is the known alignment of the
29131 destination. */
29132 bool
29133 arm_gen_setmem (rtx *operands)
29134 {
29135 rtx dstbase = operands[0];
29136 unsigned HOST_WIDE_INT length;
29137 unsigned HOST_WIDE_INT value;
29138 unsigned HOST_WIDE_INT align;
29139
29140 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29141 return false;
29142
29143 length = UINTVAL (operands[1]);
29144 if (length > 64)
29145 return false;
29146
29147 value = (UINTVAL (operands[2]) & 0xFF);
29148 align = UINTVAL (operands[3]);
29149 if (TARGET_NEON && length >= 8
29150 && current_tune->string_ops_prefer_neon
29151 && arm_block_set_vect (dstbase, length, value, align))
29152 return true;
29153
29154 if (!unaligned_access && (align & 3) != 0)
29155 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29156
29157 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29158 }
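/* A source-level example of a call that can reach the expander above: a
   memset with a constant value and a constant length not exceeding the
   64-byte cutoff checked above.  Whether it is actually inlined still
   depends on the profitability checks; the function name and the length
   of 24 are illustrative only:

     #include <string.h>

     void
     clear_header (unsigned char *buf)
     {
       memset (buf, 0xAB, 24);   // the value is masked to its low byte above
     }  */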
29159
29160 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29161
29162 static unsigned HOST_WIDE_INT
29163 arm_asan_shadow_offset (void)
29164 {
29165 return (unsigned HOST_WIDE_INT) 1 << 29;
29166 }
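/* For reference, AddressSanitizer uses the usual shadow scale of 3, so
   with the offset returned above a shadow address is computed as

     shadow = (addr >> 3) + (1 << 29);   // i.e. addr / 8 + 0x20000000

   placing the shadow of address 0 at 0x20000000 on 32-bit ARM.  */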
29167
29168
29169 /* This is a temporary fix for PR60655. Ideally we need
29170 to handle most of these cases in the generic part but
29171 currently we reject minus (..) (sym_ref). We try to
29172 ameliorate the case with minus (sym_ref1) (sym_ref2)
29173 where they are in the same section. */
29174
29175 static bool
29176 arm_const_not_ok_for_debug_p (rtx p)
29177 {
29178 tree decl_op0 = NULL;
29179 tree decl_op1 = NULL;
29180
29181 if (GET_CODE (p) == MINUS)
29182 {
29183 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29184 {
29185 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29186 if (decl_op1
29187 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29188 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29189 {
29190 if ((TREE_CODE (decl_op1) == VAR_DECL
29191 || TREE_CODE (decl_op1) == CONST_DECL)
29192 && (TREE_CODE (decl_op0) == VAR_DECL
29193 || TREE_CODE (decl_op0) == CONST_DECL))
29194 return (get_variable_section (decl_op1, false)
29195 != get_variable_section (decl_op0, false));
29196
29197 if (TREE_CODE (decl_op1) == LABEL_DECL
29198 && TREE_CODE (decl_op0) == LABEL_DECL)
29199 return (DECL_CONTEXT (decl_op1)
29200 != DECL_CONTEXT (decl_op0));
29201 }
29202
29203 return true;
29204 }
29205 }
29206
29207 return false;
29208 }
29209
29210 /* Return TRUE if X is a reference to a value in a constant pool. */
29211 extern bool
29212 arm_is_constant_pool_ref (rtx x)
29213 {
29214 return (MEM_P (x)
29215 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29216 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29217 }
29218
29219 /* If MEM is in the form of [base+offset], extract the two parts
29220 of the address, setting BASE and OFFSET; otherwise return false
29221 after clearing BASE and OFFSET. */
29222
29223 static bool
29224 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29225 {
29226 rtx addr;
29227
29228 gcc_assert (MEM_P (mem));
29229
29230 addr = XEXP (mem, 0);
29231
29232 /* Strip off const from addresses like (const (addr)). */
29233 if (GET_CODE (addr) == CONST)
29234 addr = XEXP (addr, 0);
29235
29236 if (GET_CODE (addr) == REG)
29237 {
29238 *base = addr;
29239 *offset = const0_rtx;
29240 return true;
29241 }
29242
29243 if (GET_CODE (addr) == PLUS
29244 && GET_CODE (XEXP (addr, 0)) == REG
29245 && CONST_INT_P (XEXP (addr, 1)))
29246 {
29247 *base = XEXP (addr, 0);
29248 *offset = XEXP (addr, 1);
29249 return true;
29250 }
29251
29252 *base = NULL_RTX;
29253 *offset = NULL_RTX;
29254
29255 return false;
29256 }
29257
29258 /* If INSN is a load or store of address in the form of [base+offset],
29259 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
29260 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29261 otherwise return FALSE. */
29262
29263 static bool
29264 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29265 {
29266 rtx x, dest, src;
29267
29268 gcc_assert (INSN_P (insn));
29269 x = PATTERN (insn);
29270 if (GET_CODE (x) != SET)
29271 return false;
29272
29273 src = SET_SRC (x);
29274 dest = SET_DEST (x);
29275 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29276 {
29277 *is_load = false;
29278 extract_base_offset_in_addr (dest, base, offset);
29279 }
29280 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29281 {
29282 *is_load = true;
29283 extract_base_offset_in_addr (src, base, offset);
29284 }
29285 else
29286 return false;
29287
29288 return (*base != NULL_RTX && *offset != NULL_RTX);
29289 }
29290
29291 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29292
29293 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29294 and PRI are only calculated for these instructions. For other instructions,
29295 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
29296 of instruction fusion can be supported by returning different priorities.
29297
29298 It's important that irrelevant instructions get the largest FUSION_PRI. */
29299
29300 static void
29301 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29302 int *fusion_pri, int *pri)
29303 {
29304 int tmp, off_val;
29305 bool is_load;
29306 rtx base, offset;
29307
29308 gcc_assert (INSN_P (insn));
29309
29310 tmp = max_pri - 1;
29311 if (!fusion_load_store (insn, &base, &offset, &is_load))
29312 {
29313 *pri = tmp;
29314 *fusion_pri = tmp;
29315 return;
29316 }
29317
29318 /* Load goes first. */
29319 if (is_load)
29320 *fusion_pri = tmp - 1;
29321 else
29322 *fusion_pri = tmp - 2;
29323
29324 tmp /= 2;
29325
29326 /* INSN with smaller base register goes first. */
29327 tmp -= ((REGNO (base) & 0xff) << 20);
29328
29329 /* INSN with smaller offset goes first. */
29330 off_val = (int)(INTVAL (offset));
29331 if (off_val >= 0)
29332 tmp -= (off_val & 0xfffff);
29333 else
29334 tmp += ((- off_val) & 0xfffff);
29335
29336 *pri = tmp;
29337 return;
29338 }
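/* A worked example of the priorities above, assuming two loads through
   the same base register r4 (REGNO 4) at offsets 0 and 4, i.e. the two
   halves of a potential ldrd:

     load at [r4]      ->  fusion_pri = max_pri - 2,
                           pri = (max_pri - 1) / 2 - (4 << 20)
     load at [r4, #4]  ->  fusion_pri = max_pri - 2,
                           pri = (max_pri - 1) / 2 - (4 << 20) - 4

   Both share the same FUSION_PRI and their PRI values differ only by the
   offset, so the scheduler keeps them adjacent as fusion candidates.  */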
29339 #include "gt-arm.h"